From d6d3f46c78a21e379dd420261c64d28fa7285d66 Mon Sep 17 00:00:00 2001 From: Marcel Gansfusz Date: Sat, 12 Jul 2025 14:47:22 +0200 Subject: [PATCH] improved LVID extraction; added some docstrings --- app/__pycache__/main.cpython-313.pyc | Bin 24370 -> 24947 bytes app/init.py | 3 ++- app/main.py | 12 ++++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/app/__pycache__/main.cpython-313.pyc b/app/__pycache__/main.cpython-313.pyc index d3135f019054cc72ea12b8c0ad39e2651c4e7d77..61eadd45dd6bc02b623328481a0757bf5468fc81 100644 GIT binary patch delta 2598 zcmZ`)U2Gd!6`ngk{vSJb;y8|-c+%!~+a{#>X`4{B!KqqrgVSW$VyO8Cf3Q`3EB$$w_fQLn#J9XN$ zEMs}jx##bG=iGD8{P3q}_vgsF%kwS@p5d!nu6O)h?-fU+6rRg3$+}R?$wETO$hW(5 z#f5nxo1d5QoMV~~fcc-PWopkxc^ZsgJE91=@n4*;UUVt+2%SY)>NFFhvgibpMO~g! zza|$KHRW+U5(ENEMIo)Ks;24%RmteWt$ZuIgZ6P&HeeFi_`LvEYMrKn@z1zMXoaos+2g`y;yqEd7# z{6!{(SgMtBP|0stI31;<2XT&F9ucytCP;!VOIkW7$4H5Xs_1Pj;{vK1I(xE?*}LDK848vOb%eO2VkZi zR}vb_D(sRppOcn^~M1i$*@x`Se2jes^(J~ z7S9>2^%3OzD#_`u`#IGc$OC z{6_c;a^Mfb1D3@E@YfpEAeUn(Y6*RvNpc=ap+!z%L?_;EjQ=0a$w&)$z7$!2_xq7E z$cY`%FCrH%MRnxGe~X?+K0Mge!*L?~@QtQERD-|ObfnAw1SJv!>=Vq2FXaPtwJDx- zcZwJ5@IRWy;Re3b9C3R@USv1e7%W}5039(;jsotZE-V)2RVgE9 zx`jDeku|9Zvc%}Hpi5*95Sx9zAi+h2Gupog*d9BMlE$gn6*}+}z@zWM6{`*hhue6o z<+m(nPw|trxNCk|9soy8cBiugnQKsXD(@Z2^WC+%d^& z&ctXbitk*A;GW(A?^O~KZn1^u5!QM?h?W{Pc|j#r%s`Rz(CD&NPa%=f@%iJY`B~!g zBKRy*ro*^*{3eSE#wUZ{Wzjw3+rz^gx^Fx@8)VQgjJs!*=}K z4%1ZiA~berV`{BrTC&q;fszO{pdk^$CJmf<7U8AoLw65V-GFX8JuAi68%+*)_=>uC zYh@=Z$NtJmRxS$Qxe4c?Y_65o6}=o^J$x^*a;M^Iu5htE9(22Qnx;(~v6?~zZNn=K zE8UwYyhDE;A=}32{l-db|6P+JtM{h%EK2Y-nwEk21$y7>g-sMR={D53h@zyC+x{*IL)x%hMHii)kl92gTQy(`)_fgXNnQ z&mq%Egsx!<{11e56RGDpf1U`v&-we>aOGO4o~Y`dvk$f5|6B{5BC2M}-?-&)zdrS~ z%Pa03hW6WbV<0Da)>|o=mrxsuZn=1pO?2DqhZJyfCNu^sg4tiLE2mcn$~P+h7J$76 zu%qB(L;h99q=0O&dxV$)H}Y+Pd23l;jc*)YOO)?a0__#P!(`m}-I*{4dNXSLW+sHj z9H?$JZBjrs(tCvL)Yk&vXHr1!i$Cap`z4G0S>MM*VjgvaHgkWd(mrTXz`qA_AR3uu F{{`>&z%T#+ delta 2065 zcma)6Yitx%6uxI3`8q? zpp7C0gREYSt&OC_N5CIOm>(dC#;7qGV{}uZjTTc3ks#6B+VaC68qb}yP(c$XnK|ct z=bm%EbMLt``!SsT5InQ0>LU2WUYdIJaPt{Yy945M$kCyA5xhnQ$rVL6uQ@^hE^2c< z<0rjBgU|y#q)v>G9#|#zK&cX^7hP}D{YuKa8l(En`Os;*$INr@03h@^wOkgH#E|F( zwMmxewdHiRM^|J+N|L1*gWfHkrW#vKJ0V>HAxR;MuR)R^ktlJH_^VbqdpvO(zISck z*}Zq??%uAxrw5i9jm?d*L_TfM!n9O7tF7lWKA)$-9Of@t5)^S!ofEzlbiXg>AGJ^T zsuWwzQ&{_MtS!@RYqJ&mqL{yYtx?BKZX$9Mk(qmf2nMcK&^yeGg(hw}PCr8n}{frr+G%V~AHM)!wHMXjxt&IB{*g|NTWTLstwJ_Z;* zi7Z&XT2f0&Mn+QQdzMskAOkw?nuMGpx;HRgseE-l)utE zn%^#X&tj7P-*M!uiMPAVxx#NHJR!9$lb);zBJbZ?*o+Pn!(Zx&;+F4KZEIc=D8B{p0a20;7Yx5dTH5fZ|o}vwT!RWhadF2%|pvo zL5vTCXncK;URhb?Y2|JlNGrhp{ zK%i@k2#9pE5lKijFXN3^g_{fy>-d!nS;nOXF>Dg)P*Js>Bp4DuiiHE>dW-$$YP)qz z7+Z|ELHDf7rk4$U)Bj0=KL^7K??npTNYxNpq=j<5fSRKwWB!K$md8O`5(R=YPR}Lo zqL&hf-1)pgeiH`Cl-ZknM}#Ufck`bD7*v1o%Z&gfvmy~ZOhAn}h#()<>7tSuF$W@(-b`l6{3R&q3!S-ji1Mf$CZNWu7ox%w zkTV@?oDoCwQCvm}#}lIi%);vKL&-B}=;0Z1nSdI*m!9h^JXkOvg>u^I9^QK-z+}{! zjkr|HvUAI^U^%(vBFIkRnw!{M!_ndXRBU9)i%r7^(ynk?j?Ag(o3#sqz-)X~8*s8Y zLpj6CCLni4Xa=y2uRRq=M=K99!maa(IZJYS0cI^=4+#rVWfPFigeU|iof)Z$QEAMU zs!cm1Ohjnfm6?i-W{vq%8`7?PCL@I0-PtK)G&)w4>PRaE%+5gvQT?gz(aN#v)ULF< zkU2T%+CeaWQA&lQ?%U#34tj2j*Er}U9(~fG97#OCaafrV1&@vSIPSkKgC3>zTlF>E z6(+vG;t-#0Hi9_s+;26>;t++FP=s>jjdUHDR2sqT@g)MII10X8y+ zjf5~p2H%Lt2#RrOjtk~>fi{WDqpALp*!YssrqqFSMp0TVW+HlMs#BLyvViH`rbDwG zvPQZYK{4Jv$Hh!m2I>`zpg3i`S9yAirG8X#gCnd=LGSicHR+;iMo>SGIEp_0fB73h CM*q41 diff --git a/app/init.py b/app/init.py index 8c8a102..50a598f 100644 --- a/app/init.py +++ b/app/init.py @@ -78,7 +78,8 @@ def get_dirstruct(): if lvid is None: continue lvid = lvid.group()[:3] + lvid.group()[4:] - name = fname[:-8] + # name = fname[:-8] + name = re.sub(r"[a-zA-Z0-9]{3}\.[a-zA-Z0-9]{3}", "", fname) # print(name) # print(lvid) cur = db.cursor() diff --git a/app/main.py b/app/main.py index 978118d..8837364 100644 --- a/app/main.py +++ b/app/main.py @@ -50,11 +50,13 @@ FILES_IN_PROGRESS = "./app/files/" # locpaths = ["./VO_Mathematik_3.pdf"] # replace this with a database @app.get("/") async def get_index(): + """gives the Index.html file""" return FileResponse("./index.html") @app.get("/files/{file_id}") async def get_file(file_id: str): + """returns the file that cooorosponds with the given ID""" if file_id == "unsupported": return FileResponse(FILES_IN_PROGRESS + "unsupported.pdf") if file_id == "empty": @@ -75,6 +77,7 @@ async def get_file(file_id: str): async def search_lva( searchterm: str, searchlim: int = 10 ) -> List[Dict[str, int | str]]: + """returns the LVA for a search in the database""" res = [] cur = db.cursor(dictionary=True) if await is_LVID(searchterm): @@ -103,6 +106,7 @@ async def search_lva( async def search_profs( searchterm: str = "", lid: int | None = None, searchlim: int = 10 ) -> List[Dict[str, str | int]]: + """returns the Prof for a searchterm and LVA id""" res = [] zw = [] cur = db.cursor(dictionary=True) @@ -141,6 +145,7 @@ async def search_subcats( cat: int | None = None, searchlim: int = 10, ) -> List[Dict[str, str | int]]: + """searches for avaliable subcatrgories in a specific LVA with a specific Prof(optional)""" res = [] rest = [] cur = db.cursor(dictionary=True) @@ -177,6 +182,7 @@ async def search_subcats( @app.post("/uploadfile/") async def create_upload_file(files: List[UploadFile], c2pdf: bool = True): + """Handles files uploaded. generates ID; saves file; saves path in database""" if len(files) == 0: raise HTTPException(status_code=400, detail="No files found in file submission") filename = files[0].filename if files[0].filename is not None else "None" @@ -275,6 +281,7 @@ async def get_submission( str, Form() ], # Scales of Pages # Annotated[List[Dict[str, float]], Form()], ): + """handles submission""" print(lva, prof, fname, stype, subcat, sem, ex_date, rects, pagescales) rects_p = json.loads(rects) scales_p = json.loads(pagescales) @@ -301,6 +308,7 @@ def censor_pdf( rects: List[List[List[float]]], scales: List[Dict[str, float]], ): + """Censors pdf and runs OCR""" doc = pymupdf.open(path) output = pymupdf.open() page = doc[0] @@ -335,6 +343,7 @@ def censor_pdf( async def is_LVID(term: str) -> bool: + """Returns weather a string has the format of a LVA ID""" if re.match(r"[a-zA-Z0-9]{3}\.[a-zA-Z0-9]*", term): return True if term.isdigit(): @@ -345,6 +354,7 @@ async def is_LVID(term: str) -> bool: def remove_duplicates( results: List[Dict[str, str | int]], ) -> List[Dict[str, str | int]]: + """removes duplicate file Ids""" ids = [] res = [] for result in results: @@ -532,6 +542,7 @@ def convert_to_pdf(file: bytes) -> bytes | None: def filename_to_pdf(filename: str) -> str: + """converts any filename.any to filename.pdf""" farr = filename.split(".") if len(farr) > 1: farr[-1] = "pdf" @@ -542,6 +553,7 @@ def filename_to_pdf(filename: str) -> str: def make_filename_unique(filename: str, idx: int | None = None) -> str: + """makes sure, there are no duplicate filenames in the temporary folder""" cur = db.cursor() cur.execute("SELECT id FROM FIP WHERE filename=?", (filename,)) res = cur.fetchall()