improved LVID extraction; added some docstrings
This commit is contained in:
Binary file not shown.
@@ -78,7 +78,8 @@ def get_dirstruct():
|
|||||||
if lvid is None:
|
if lvid is None:
|
||||||
continue
|
continue
|
||||||
lvid = lvid.group()[:3] + lvid.group()[4:]
|
lvid = lvid.group()[:3] + lvid.group()[4:]
|
||||||
name = fname[:-8]
|
# name = fname[:-8]
|
||||||
|
name = re.sub(r"[a-zA-Z0-9]{3}\.[a-zA-Z0-9]{3}", "", fname)
|
||||||
# print(name)
|
# print(name)
|
||||||
# print(lvid)
|
# print(lvid)
|
||||||
cur = db.cursor()
|
cur = db.cursor()
|
||||||
|
|||||||
12
app/main.py
12
app/main.py
@@ -50,11 +50,13 @@ FILES_IN_PROGRESS = "./app/files/"
|
|||||||
# locpaths = ["./VO_Mathematik_3.pdf"] # replace this with a database
|
# locpaths = ["./VO_Mathematik_3.pdf"] # replace this with a database
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def get_index():
|
async def get_index():
|
||||||
|
"""returns the index.html file"""
|
||||||
return FileResponse("./index.html")
|
return FileResponse("./index.html")
|
||||||
|
|
||||||
|
|
||||||
@app.get("/files/{file_id}")
|
@app.get("/files/{file_id}")
|
||||||
async def get_file(file_id: str):
|
async def get_file(file_id: str):
|
||||||
|
"""returns the file that corresponds to the given ID"""
|
||||||
if file_id == "unsupported":
|
if file_id == "unsupported":
|
||||||
return FileResponse(FILES_IN_PROGRESS + "unsupported.pdf")
|
return FileResponse(FILES_IN_PROGRESS + "unsupported.pdf")
|
||||||
if file_id == "empty":
|
if file_id == "empty":
|
||||||
@@ -75,6 +77,7 @@ async def get_file(file_id: str):
|
|||||||
async def search_lva(
|
async def search_lva(
|
||||||
searchterm: str, searchlim: int = 10
|
searchterm: str, searchlim: int = 10
|
||||||
) -> List[Dict[str, int | str]]:
|
) -> List[Dict[str, int | str]]:
|
||||||
|
"""returns the LVA for a search in the database"""
|
||||||
res = []
|
res = []
|
||||||
cur = db.cursor(dictionary=True)
|
cur = db.cursor(dictionary=True)
|
||||||
if await is_LVID(searchterm):
|
if await is_LVID(searchterm):
|
||||||
@@ -103,6 +106,7 @@ async def search_lva(
|
|||||||
async def search_profs(
|
async def search_profs(
|
||||||
searchterm: str = "", lid: int | None = None, searchlim: int = 10
|
searchterm: str = "", lid: int | None = None, searchlim: int = 10
|
||||||
) -> List[Dict[str, str | int]]:
|
) -> List[Dict[str, str | int]]:
|
||||||
|
"""returns the Prof for a searchterm and LVA id"""
|
||||||
res = []
|
res = []
|
||||||
zw = []
|
zw = []
|
||||||
cur = db.cursor(dictionary=True)
|
cur = db.cursor(dictionary=True)
|
||||||
@@ -141,6 +145,7 @@ async def search_subcats(
|
|||||||
cat: int | None = None,
|
cat: int | None = None,
|
||||||
searchlim: int = 10,
|
searchlim: int = 10,
|
||||||
) -> List[Dict[str, str | int]]:
|
) -> List[Dict[str, str | int]]:
|
||||||
|
"""searches for available subcategories in a specific LVA with a specific Prof (optional)"""
|
||||||
res = []
|
res = []
|
||||||
rest = []
|
rest = []
|
||||||
cur = db.cursor(dictionary=True)
|
cur = db.cursor(dictionary=True)
|
||||||
@@ -177,6 +182,7 @@ async def search_subcats(
|
|||||||
|
|
||||||
@app.post("/uploadfile/")
|
@app.post("/uploadfile/")
|
||||||
async def create_upload_file(files: List[UploadFile], c2pdf: bool = True):
|
async def create_upload_file(files: List[UploadFile], c2pdf: bool = True):
|
||||||
|
"""Handles uploaded files: generates an ID, saves the file, and saves its path in the database"""
|
||||||
if len(files) == 0:
|
if len(files) == 0:
|
||||||
raise HTTPException(status_code=400, detail="No files found in file submission")
|
raise HTTPException(status_code=400, detail="No files found in file submission")
|
||||||
filename = files[0].filename if files[0].filename is not None else "None"
|
filename = files[0].filename if files[0].filename is not None else "None"
|
||||||
@@ -275,6 +281,7 @@ async def get_submission(
|
|||||||
str, Form()
|
str, Form()
|
||||||
], # Scales of Pages # Annotated[List[Dict[str, float]], Form()],
|
], # Scales of Pages # Annotated[List[Dict[str, float]], Form()],
|
||||||
):
|
):
|
||||||
|
"""handles submission"""
|
||||||
print(lva, prof, fname, stype, subcat, sem, ex_date, rects, pagescales)
|
print(lva, prof, fname, stype, subcat, sem, ex_date, rects, pagescales)
|
||||||
rects_p = json.loads(rects)
|
rects_p = json.loads(rects)
|
||||||
scales_p = json.loads(pagescales)
|
scales_p = json.loads(pagescales)
|
||||||
@@ -301,6 +308,7 @@ def censor_pdf(
|
|||||||
rects: List[List[List[float]]],
|
rects: List[List[List[float]]],
|
||||||
scales: List[Dict[str, float]],
|
scales: List[Dict[str, float]],
|
||||||
):
|
):
|
||||||
|
"""Censors pdf and runs OCR"""
|
||||||
doc = pymupdf.open(path)
|
doc = pymupdf.open(path)
|
||||||
output = pymupdf.open()
|
output = pymupdf.open()
|
||||||
page = doc[0]
|
page = doc[0]
|
||||||
@@ -335,6 +343,7 @@ def censor_pdf(
|
|||||||
|
|
||||||
|
|
||||||
async def is_LVID(term: str) -> bool:
|
async def is_LVID(term: str) -> bool:
|
||||||
|
"""Returns whether a string has the format of an LVA ID"""
|
||||||
if re.match(r"[a-zA-Z0-9]{3}\.[a-zA-Z0-9]*", term):
|
if re.match(r"[a-zA-Z0-9]{3}\.[a-zA-Z0-9]*", term):
|
||||||
return True
|
return True
|
||||||
if term.isdigit():
|
if term.isdigit():
|
||||||
@@ -345,6 +354,7 @@ async def is_LVID(term: str) -> bool:
|
|||||||
def remove_duplicates(
|
def remove_duplicates(
|
||||||
results: List[Dict[str, str | int]],
|
results: List[Dict[str, str | int]],
|
||||||
) -> List[Dict[str, str | int]]:
|
) -> List[Dict[str, str | int]]:
|
||||||
|
"""removes duplicate file Ids"""
|
||||||
ids = []
|
ids = []
|
||||||
res = []
|
res = []
|
||||||
for result in results:
|
for result in results:
|
||||||
@@ -532,6 +542,7 @@ def convert_to_pdf(file: bytes) -> bytes | None:
|
|||||||
|
|
||||||
|
|
||||||
def filename_to_pdf(filename: str) -> str:
|
def filename_to_pdf(filename: str) -> str:
|
||||||
|
"""converts any filename.any to filename.pdf"""
|
||||||
farr = filename.split(".")
|
farr = filename.split(".")
|
||||||
if len(farr) > 1:
|
if len(farr) > 1:
|
||||||
farr[-1] = "pdf"
|
farr[-1] = "pdf"
|
||||||
@@ -542,6 +553,7 @@ def filename_to_pdf(filename: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def make_filename_unique(filename: str, idx: int | None = None) -> str:
|
def make_filename_unique(filename: str, idx: int | None = None) -> str:
|
||||||
|
"""makes sure there are no duplicate filenames in the temporary folder"""
|
||||||
cur = db.cursor()
|
cur = db.cursor()
|
||||||
cur.execute("SELECT id FROM FIP WHERE filename=?", (filename,))
|
cur.execute("SELECT id FROM FIP WHERE filename=?", (filename,))
|
||||||
res = cur.fetchall()
|
res = cur.fetchall()
|
||||||
|
|||||||
Reference in New Issue
Block a user