diff --git a/app/__pycache__/main.cpython-313.pyc b/app/__pycache__/main.cpython-313.pyc index 95361c3..5371756 100644 Binary files a/app/__pycache__/main.cpython-313.pyc and b/app/__pycache__/main.cpython-313.pyc differ diff --git a/app/main.py b/app/main.py index 78118a8..41d7fc7 100644 --- a/app/main.py +++ b/app/main.py @@ -1,12 +1,15 @@ from typing import Annotated from typing import List, Dict, Tuple, Sequence + +from starlette.responses import StreamingResponse from annotated_types import IsDigit from fastapi import FastAPI, File, HTTPException, UploadFile, Request, Form from fastapi.responses import FileResponse + # import multiprocessing # import threading # import concurrent.futures -# import asyncio +import asyncio # import fastapi from fastapi.staticfiles import StaticFiles @@ -77,6 +80,9 @@ GREETINGFILE = "./app/graphics/greeting.pdf" # for l in cur: # print(l) # locpaths = ["./VO_Mathematik_3.pdf"] # replace this with a database +censor_status_update_events: Dict[str, asyncio.Event] = {} +censor_status_datas: Dict[str, Dict[str, int | None | str | bool]] = {} +# censor_finished_flags: Dict[str, asyncio.Event] = {} def _sql_quarry( @@ -442,22 +448,62 @@ async def get_submission( except ValueError as e: error(f"Error creating savepath: {e}") raise HTTPException(status_code=400, detail=f"Error creation savepath: {e}") - await censor_pdf( - filepath, dest, rects_p, scales_p, False if censor == "False" else True + # censor_finished_flags[fileId] = asyncio.Event() + censor_status_datas[fileId] = {} + if fileId not in censor_status_update_events: + censor_status_update_events[fileId] = asyncio.Event() + await asyncio.to_thread( + censor_pdf, + filepath, + dest, + rects_p, + scales_p, + False if censor == "False" else True, + fileId, ) + # return {"done": "ok"} # print(dest) + # await censor_finished_flags[fileId].wait() + # censor_finished_flags[fileId].clear() info(f"Saved file {fileId} as {dest}") delete_from_FIP(fileId) return FileResponse(dest, content_disposition_type="inline") -async def censor_pdf( +@app.get("/get_censor_status/{file_id}") +async def get_censor_status(file_id: str): + """Yields the currrent page being censored and the total number of pages""" + if len(sql("Select filename from FIP where id=?", (file_id,))) < 1: + raise HTTPException( + 400, + detail="You are trying to get a status updater for a file that dosent exist.", + ) + if file_id not in censor_status_update_events: + censor_status_update_events[file_id] = asyncio.Event() + return StreamingResponse( + yield_censor_status(file_id), media_type="text/event-stream" + ) + + +async def yield_censor_status(file_id: str): + while True: + await censor_status_update_events[file_id].wait() + censor_status_update_events[file_id].clear() + yield f"event: censorUpdate\ndata: {json.dumps(censor_status_datas[file_id])}\n\n" + if censor_status_datas[file_id]["done"]: + del censor_status_update_events[file_id] + del censor_status_datas[file_id] + return + + +def censor_pdf( path: str, destpath: str, rects: List[List[List[float]]], scales: List[Dict[str, float]], secure: bool, + file_id: str, ): """Censors pdf and runs OCR If Secure is True the file is converted to Pixels and then recreated; else the censored sections are just covering the text below and can be easiliy removed with e.g. Inkscape @@ -499,13 +545,20 @@ async def censor_pdf( fill=(0, 0, 0), ) if secure: + censor_status_datas[file_id]["page"] = i + 1 + censor_status_datas[file_id]["pages"] = npage + censor_status_datas[file_id]["done"] = False + censor_status_update_events[file_id].set() + # pages.append(page) + # THis Costs us dearly bitmap = page.get_pixmap(dpi=400) pdf_bytes = bitmap.pdfocr_tobytes( language="deu", tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files ) output.insert_pdf(pymupdf.Document(stream=pdf_bytes)) + # End of the costly part # tasks.append(asyncio.create_task(censor_page(page))) print(f"Page {i + 1}/{npage}: CENSORING DONE") else: @@ -530,6 +583,10 @@ async def censor_pdf( # output.insert_pdf(pymupdf.Document(stream=await task)) # print("CENSORING DONE") output.save(destpath) + if secure: + censor_status_datas[file_id]["done"] = True + censor_status_update_events[file_id].set() + # censor_finished_flags[file_id].set() def test_function(i: int) -> bytes: diff --git a/static/app.js b/static/app.js index 03c8f07..ea7d806 100644 --- a/static/app.js +++ b/static/app.js @@ -281,10 +281,18 @@ function submitPdf(eve) { } async function submitForm(formData) { try { + const updateEventSource = new EventSource( + "http://127.0.0.1:8000/get_censor_status/" + doc.fID, + ); + console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID); + updateEventSource.addEventListener("censorUpdate", function(eve) { + console.log(eve.data); + }); const response = await fetch("http://127.0.0.1:8000/submit", { method: "POST", body: formData, }); + updateEventSource.close(); //let responseJSON=await response.json(); if (response.ok) { console.log("Submit OK");