Added functionality to read out which page is currently being censored; it probably helps with patience. At the moment it is only implemented in the backend; a pretty frontend is still necessary.

This commit is contained in:
Marcel Gansfusz
2025-10-22 01:12:25 +02:00
parent 7d828a7c3b
commit 352540a3b1
3 changed files with 69 additions and 4 deletions

Binary file not shown.

View File

@@ -1,12 +1,15 @@
from typing import Annotated
from typing import List, Dict, Tuple, Sequence
from starlette.responses import StreamingResponse
from annotated_types import IsDigit
from fastapi import FastAPI, File, HTTPException, UploadFile, Request, Form
from fastapi.responses import FileResponse
# import multiprocessing
# import threading
# import concurrent.futures
# import asyncio
import asyncio
# import fastapi
from fastapi.staticfiles import StaticFiles
@@ -77,6 +80,9 @@ GREETINGFILE = "./app/graphics/greeting.pdf"
# for l in cur:
# print(l)
# locpaths = ["./VO_Mathematik_3.pdf"] # replace this with a database
# Per-file progress signalling for the censoring job, keyed by file id.
# The event for a file is set whenever new status data has been written and is
# awaited (then cleared) by the status stream generator.
censor_status_update_events: Dict[str, asyncio.Event] = {}
# Latest status per file id; the censoring code writes the keys "page",
# "pages" and "done", and the whole dict is JSON-serialised for the client.
censor_status_datas: Dict[str, Dict[str, int | None | str | bool]] = {}
# censor_finished_flags: Dict[str, asyncio.Event] = {}
def _sql_quarry(
@@ -442,22 +448,62 @@ async def get_submission(
except ValueError as e:
error(f"Error creating savepath: {e}")
raise HTTPException(status_code=400, detail=f"Error creation savepath: {e}")
await censor_pdf(
filepath, dest, rects_p, scales_p, False if censor == "False" else True
# censor_finished_flags[fileId] = asyncio.Event()
censor_status_datas[fileId] = {}
if fileId not in censor_status_update_events:
censor_status_update_events[fileId] = asyncio.Event()
await asyncio.to_thread(
censor_pdf,
filepath,
dest,
rects_p,
scales_p,
False if censor == "False" else True,
fileId,
)
# return {"done": "ok"}
# print(dest)
# await censor_finished_flags[fileId].wait()
# censor_finished_flags[fileId].clear()
info(f"Saved file {fileId} as {dest}")
delete_from_FIP(fileId)
return FileResponse(dest, content_disposition_type="inline")
async def censor_pdf(
@app.get("/get_censor_status/{file_id}")
async def get_censor_status(file_id: str):
    """Stream the censoring progress of ``file_id`` as server-sent events.

    Each event carries the page currently being censored and the total number
    of pages. Raises an HTTP 400 error when the FIP table has no row with this
    id.
    """
    matching_rows = sql("Select filename from FIP where id=?", (file_id,))
    if len(matching_rows) == 0:
        raise HTTPException(
            400,
            detail="You are trying to get a status updater for a file that dosent exist.",
        )

    # The stream may be opened before the censoring job registered its event,
    # so make sure there is one to wait on.
    censor_status_update_events.setdefault(file_id, asyncio.Event())

    status_stream = yield_censor_status(file_id)
    return StreamingResponse(status_stream, media_type="text/event-stream")
async def yield_censor_status(file_id: str):
    """Yield server-sent-event messages describing censoring progress.

    Waits on the update event registered for ``file_id``; every time it fires,
    the event is cleared and the current status dict is emitted as a
    ``censorUpdate`` event with a JSON payload. The generator ends after a
    status whose ``"done"`` entry is truthy has been sent, and then removes the
    event and status entries for ``file_id``.

    The generator also ends quietly when the entries for ``file_id`` are
    already gone (e.g. another stream for the same file finished first and
    cleaned up), instead of failing with ``KeyError``.
    """
    while True:
        update_event = censor_status_update_events.get(file_id)
        if update_event is None:
            # Nothing left to wait on: a parallel stream already cleaned up.
            return
        # NOTE(review): the event is set from censor_pdf, which the submit
        # handler runs via asyncio.to_thread; asyncio.Event is documented as
        # not thread-safe, so wake-ups across threads may need
        # loop.call_soon_threadsafe - confirm.
        await update_event.wait()
        update_event.clear()

        status = censor_status_datas.get(file_id)
        if status is None:
            return
        # Take payload and done-flag from the same snapshot so the stream never
        # ends without the final "done" message having been sent.
        snapshot = dict(status)
        yield f"event: censorUpdate\ndata: {json.dumps(snapshot)}\n\n"
        if snapshot.get("done"):
            # pop() instead of del: another stream may have removed them already.
            censor_status_update_events.pop(file_id, None)
            censor_status_datas.pop(file_id, None)
            return
def censor_pdf(
path: str,
destpath: str,
rects: List[List[List[float]]],
scales: List[Dict[str, float]],
secure: bool,
file_id: str,
):
"""Censors pdf and runs OCR
If Secure is True the file is converted to Pixels and then recreated; else the censored sections are just covering the text below and can be easiliy removed with e.g. Inkscape
@@ -499,13 +545,20 @@ async def censor_pdf(
fill=(0, 0, 0),
)
if secure:
censor_status_datas[file_id]["page"] = i + 1
censor_status_datas[file_id]["pages"] = npage
censor_status_datas[file_id]["done"] = False
censor_status_update_events[file_id].set()
# pages.append(page)
# THis Costs us dearly
bitmap = page.get_pixmap(dpi=400)
pdf_bytes = bitmap.pdfocr_tobytes(
language="deu",
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files
)
output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
# End of the costly part
# tasks.append(asyncio.create_task(censor_page(page)))
print(f"Page {i + 1}/{npage}: CENSORING DONE")
else:
@@ -530,6 +583,10 @@ async def censor_pdf(
# output.insert_pdf(pymupdf.Document(stream=await task))
# print("CENSORING DONE")
output.save(destpath)
if secure:
censor_status_datas[file_id]["done"] = True
censor_status_update_events[file_id].set()
# censor_finished_flags[file_id].set()
def test_function(i: int) -> bytes:

View File

@@ -281,10 +281,18 @@ function submitPdf(eve) {
}
async function submitForm(formData) {
try {
const updateEventSource = new EventSource(
"http://127.0.0.1:8000/get_censor_status/" + doc.fID,
);
console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
updateEventSource.addEventListener("censorUpdate", function(eve) {
console.log(eve.data);
});
const response = await fetch("http://127.0.0.1:8000/submit", {
method: "POST",
body: formData,
});
updateEventSource.close();
//let responseJSON=await response.json();
if (response.ok) {
console.log("Submit OK");