Added functionality to read out which page is currently being censored; it probably helps with patience. At the moment it is only implemented in the backend; a pretty frontend is still necessary.

This commit is contained in:
Marcel Gansfusz
2025-10-22 01:12:25 +02:00
parent 7d828a7c3b
commit 352540a3b1
3 changed files with 69 additions and 4 deletions

Binary file not shown.

View File

@@ -1,12 +1,15 @@
from typing import Annotated from typing import Annotated
from typing import List, Dict, Tuple, Sequence from typing import List, Dict, Tuple, Sequence
from starlette.responses import StreamingResponse
from annotated_types import IsDigit from annotated_types import IsDigit
from fastapi import FastAPI, File, HTTPException, UploadFile, Request, Form from fastapi import FastAPI, File, HTTPException, UploadFile, Request, Form
from fastapi.responses import FileResponse from fastapi.responses import FileResponse
# import multiprocessing # import multiprocessing
# import threading # import threading
# import concurrent.futures # import concurrent.futures
# import asyncio import asyncio
# import fastapi # import fastapi
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
@@ -77,6 +80,9 @@ GREETINGFILE = "./app/graphics/greeting.pdf"
# for l in cur: # for l in cur:
# print(l) # print(l)
# locpaths = ["./VO_Mathematik_3.pdf"] # replace this with a database # locpaths = ["./VO_Mathematik_3.pdf"] # replace this with a database
# Per-file progress signalling for the censoring job, keyed by file id.
# censor_pdf sets the event after each status change; yield_censor_status
# waits on it and clears it before sending the next update.
censor_status_update_events: Dict[str, asyncio.Event] = {}
# Latest status per file id. censor_pdf writes the keys "page", "pages" and
# "done"; yield_censor_status serialises the dict as the SSE payload.
censor_status_datas: Dict[str, Dict[str, int | None | str | bool]] = {}
# censor_finished_flags: Dict[str, asyncio.Event] = {}
def _sql_quarry( def _sql_quarry(
@@ -442,22 +448,62 @@ async def get_submission(
except ValueError as e: except ValueError as e:
error(f"Error creating savepath: {e}") error(f"Error creating savepath: {e}")
raise HTTPException(status_code=400, detail=f"Error creation savepath: {e}") raise HTTPException(status_code=400, detail=f"Error creation savepath: {e}")
await censor_pdf( # censor_finished_flags[fileId] = asyncio.Event()
filepath, dest, rects_p, scales_p, False if censor == "False" else True censor_status_datas[fileId] = {}
if fileId not in censor_status_update_events:
censor_status_update_events[fileId] = asyncio.Event()
await asyncio.to_thread(
censor_pdf,
filepath,
dest,
rects_p,
scales_p,
False if censor == "False" else True,
fileId,
) )
# return {"done": "ok"} # return {"done": "ok"}
# print(dest) # print(dest)
# await censor_finished_flags[fileId].wait()
# censor_finished_flags[fileId].clear()
info(f"Saved file {fileId} as {dest}") info(f"Saved file {fileId} as {dest}")
delete_from_FIP(fileId) delete_from_FIP(fileId)
return FileResponse(dest, content_disposition_type="inline") return FileResponse(dest, content_disposition_type="inline")
async def censor_pdf( @app.get("/get_censor_status/{file_id}")
async def get_censor_status(file_id: str):
    """Open a server-sent-event stream reporting censoring progress.

    Responds with HTTP 400 when the FIP table has no row for ``file_id``.
    Otherwise an update event is registered for the file (if none exists
    yet) and a ``text/event-stream`` response fed by
    ``yield_censor_status`` is returned.
    """
    matching_rows = sql("Select filename from FIP where id=?", (file_id,))
    if len(matching_rows) == 0:
        raise HTTPException(
            400,
            detail="You are trying to get a status updater for a file that dosent exist.",
        )

    # The stream may be opened before the submit request has registered
    # the event, so create it here when it is still missing.
    already_registered = file_id in censor_status_update_events
    if not already_registered:
        censor_status_update_events[file_id] = asyncio.Event()

    status_stream = yield_censor_status(file_id)
    return StreamingResponse(status_stream, media_type="text/event-stream")
async def yield_censor_status(file_id: str):
    """Yield server-sent-event messages describing censoring progress.

    Each message has the event name ``censorUpdate`` and carries the JSON
    encoded status dict stored in ``censor_status_datas[file_id]``. The
    generator waits on the file's update event between messages and stops
    after it has emitted a status whose ``"done"`` entry is truthy, at
    which point the bookkeeping entries for the file are removed.

    The generator also stops quietly when the entries for ``file_id`` are
    missing, e.g. because another stream for the same file already
    finished and cleaned up.
    """
    while True:
        update_event = censor_status_update_events.get(file_id)
        if update_event is None:
            return
        await update_event.wait()
        update_event.clear()

        status = censor_status_datas.get(file_id)
        if status is None:
            return
        # Snapshot the status once: the worker may change it while this
        # generator is suspended at the yield, and the decision to stop must
        # be based on the very payload that was sent. Otherwise the final
        # "done" message could be skipped.
        snapshot = dict(status)
        finished = bool(snapshot.get("done"))
        yield f"event: censorUpdate\ndata: {json.dumps(snapshot)}\n\n"
        if finished:
            # pop() instead of del: a concurrent stream for the same file may
            # already have removed the entries.
            censor_status_update_events.pop(file_id, None)
            censor_status_datas.pop(file_id, None)
            return
def censor_pdf(
path: str, path: str,
destpath: str, destpath: str,
rects: List[List[List[float]]], rects: List[List[List[float]]],
scales: List[Dict[str, float]], scales: List[Dict[str, float]],
secure: bool, secure: bool,
file_id: str,
): ):
"""Censors pdf and runs OCR """Censors pdf and runs OCR
If Secure is True the file is converted to Pixels and then recreated; else the censored sections are just covering the text below and can be easiliy removed with e.g. Inkscape If Secure is True the file is converted to Pixels and then recreated; else the censored sections are just covering the text below and can be easiliy removed with e.g. Inkscape
@@ -499,13 +545,20 @@ async def censor_pdf(
fill=(0, 0, 0), fill=(0, 0, 0),
) )
if secure: if secure:
censor_status_datas[file_id]["page"] = i + 1
censor_status_datas[file_id]["pages"] = npage
censor_status_datas[file_id]["done"] = False
censor_status_update_events[file_id].set()
# pages.append(page) # pages.append(page)
# THis Costs us dearly
bitmap = page.get_pixmap(dpi=400) bitmap = page.get_pixmap(dpi=400)
pdf_bytes = bitmap.pdfocr_tobytes( pdf_bytes = bitmap.pdfocr_tobytes(
language="deu", language="deu",
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files
) )
output.insert_pdf(pymupdf.Document(stream=pdf_bytes)) output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
# End of the costly part
# tasks.append(asyncio.create_task(censor_page(page))) # tasks.append(asyncio.create_task(censor_page(page)))
print(f"Page {i + 1}/{npage}: CENSORING DONE") print(f"Page {i + 1}/{npage}: CENSORING DONE")
else: else:
@@ -530,6 +583,10 @@ async def censor_pdf(
# output.insert_pdf(pymupdf.Document(stream=await task)) # output.insert_pdf(pymupdf.Document(stream=await task))
# print("CENSORING DONE") # print("CENSORING DONE")
output.save(destpath) output.save(destpath)
if secure:
censor_status_datas[file_id]["done"] = True
censor_status_update_events[file_id].set()
# censor_finished_flags[file_id].set()
def test_function(i: int) -> bytes: def test_function(i: int) -> bytes:

View File

@@ -281,10 +281,18 @@ function submitPdf(eve) {
} }
async function submitForm(formData) { async function submitForm(formData) {
try { try {
const updateEventSource = new EventSource(
"http://127.0.0.1:8000/get_censor_status/" + doc.fID,
);
console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
updateEventSource.addEventListener("censorUpdate", function(eve) {
console.log(eve.data);
});
const response = await fetch("http://127.0.0.1:8000/submit", { const response = await fetch("http://127.0.0.1:8000/submit", {
method: "POST", method: "POST",
body: formData, body: formData,
}); });
updateEventSource.close();
//let responseJSON=await response.json(); //let responseJSON=await response.json();
if (response.ok) { if (response.ok) {
console.log("Submit OK"); console.log("Submit OK");