Added functionality to read out which page is currently being censored; it probably helps with patience. At the moment it is only implemented in the backend; a pretty frontend is still necessary.
This commit is contained in:
Binary file not shown.
65
app/main.py
65
app/main.py
@@ -1,12 +1,15 @@
|
||||
from typing import Annotated
|
||||
from typing import List, Dict, Tuple, Sequence
|
||||
|
||||
from starlette.responses import StreamingResponse
|
||||
from annotated_types import IsDigit
|
||||
from fastapi import FastAPI, File, HTTPException, UploadFile, Request, Form
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
# import multiprocessing
|
||||
# import threading
|
||||
# import concurrent.futures
|
||||
# import asyncio
|
||||
import asyncio
|
||||
|
||||
# import fastapi
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
@@ -77,6 +80,9 @@ GREETINGFILE = "./app/graphics/greeting.pdf"
|
||||
# for l in cur:
|
||||
# print(l)
|
||||
# locpaths = ["./VO_Mathematik_3.pdf"] # replace this with a database
|
||||
# Maps a file id to the asyncio.Event that censor_pdf sets whenever it has
# written new progress data and that yield_censor_status waits on.
censor_status_update_events: Dict[str, asyncio.Event] = {}
|
||||
# Maps a file id to its latest progress record; censor_pdf fills in the keys
# "page", "pages" and "done", and yield_censor_status serialises the record.
censor_status_datas: Dict[str, Dict[str, int | None | str | bool]] = {}
|
||||
# censor_finished_flags: Dict[str, asyncio.Event] = {}
|
||||
|
||||
|
||||
def _sql_quarry(
|
||||
@@ -442,22 +448,62 @@ async def get_submission(
|
||||
except ValueError as e:
|
||||
error(f"Error creating savepath: {e}")
|
||||
raise HTTPException(status_code=400, detail=f"Error creation savepath: {e}")
|
||||
await censor_pdf(
|
||||
filepath, dest, rects_p, scales_p, False if censor == "False" else True
|
||||
# censor_finished_flags[fileId] = asyncio.Event()
|
||||
censor_status_datas[fileId] = {}
|
||||
if fileId not in censor_status_update_events:
|
||||
censor_status_update_events[fileId] = asyncio.Event()
|
||||
await asyncio.to_thread(
|
||||
censor_pdf,
|
||||
filepath,
|
||||
dest,
|
||||
rects_p,
|
||||
scales_p,
|
||||
False if censor == "False" else True,
|
||||
fileId,
|
||||
)
|
||||
|
||||
# return {"done": "ok"}
|
||||
# print(dest)
|
||||
# await censor_finished_flags[fileId].wait()
|
||||
# censor_finished_flags[fileId].clear()
|
||||
info(f"Saved file {fileId} as {dest}")
|
||||
delete_from_FIP(fileId)
|
||||
return FileResponse(dest, content_disposition_type="inline")
|
||||
|
||||
|
||||
async def censor_pdf(
|
||||
@app.get("/get_censor_status/{file_id}")
async def get_censor_status(file_id: str):
    """Stream the censoring progress of a file as server-sent events.

    Each event carries the page currently being censored and the total
    number of pages.

    Raises:
        HTTPException: with status 400 when the FIP table holds no row for
            ``file_id``.
    """
    matching_rows = sql("Select filename from FIP where id=?", (file_id,))
    if len(matching_rows) == 0:
        raise HTTPException(
            400,
            detail="You are trying to get a status updater for a file that dosent exist.",
        )

    # Make sure the event exists before the generator starts waiting on it;
    # an already registered event is left untouched.
    censor_status_update_events.setdefault(file_id, asyncio.Event())

    progress_stream = yield_censor_status(file_id)
    return StreamingResponse(progress_stream, media_type="text/event-stream")
|
||||
|
||||
|
||||
async def yield_censor_status(file_id: str):
    """Yield server-sent ``censorUpdate`` events describing censoring progress.

    Waits for the update event registered for ``file_id``, then emits the
    current status record as JSON. The stream ends after a record whose
    ``"done"`` entry is truthy has been sent; at that point the bookkeeping
    entries for ``file_id`` are removed.

    Args:
        file_id: Key into ``censor_status_update_events`` and
            ``censor_status_datas``. The event must already be registered.

    Yields:
        One ``text/event-stream`` formatted string per status update.
    """
    # Keep a reference to the event itself so the loop keeps working even if
    # the registry entry is removed while this generator is suspended.
    update_event = censor_status_update_events[file_id]
    while True:
        await update_event.wait()
        update_event.clear()

        current = censor_status_datas.get(file_id)
        if current is None:
            # The status record is gone (cleanup already happened), so there
            # is nothing left to report.
            return

        # Snapshot once: the record is mutated from the worker thread, and the
        # payload sent to the client must agree with the "done" decision below.
        status = dict(current)
        yield f"event: censorUpdate\ndata: {json.dumps(status)}\n\n"

        if status.get("done"):
            # pop() instead of del: cleanup may already have been performed.
            censor_status_update_events.pop(file_id, None)
            censor_status_datas.pop(file_id, None)
            return
|
||||
|
||||
|
||||
def censor_pdf(
|
||||
path: str,
|
||||
destpath: str,
|
||||
rects: List[List[List[float]]],
|
||||
scales: List[Dict[str, float]],
|
||||
secure: bool,
|
||||
file_id: str,
|
||||
):
|
||||
"""Censors pdf and runs OCR
|
||||
If secure is True the file is converted to pixels and then recreated; otherwise the censored sections merely cover the text below and can easily be removed with e.g. Inkscape
|
||||
@@ -499,13 +545,20 @@ async def censor_pdf(
|
||||
fill=(0, 0, 0),
|
||||
)
|
||||
if secure:
|
||||
censor_status_datas[file_id]["page"] = i + 1
|
||||
censor_status_datas[file_id]["pages"] = npage
|
||||
censor_status_datas[file_id]["done"] = False
|
||||
censor_status_update_events[file_id].set()
|
||||
|
||||
# pages.append(page)
|
||||
# This costs us dearly
|
||||
bitmap = page.get_pixmap(dpi=400)
|
||||
pdf_bytes = bitmap.pdfocr_tobytes(
|
||||
language="deu",
|
||||
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files
|
||||
)
|
||||
output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
|
||||
# End of the costly part
|
||||
# tasks.append(asyncio.create_task(censor_page(page)))
|
||||
print(f"Page {i + 1}/{npage}: CENSORING DONE")
|
||||
else:
|
||||
@@ -530,6 +583,10 @@ async def censor_pdf(
|
||||
# output.insert_pdf(pymupdf.Document(stream=await task))
|
||||
# print("CENSORING DONE")
|
||||
output.save(destpath)
|
||||
if secure:
|
||||
censor_status_datas[file_id]["done"] = True
|
||||
censor_status_update_events[file_id].set()
|
||||
# censor_finished_flags[file_id].set()
|
||||
|
||||
|
||||
def test_function(i: int) -> bytes:
|
||||
|
||||
Reference in New Issue
Block a user