caught exception in trying to censor
This commit is contained in:
19
app/main.py
19
app/main.py
@@ -671,12 +671,31 @@ def censor_pdf_ocr(
|
||||
censor_status_datas[file_id]["done"] = False
|
||||
censor_status_update_events[file_id].set()
|
||||
# This costs us dearly
|
||||
try:
|
||||
bitmap = page.get_pixmap(dpi=400)
|
||||
pdf_bytes = bitmap.pdfocr_tobytes(
|
||||
language="deu",
|
||||
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to the tesseract files
|
||||
)
|
||||
output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
|
||||
except RuntimeError as e:
|
||||
error(
|
||||
f"Error in OCR for document: {destpath}. Error: {e}. Falling back to standard mode."
|
||||
)
|
||||
if i < len(rects) and rects[i] != []:
|
||||
for rect in rects[i]:
|
||||
prect = pymupdf.Rect(
|
||||
rect[0] * wfac,
|
||||
rect[1] * hfac,
|
||||
(rect[0] + rect[2]) * wfac,
|
||||
(rect[1] + rect[3]) * hfac,
|
||||
)
|
||||
page.add_redact_annot(
|
||||
prect,
|
||||
fill=(0, 0, 0),
|
||||
)
|
||||
page.apply_redactions()
|
||||
output.insert_pdf(page.parent, from_page=page.number, to_page=page.number)
|
||||
# End of the costly part
|
||||
print(f"Page {i + 1}/{npage}: CENSORING DONE")
|
||||
output.save(destpath)
|
||||
|
||||
Reference in New Issue
Block a user