Catch OCR exception while trying to censor and fall back to standard redaction

This commit is contained in:
Marcel Gansfusz
2025-11-04 17:54:24 +01:00
parent f7c73a0c5a
commit cdd26e0bc3

View File

@@ -671,12 +671,31 @@ def censor_pdf_ocr(
censor_status_datas[file_id]["done"] = False censor_status_datas[file_id]["done"] = False
censor_status_update_events[file_id].set() censor_status_update_events[file_id].set()
# THis Costs us dearly # THis Costs us dearly
bitmap = page.get_pixmap(dpi=400) try:
pdf_bytes = bitmap.pdfocr_tobytes( bitmap = page.get_pixmap(dpi=400)
language="deu", pdf_bytes = bitmap.pdfocr_tobytes(
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files language="deu",
) tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files
output.insert_pdf(pymupdf.Document(stream=pdf_bytes)) )
output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
except RuntimeError as e:
error(
f"Error in OCR for document: {destpath}. Error: {e}. Falling back to standard mode."
)
if i < len(rects) and rects[i] != []:
for rect in rects[i]:
prect = pymupdf.Rect(
rect[0] * wfac,
rect[1] * hfac,
(rect[0] + rect[2]) * wfac,
(rect[1] + rect[3]) * hfac,
)
page.add_redact_annot(
prect,
fill=(0, 0, 0),
)
page.apply_redactions()
output.insert_pdf(page.parent, from_page=page.number, to_page=page.number)
# End of the costly part # End of the costly part
print(f"Page {i + 1}/{npage}: CENSORING DONE") print(f"Page {i + 1}/{npage}: CENSORING DONE")
output.save(destpath) output.save(destpath)