caught exception in trying to censor
This commit is contained in:
31
app/main.py
31
app/main.py
@@ -671,12 +671,31 @@ def censor_pdf_ocr(
|
|||||||
censor_status_datas[file_id]["done"] = False
|
censor_status_datas[file_id]["done"] = False
|
||||||
censor_status_update_events[file_id].set()
|
censor_status_update_events[file_id].set()
|
||||||
# This costs us dearly
|
# This costs us dearly
|
||||||
bitmap = page.get_pixmap(dpi=400)
|
try:
|
||||||
pdf_bytes = bitmap.pdfocr_tobytes(
|
bitmap = page.get_pixmap(dpi=400)
|
||||||
language="deu",
|
pdf_bytes = bitmap.pdfocr_tobytes(
|
||||||
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to the tesseract files
|
language="deu",
|
||||||
)
|
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to the tesseract files
|
||||||
output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
|
)
|
||||||
|
output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
|
||||||
|
except RuntimeError as e:
|
||||||
|
error(
|
||||||
|
f"Error in OCR for document: {destpath}. Error: {e}. Falling back to standard mode."
|
||||||
|
)
|
||||||
|
if i < len(rects) and rects[i] != []:
|
||||||
|
for rect in rects[i]:
|
||||||
|
prect = pymupdf.Rect(
|
||||||
|
rect[0] * wfac,
|
||||||
|
rect[1] * hfac,
|
||||||
|
(rect[0] + rect[2]) * wfac,
|
||||||
|
(rect[1] + rect[3]) * hfac,
|
||||||
|
)
|
||||||
|
page.add_redact_annot(
|
||||||
|
prect,
|
||||||
|
fill=(0, 0, 0),
|
||||||
|
)
|
||||||
|
page.apply_redactions()
|
||||||
|
output.insert_pdf(page.parent, from_page=page.number, to_page=page.number)
|
||||||
# End of the costly part
|
# End of the costly part
|
||||||
print(f"Page {i + 1}/{npage}: CENSORING DONE")
|
print(f"Page {i + 1}/{npage}: CENSORING DONE")
|
||||||
output.save(destpath)
|
output.save(destpath)
|
||||||
|
|||||||
Reference in New Issue
Block a user