Compare commits

8 Commits

Author SHA1 Message Date
Marcel Gansfusz
6d5c390350 fixed error in compose.yml 2025-11-04 21:24:38 +01:00
Marcel Gansfusz
e13d06d4a7 fixed regular deletions of files 2025-11-04 19:16:59 +01:00
Marcel Gansfusz
547411ba03 made to kill fip entrys when 1d passes 2025-11-04 19:04:41 +01:00
Marcel Gansfusz
cdd26e0bc3 caught exception in trying to censor 2025-11-04 17:54:24 +01:00
Marcel Gansfusz
f7c73a0c5a fixed js 2025-11-04 17:17:13 +01:00
Marcel Gansfusz
8e74848397 fixed js 2025-11-04 17:13:18 +01:00
Marcel Gansfusz
8704aee82e fixed tesseract in Dockerfile 2025-11-04 16:56:24 +01:00
Marcel Gansfusz
594ac1fa00 updated censoring status logic 2025-11-04 16:55:11 +01:00
5 changed files with 58 additions and 14 deletions

View File

@@ -21,7 +21,8 @@ RUN apk add --no-cache \
tk-dev \ tk-dev \
tcl-dev \ tcl-dev \
libwebp-dev \ libwebp-dev \
tesseract-ocr tesseract-ocr \
tesseract-ocr-data-deu
RUN python -m ensurepip --upgrade RUN python -m ensurepip --upgrade
RUN pip install setuptools wheel RUN pip install setuptools wheel
RUN pip install -r requirements.txt RUN pip install -r requirements.txt

View File

@@ -8,7 +8,7 @@ import os
import json import json
import mariadb import mariadb
import logging import logging
from pathlib import Path
import schedule import schedule
import time import time
import pytz import pytz
@@ -24,7 +24,8 @@ CATEGORIES = [
] ]
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"] SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
unizeug_path = os.environ.get("UNIZEUG_PATH", "./unizeug") unizeug_path = os.environ.get("UNIZEUG_PATH", "./unizeug")
APP_ROOT_PATH = Path(os.environ.get("APP_ROOT_PATH", "./app"))
FILES_IN_PROGRESS = APP_ROOT_PATH / "files/"
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
logging.basicConfig( logging.basicConfig(
filename="init.log", filename="init.log",
@@ -79,6 +80,19 @@ except mariadb.OperationalError:
db.commit() db.commit()
def remove_old_FIP_entrys():
    """Purge stale entries from the FIP table together with their files.

    Selects every FIP row for which
    ``HOUR(TIMEDIFF(NOW(), initTimeStamp)) > 24`` holds, deletes the row and
    removes the matching file below ``FILES_IN_PROGRESS``.  All deletions are
    committed together once the loop has finished.

    A file that is already missing on disk is not treated as an error:
    otherwise the exception would abort the run before the commit, the stale
    row would survive, and every later run would fail on the same entry.
    Any other ``OSError`` (e.g. a permission problem) still propagates.
    """
    cur = db.cursor(dictionary=True)
    cur.execute(
        "SELECT id,filename FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24 "
    )
    files = cur.fetchall()
    info(f"Remove Files: {files}")
    for file in files:
        # Reuse the local cursor; the result set was fully fetched above, so
        # the function no longer relies on a cursor defined outside of it.
        cur.execute("DELETE FROM FIP WHERE id=?", (file["id"],))
        try:
            os.remove(FILES_IN_PROGRESS / file["filename"])
        except FileNotFoundError:
            # The file is already gone; the row must still be dropped.
            info(f"File already removed: {file['filename']}")
    db.commit()
def get_dirstruct(): def get_dirstruct():
# with open("app/pwfile.json", "r") as f: # with open("app/pwfile.json", "r") as f:
# cred = json.load(f) # cred = json.load(f)
@@ -149,6 +163,7 @@ def get_dirstruct():
(lid, pid, idx, subcat.name), (lid, pid, idx, subcat.name),
) )
db.commit() db.commit()
remove_old_FIP_entrys()
def link_prof(firstname, lastname, lid): def link_prof(firstname, lastname, lid):

View File

@@ -671,12 +671,31 @@ def censor_pdf_ocr(
censor_status_datas[file_id]["done"] = False censor_status_datas[file_id]["done"] = False
censor_status_update_events[file_id].set() censor_status_update_events[file_id].set()
# THis Costs us dearly # THis Costs us dearly
try:
bitmap = page.get_pixmap(dpi=400) bitmap = page.get_pixmap(dpi=400)
pdf_bytes = bitmap.pdfocr_tobytes( pdf_bytes = bitmap.pdfocr_tobytes(
language="deu", language="deu",
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files
) )
output.insert_pdf(pymupdf.Document(stream=pdf_bytes)) output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
except RuntimeError as e:
error(
f"Error in OCR for document: {destpath}. Error: {e}. Falling back to standard mode."
)
if i < len(rects) and rects[i] != []:
for rect in rects[i]:
prect = pymupdf.Rect(
rect[0] * wfac,
rect[1] * hfac,
(rect[0] + rect[2]) * wfac,
(rect[1] + rect[3]) * hfac,
)
page.add_redact_annot(
prect,
fill=(0, 0, 0),
)
page.apply_redactions()
output.insert_pdf(page.parent, from_page=page.number, to_page=page.number)
# End of the costly part # End of the costly part
print(f"Page {i + 1}/{npage}: CENSORING DONE") print(f"Page {i + 1}/{npage}: CENSORING DONE")
output.save(destpath) output.save(destpath)

View File

@@ -283,10 +283,10 @@ function submitPdf(eve) {
submitForm(formdata); submitForm(formdata);
} }
async function submitForm(formData) { async function submitForm(formData) {
var updateEventSource = null;
try { try {
const updateEventSource = new EventSource( updateEventSource = new EventSource("/get_censor_status/" + doc.fID);
window.location + "get_censor_status/" + doc.fID,
);
modal.style.display = "flex"; modal.style.display = "flex";
// console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID); // console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
updateEventSource.addEventListener("censorUpdate", function(eve) { updateEventSource.addEventListener("censorUpdate", function(eve) {
@@ -295,11 +295,19 @@ async function submitForm(formData) {
upload_status.innerText = upload_status.innerText =
"Censoring Page " + data.page + "/" + data.pages; "Censoring Page " + data.page + "/" + data.pages;
}); });
} catch {
console.error(
"Error geting eventsource for updating censoring page count: " + error,
);
}
try {
const response = await fetch("/submit/", { const response = await fetch("/submit/", {
method: "POST", method: "POST",
body: formData, body: formData,
}); });
if (updateEventSource !== null) {
updateEventSource.close(); updateEventSource.close();
}
modal.style.display = "none"; modal.style.display = "none";
//let responseJSON=await response.json(); //let responseJSON=await response.json();
if (response.ok) { if (response.ok) {
@@ -320,7 +328,7 @@ async function submitForm(formData) {
window.alert("Error: " + (await response.json())["detail"]); window.alert("Error: " + (await response.json())["detail"]);
} }
} catch (error) { } catch (error) {
console.error("Error" + error); console.error("Error submitting: " + error);
} }
} }
function uploadPdf(eve) { function uploadPdf(eve) {

View File

@@ -57,6 +57,7 @@ services:
environment: environment:
ENTRY_COMMAND: python /python/init.py ENTRY_COMMAND: python /python/init.py
UNIZEUG_PATH: /unizeug UNIZEUG_PATH: /unizeug
APP_ROOT_PATH: /python
DB_HOST: db DB_HOST: db
DB_USER: app DB_USER: app
DB_PASSWORD: DBPassword DB_PASSWORD: DBPassword