Compare commits
6 Commits
8704aee82e
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6d5c390350 | ||
|
|
e13d06d4a7 | ||
|
|
547411ba03 | ||
|
|
cdd26e0bc3 | ||
|
|
f7c73a0c5a | ||
|
|
8e74848397 |
19
app/init.py
19
app/init.py
@@ -8,7 +8,7 @@ import os
|
||||
import json
|
||||
import mariadb
|
||||
import logging
|
||||
|
||||
from pathlib import Path
|
||||
import schedule
|
||||
import time
|
||||
import pytz
|
||||
@@ -24,7 +24,8 @@ CATEGORIES = [
|
||||
]
|
||||
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
|
||||
unizeug_path = os.environ.get("UNIZEUG_PATH", "./unizeug")
|
||||
|
||||
APP_ROOT_PATH = Path(os.environ.get("APP_ROOT_PATH", "./app"))
|
||||
FILES_IN_PROGRESS = APP_ROOT_PATH / "files/"
|
||||
log = logging.getLogger(__name__)
|
||||
logging.basicConfig(
|
||||
filename="init.log",
|
||||
@@ -79,6 +80,19 @@ except mariadb.OperationalError:
|
||||
db.commit()
|
||||
|
||||
|
||||
def remove_old_FIP_entrys():
    """Delete stale file-in-progress (FIP) rows and their files on disk.

    Selects the ``id`` and ``filename`` of every ``FIP`` row matched by the
    age filter in the query below, deletes each row, and removes the file of
    the same name from ``FILES_IN_PROGRESS``. All row deletions are committed
    together once every selected entry has been processed.

    A file that is already missing on disk is logged and skipped so that a
    single vanished file cannot block the cleanup of every other entry.
    """
    cur = db.cursor(dictionary=True)
    try:
        # NOTE(review): HOUR() yields only the whole-hour part of the time
        # difference, so "> 24" first matches at 25 hours — confirm that this
        # is the intended cutoff.
        cur.execute(
            "SELECT id,filename FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24 "
        )
        files = cur.fetchall()
        info(f"Remove Files: {files}")
        for file in files:
            # Use the cursor opened above; the result set has been fully
            # fetched, so the cursor is free for further statements.
            cur.execute("DELETE FROM FIP WHERE id=?", (file["id"],))
            try:
                os.remove(FILES_IN_PROGRESS / file["filename"])
            except FileNotFoundError:
                # The row is stale either way; still drop it from the table.
                log.warning(
                    "File %s of FIP entry %s was already gone",
                    file["filename"],
                    file["id"],
                )
        db.commit()
    finally:
        cur.close()
|
||||
|
||||
|
||||
def get_dirstruct():
|
||||
# with open("app/pwfile.json", "r") as f:
|
||||
# cred = json.load(f)
|
||||
@@ -149,6 +163,7 @@ def get_dirstruct():
|
||||
(lid, pid, idx, subcat.name),
|
||||
)
|
||||
db.commit()
|
||||
remove_old_FIP_entrys()
|
||||
|
||||
|
||||
def link_prof(firstname, lastname, lid):
|
||||
|
||||
19
app/main.py
19
app/main.py
@@ -671,12 +671,31 @@ def censor_pdf_ocr(
|
||||
censor_status_datas[file_id]["done"] = False
|
||||
censor_status_update_events[file_id].set()
|
||||
# This costs us dearly
|
||||
try:
|
||||
bitmap = page.get_pixmap(dpi=400)
|
||||
pdf_bytes = bitmap.pdfocr_tobytes(
|
||||
language="deu",
|
||||
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to the tesseract data files
|
||||
)
|
||||
output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
|
||||
except RuntimeError as e:
|
||||
error(
|
||||
f"Error in OCR for document: {destpath}. Error: {e}. Falling back to standard mode."
|
||||
)
|
||||
if i < len(rects) and rects[i] != []:
|
||||
for rect in rects[i]:
|
||||
prect = pymupdf.Rect(
|
||||
rect[0] * wfac,
|
||||
rect[1] * hfac,
|
||||
(rect[0] + rect[2]) * wfac,
|
||||
(rect[1] + rect[3]) * hfac,
|
||||
)
|
||||
page.add_redact_annot(
|
||||
prect,
|
||||
fill=(0, 0, 0),
|
||||
)
|
||||
page.apply_redactions()
|
||||
output.insert_pdf(page.parent, from_page=page.number, to_page=page.number)
|
||||
# End of the costly part
|
||||
print(f"Page {i + 1}/{npage}: CENSORING DONE")
|
||||
output.save(destpath)
|
||||
|
||||
@@ -283,8 +283,9 @@ function submitPdf(eve) {
|
||||
submitForm(formdata);
|
||||
}
|
||||
async function submitForm(formData) {
|
||||
var updateEventSource = null;
|
||||
try {
|
||||
const updateEventSource = new EventSource("/get_censor_status/" + doc.fID);
|
||||
updateEventSource = new EventSource("/get_censor_status/" + doc.fID);
|
||||
|
||||
modal.style.display = "flex";
|
||||
// console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
|
||||
@@ -304,7 +305,9 @@ async function submitForm(formData) {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
});
|
||||
if (updateEventSource !== null) {
|
||||
updateEventSource.close();
|
||||
}
|
||||
modal.style.display = "none";
|
||||
//let responseJSON=await response.json();
|
||||
if (response.ok) {
|
||||
|
||||
@@ -57,6 +57,7 @@ services:
|
||||
environment:
|
||||
ENTRY_COMMAND: python /python/init.py
|
||||
UNIZEUG_PATH: /unizeug
|
||||
APP_ROOT_PATH: /python
|
||||
DB_HOST: db
|
||||
DB_USER: app
|
||||
DB_PASSWORD: DBPassword
|
||||
|
||||
Reference in New Issue
Block a user