Compare commits

e6727daf8e...main

18 Commits

| SHA1 |
|---|
| 6d5c390350 |
| e13d06d4a7 |
| 547411ba03 |
| cdd26e0bc3 |
| f7c73a0c5a |
| 8e74848397 |
| 8704aee82e |
| 594ac1fa00 |
| 2ee90cd0d7 |
| d42bab5b19 |
| c3a87ceee6 |
| 6f2d373292 |
| a37206d6a4 |
| 6bd75bf93f |
| 5bc24a32d5 |
| a9233926e5 |
| 90235d2788 |
| da316a9351 |
.gitignore (vendored, 2 changes)

```diff
@@ -7,3 +7,5 @@ init.log
 app/__pycache__/
 mariadb/*
 unizeug
+.mypy_cache
+.nvim
```
(deleted file)

```diff
@@ -1,2 +0,0 @@
-# remote_path="/srv/http/"
-# remote_path="dev@10.0.0.25:/var/www/html/"
```
(Dockerfile)

```diff
@@ -20,7 +20,9 @@ RUN apk add --no-cache \
     tiff-dev \
     tk-dev \
     tcl-dev \
-    libwebp-dev
+    libwebp-dev \
+    tesseract-ocr \
+    tesseract-ocr-data-deu
 RUN python -m ensurepip --upgrade
 RUN pip install setuptools wheel
 RUN pip install -r requirements.txt
```
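These two Alpine packages matter later in this compare: app/main.py gains an OCR path that calls PyMuPDF's pdfocr_tobytes(language="deu", ...), which shells out to Tesseract and needs the German language data at runtime. A minimal smoke test for the built image (hypothetical, not part of the repo) might look like:

```python
import shutil
import subprocess

# The tesseract binary comes from the tesseract-ocr package.
assert shutil.which("tesseract") is not None, "tesseract binary missing"

# "--list-langs" prints the installed language packs; depending on the
# Tesseract version the list goes to stdout or stderr, so check both.
proc = subprocess.run(["tesseract", "--list-langs"], capture_output=True, text=True)
assert "deu" in proc.stdout + proc.stderr, "German tessdata missing"
```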
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long.
Image changed. Before Size: 89 KiB | After Size: 103 KiB
app/init.py (19 changes)

```diff
@@ -8,7 +8,7 @@ import os
 import json
 import mariadb
 import logging
-
+from pathlib import Path
 import schedule
 import time
 import pytz
```
```diff
@@ -24,7 +24,8 @@ CATEGORIES = [
 ]
 SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
 unizeug_path = os.environ.get("UNIZEUG_PATH", "./unizeug")
-
+APP_ROOT_PATH = Path(os.environ.get("APP_ROOT_PATH", "./app"))
+FILES_IN_PROGRESS = APP_ROOT_PATH / "files/"
 log = logging.getLogger(__name__)
 logging.basicConfig(
     filename="init.log",
```
```diff
@@ -79,6 +80,19 @@ except mariadb.OperationalError:
     db.commit()
+
+
+def remove_old_FIP_entrys():
+    cur = db.cursor(dictionary=True)
+    cur.execute(
+        "SELECT id,filename FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24 "
+    )
+    files = cur.fetchall()
+    info(f"Remove Files: {files}")
+    for file in files:
+        c.execute("DELETE FROM FIP WHERE id=?", (file["id"],))
+        os.remove(FILES_IN_PROGRESS / file["filename"])
+    db.commit()
 
 
 def get_dirstruct():
     # with open("app/pwfile.json", "r") as f:
     #     cred = json.load(f)
```
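The new function implements a stale-entry sweep: FIP ("file in progress") rows older than 24 hours are deleted together with their on-disk files. The same pattern as a self-contained sketch (table and column names taken from the query above; note that one cursor handles both the SELECT and the DELETEs):

```python
import os
import mariadb
from pathlib import Path

def remove_stale_uploads(db: mariadb.Connection, files_dir: Path) -> None:
    """Sketch: drop FIP rows older than 24h and remove their files."""
    cur = db.cursor(dictionary=True)
    # HOUR() of a TIME value is not capped at 23, so entries several
    # days old also match this predicate.
    cur.execute(
        "SELECT id, filename FROM FIP"
        " WHERE HOUR(TIMEDIFF(NOW(), initTimeStamp)) > 24"
    )
    for row in cur.fetchall():
        cur.execute("DELETE FROM FIP WHERE id=?", (row["id"],))
        os.remove(files_dir / row["filename"])
    db.commit()
```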
```diff
@@ -149,6 +163,7 @@ def get_dirstruct():
                 (lid, pid, idx, subcat.name),
             )
     db.commit()
+    remove_old_FIP_entrys()
 
 
 def link_prof(firstname, lastname, lid):
```
app/main.py (96 changes)

```diff
@@ -3,7 +3,7 @@ from typing import List, Dict, Tuple, Sequence
 
 from starlette.responses import StreamingResponse
 from annotated_types import IsDigit
-from fastapi import FastAPI, File, HTTPException, Path, UploadFile, Request, Form
+from fastapi import FastAPI, File, HTTPException, UploadFile, Request, Form
 from fastapi.responses import FileResponse
 
 # import multiprocessing
```
```diff
@@ -21,7 +21,9 @@ import re
 
 
 import os
+import signal
 import mariadb
+import sys
 
 import filetype
 
```
```diff
@@ -36,11 +38,19 @@ log = logging.getLogger(__name__)
 logging.basicConfig(
     filename=os.environ.get("APP_LOG_PATH"),
     level=logging.INFO,
-    format="[%(asctime)s, %(filename)s:%(lineno)s -> %(funcName)10s() ]%(levelname)s: %(message)s",
+    format="[%(asctime)s, %(filename)s:%(lineno)s -> %(funcName)10s()] %(levelname)s: %(message)s",
 )
 debug = log.debug
 info = log.info
 error = log.error
 critical = log.critical
+
+
+def exception_handler(etype, value, tb):
+    log.exception(f"Uncought Exception: {value}")
+
+
+sys.excepthook = exception_handler
+
 db = mariadb.connect(
     host=os.environ.get("DB_HOST", "db"),
```
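sys.excepthook is called with (etype, value, traceback); the handler above logs only the value, and log.exception outside an except block has no active exception to attach. A common variant (a sketch, not the repo's code) passes all three on so the full traceback lands in the log file:

```python
import logging
import sys

log = logging.getLogger(__name__)

def exception_handler(etype, value, tb):
    # exc_info=(etype, value, tb) makes logging render the full traceback.
    log.error("Uncaught exception: %s", value, exc_info=(etype, value, tb))

sys.excepthook = exception_handler
```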
```diff
@@ -117,12 +127,40 @@ def _sql_quarry(
     )
 
 
+def sql_connector_is_active(connector: mariadb.Connection) -> bool:
+    try:
+        connector.ping()
+    except mariadb.Error as e:
+        return False
+    return True
+
+
+def sql_connect(connector: mariadb.Connection) -> mariadb.Connection:
+    try:
+        connector = mariadb.connect(
+            host=os.environ.get("DB_HOST", "db"),
+            user=os.environ.get("DB_USER", "user"),
+            password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
+            database=os.environ.get("DB_DATABASE", "Unizeug"),
+        )
+    except mariadb.Error as e:
+        critical(
+            f"Cannot reconnect to Database {os.environ.get('DB_DATABASE', 'Unizeug')} on {os.environ.get('DB_HOST', 'db')}. Got Mariadb Error: {e}"
+        )
+        os.kill(os.getpid(), signal.SIGTERM)
+        raise HTTPException(500, detail="Database failed")
+    return connector
+
+
 def sql(
     querry: str,
     data: Tuple[str | int, ...] | str | int = (),
     return_result: bool = True,
     commit: bool = False,
 ) -> List[Tuple]:
+    global db
+    if not sql_connector_is_active(db):
+        db = sql_connect(db)
     cur = db.cursor(dictionary=False)
     return _sql_quarry(cur, querry, data, return_result, commit)
 
```
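The pattern added here is ping-before-use: every sql()/sqlT() call verifies the shared connection with ping() and rebuilds it on failure, escalating to SIGTERM when even reconnecting fails. MariaDB Connector/Python can shoulder part of this itself; if the installed version supports the auto_reconnect property, a sketch of that alternative looks like:

```python
import os
import mariadb

conn = mariadb.connect(
    host=os.environ.get("DB_HOST", "db"),
    user=os.environ.get("DB_USER", "user"),
    password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
    database=os.environ.get("DB_DATABASE", "Unizeug"),
)
# With auto_reconnect enabled, ping() re-establishes a dropped session
# instead of raising, so callers need no explicit reconnect path.
conn.auto_reconnect = True
conn.ping()
```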
```diff
@@ -133,6 +171,10 @@ def sqlT(
     return_result: bool = True,
     commit: bool = False,
 ) -> List[Dict]:
+    global db
+    if not sql_connector_is_active(db):
+        db = sql_connect(db)
+
     cur = db.cursor(dictionary=True)
     return _sql_quarry(cur, querry, data, return_result, commit)
 
```
```diff
@@ -193,7 +235,7 @@ async def get_file(file_id: str):
     #         status_code=500, detail="Somethings wrong with the database"
     #     )
     # filename = cur.fetchone()[0]
-    return FileResponse(FILES_IN_PROGRESS + filename)
+    return FileResponse(FILES_IN_PROGRESS / filename)
 
 
 @app.get("/search/lva")
```
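This one-character change is the recurring fix in this compare: FILES_IN_PROGRESS is now a pathlib.Path, which joins with /; + concatenation with a string raises a TypeError. A quick illustration:

```python
from pathlib import Path

files = Path("app/files")
print(files / "doc.pdf")  # app/files/doc.pdf  (correct join)
# files + "doc.pdf"       # TypeError: unsupported operand type(s)
```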
```diff
@@ -445,7 +487,7 @@ async def get_submission(
         f"lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, ocr: {ocr}"
     )
     info(
-        f"lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, ocr: {ocr}"
+        f"Got Submission: lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, ocr: {ocr}"
     )
     rects_p = json.loads(rects)
     scales_p = json.loads(pagescales)
```
```diff
@@ -534,8 +576,8 @@ async def yield_censor_status(file_id: str):
 
 
 def censor_pdf(
-    path: str,
-    destpath: str,
+    path: os.PathLike,
+    destpath: os.PathLike,
     rects: List[List[List[float]]],
     scales: List[Dict[str, float]],
     file_id: str,
```
```diff
@@ -550,6 +592,7 @@ def censor_pdf(
     Returns:
         None
     """
+    info(f"started Censoring for file {path} to be saved to {destpath}")
    doc = pymupdf.open(path)
     page = doc[0]
     npage = doc.page_count
```
```diff
@@ -582,8 +625,8 @@ def censor_pdf(
 
 
 def censor_pdf_ocr(
-    path: str,
-    destpath: str,
+    path: os.PathLike,
+    destpath: os.PathLike,
     rects: List[List[List[float]]],
     scales: List[Dict[str, float]],
     file_id: str,
```
```diff
@@ -600,6 +643,7 @@ def censor_pdf_ocr(
     Returns:
         None
     """
+    info(f"started Censoring in OCR Mode for file {path} to be saved to {destpath}")
     doc = pymupdf.open(path)
     output = pymupdf.open()
     page = doc[0]
```
```diff
@@ -627,12 +671,31 @@ def censor_pdf_ocr(
         censor_status_datas[file_id]["done"] = False
         censor_status_update_events[file_id].set()
         # THis Costs us dearly
+        try:
             bitmap = page.get_pixmap(dpi=400)
             pdf_bytes = bitmap.pdfocr_tobytes(
                 language="deu",
                 tessdata="/usr/share/tessdata/",  # tesseract needs to be installed; this is the path to the tesseract files
             )
             output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
+        except RuntimeError as e:
+            error(
+                f"Error in OCR for document: {destpath}. Error: {e}. Falling back to standard mode."
+            )
+            if i < len(rects) and rects[i] != []:
+                for rect in rects[i]:
+                    prect = pymupdf.Rect(
+                        rect[0] * wfac,
+                        rect[1] * hfac,
+                        (rect[0] + rect[2]) * wfac,
+                        (rect[1] + rect[3]) * hfac,
+                    )
+                    page.add_redact_annot(
+                        prect,
+                        fill=(0, 0, 0),
+                    )
+                page.apply_redactions()
+            output.insert_pdf(page.parent, from_page=page.number, to_page=page.number)
         # End of the costly part
         print(f"Page {i + 1}/{npage}: CENSORING DONE")
     output.save(destpath)
```
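The OCR branch rasterizes each page at 400 dpi, lets PyMuPDF hand the bitmap to Tesseract, and falls back to plain redaction when OCR raises. The happy path in isolation (a sketch assuming Tesseract plus German tessdata are installed, as the Dockerfile change above provides; input.pdf and output_ocr.pdf are placeholder names):

```python
import pymupdf  # PyMuPDF

doc = pymupdf.open("input.pdf")
output = pymupdf.open()  # empty target document

for page in doc:
    # Rasterize: higher dpi costs time and memory but improves OCR quality.
    pix = page.get_pixmap(dpi=400)
    # Tesseract turns the bitmap into a one-page PDF with a text layer.
    pdf_bytes = pix.pdfocr_tobytes(language="deu", tessdata="/usr/share/tessdata/")
    output.insert_pdf(pymupdf.Document(stream=pdf_bytes))

output.save("output_ocr.pdf")
```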
```diff
@@ -691,15 +754,16 @@ def make_savepath(
     ftype: str,
 ) -> os.PathLike:
     """Generates the path, the file is saved to after the upload process is finished. It creates all nessecery directories."""
+    info(f"Started to make Savepath for '{fname}' in '{lva}' with prof '{prof}'.")
     lv = get_lvpath(lva)
-    lvpath = lv[1] + "/"
+    lvpath = Path(lv[1])
     pf = get_profpath(prof, lv[0])
-    pfpath = pf[1] + "/"
-    catpath = CATEGORIES[int(cat)] + "/"
-    scpath = ""
+    pfpath = Path(pf[1])
+    catpath = Path(CATEGORIES[int(cat)])
+    scpath: str | os.PathLike = ""
     if int(cat) in SUBCAT_CATEGORIES_I and subcat != "":
         sc = get_subcatpath(subcat, int(cat), pf[0], lv[0])
-        scpath = sc[1] + "/"
+        scpath = Path(sc[1])
     if int(cat) == 6:
         savepath = UNIZEUG_PATH / (lv[1] + "_Multimedia_only/") / pfpath
     else:
```
```diff
@@ -724,10 +788,12 @@ def make_savepath(
     destpath = savepath / file
     i = 0
     while destpath.is_file():
+        info(f"{destpath} already exists.")
         file = filename + f"_{i}." + ftype
         i += 1
         destpath = savepath / file
+    destpath.touch()
     info(f"Path for file to be saved generated as: {savepath / file}")
     return savepath / file
 
 
```
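Two behaviours are added here: name collisions get a numeric suffix, and touch() reserves the chosen path on disk immediately, shrinking the window in which a concurrent upload could claim the same name. The naming loop in isolation (a sketch with hypothetical names):

```python
from pathlib import Path

def reserve_unique(savepath: Path, filename: str, ftype: str) -> Path:
    """Sketch: find a free '<filename>[_<i>].<ftype>' and reserve it."""
    file = f"{filename}.{ftype}"
    destpath = savepath / file
    i = 0
    while destpath.is_file():
        file = f"{filename}_{i}.{ftype}"
        i += 1
        destpath = savepath / file
    destpath.touch()  # reserve the name right away
    return destpath
```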
```diff
@@ -958,7 +1024,7 @@ async def remove_old_FIP_entrys():
     #     return_result=False,
     # )
     db.commit()
-    return FileResponse(APP_ROOT_PATH / "/index.html")
+    return FileResponse(APP_ROOT_PATH / "index.html")
 
 
 def delete_from_FIP(uuid: str):
```
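The removed line hits a classic pathlib pitfall: joining with an absolute segment discards everything to its left, so APP_ROOT_PATH / "/index.html" silently resolved to /index.html. For example:

```python
from pathlib import Path

print(Path("/python") / "/index.html")  # /index.html  (left side discarded)
print(Path("/python") / "index.html")   # /python/index.html  (intended)
```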
```diff
@@ -966,4 +1032,4 @@ def delete_from_FIP(uuid: str):
     if len(res) < 1:
         raise HTTPException(500, "I am trying to delete a file that dose not exist")
     sql("DELETE FROM FIP WHERE id=?", (uuid,), return_result=False, commit=True)
-    os.remove(FILES_IN_PROGRESS + res[0]["filename"])
+    os.remove(FILES_IN_PROGRESS / res[0]["filename"])
```
(JavaScript file)

```diff
@@ -283,10 +283,10 @@ function submitPdf(eve) {
   submitForm(formdata);
 }
 async function submitForm(formData) {
-  const updateEventSource = new EventSource(
-    window.location + "get_censor_status/" + doc.fID,
-  );
+  var updateEventSource = null;
+  try {
+    updateEventSource = new EventSource("/get_censor_status/" + doc.fID);
 
     modal.style.display = "flex";
     // console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
     updateEventSource.addEventListener("censorUpdate", function(eve) {
```
```diff
@@ -295,11 +295,19 @@ async function submitForm(formData) {
       upload_status.innerText =
         "Censoring Page " + data.page + "/" + data.pages;
     });
-    const response = await fetch(window.location + "submit", {
+  } catch {
+    console.error(
+      "Error geting eventsource for updating censoring page count: " + error,
+    );
+  }
+  try {
+    const response = await fetch("/submit/", {
       method: "POST",
       body: formData,
     });
+    if (updateEventSource !== null) {
       updateEventSource.close();
+    }
     modal.style.display = "none";
     //let responseJSON=await response.json();
     if (response.ok) {
```
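On the server side, the censorUpdate events consumed here are the kind of thing a Server-Sent-Events endpoint produces; app/main.py already imports StreamingResponse from starlette. A minimal sketch of such an endpoint, independent of the repo's actual /get_censor_status implementation:

```python
import asyncio
from fastapi import FastAPI
from starlette.responses import StreamingResponse

app = FastAPI()

@app.get("/get_censor_status/{file_id}")
async def get_censor_status(file_id: str):
    async def event_stream():
        for page in range(1, 4):  # placeholder progress loop
            # One SSE frame: named event + JSON data, ended by a blank line.
            yield f'event: censorUpdate\ndata: {{"page": {page}, "pages": 3}}\n\n'
            await asyncio.sleep(1)
    return StreamingResponse(event_stream(), media_type="text/event-stream")
```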
```diff
@@ -320,7 +328,7 @@ async function submitForm(formData) {
       window.alert("Error: " + (await response.json())["detail"]);
     }
   } catch (error) {
-    console.error("Error" + error);
+    console.error("Error submitting: " + error);
   }
 }
 function uploadPdf(eve) {
```
```diff
@@ -338,7 +346,7 @@ function uploadPdf(eve) {
 }
 async function uploadFile(formData) {
   try {
-    const response = await fetch(window.location + "uploadfile", {
+    const response = await fetch("/uploadfile/", {
       method: "POST",
       body: formData,
     });
```
(JavaScript file)

```diff
@@ -1,4 +1,4 @@
-var url = window.location + "search/";
+var url = "/search/";
 var lid = null;
 var pid = null;
 var activeAutocompletion = null;
```
(docker-compose file)

```diff
@@ -1,4 +1,3 @@
-version: "3"
 services:
   app:
     container_name: python-app
```
```diff
@@ -58,6 +57,7 @@ services:
     environment:
       ENTRY_COMMAND: python /python/init.py
       UNIZEUG_PATH: /unizeug
+      APP_ROOT_PATH: /python
       DB_HOST: db
       DB_USER: app
       DB_PASSWORD: DBPassword
```