Compare commits

27 Commits

Author SHA1 Message Date
Marcel Gansfusz
6d5c390350 fixed error in compose.yml 2025-11-04 21:24:38 +01:00
Marcel Gansfusz
e13d06d4a7 fixed the periodic deletion of files 2025-11-04 19:16:59 +01:00
Marcel Gansfusz
547411ba03 made FIP entries expire after 1 day 2025-11-04 19:04:41 +01:00
Marcel Gansfusz
cdd26e0bc3 caught exception when trying to censor 2025-11-04 17:54:24 +01:00
Marcel Gansfusz
f7c73a0c5a fixed js 2025-11-04 17:17:13 +01:00
Marcel Gansfusz
8e74848397 fixed js 2025-11-04 17:13:18 +01:00
Marcel Gansfusz
8704aee82e fixed tesseract in Dockerfile 2025-11-04 16:56:24 +01:00
Marcel Gansfusz
594ac1fa00 updated censoring status logic 2025-11-04 16:55:11 +01:00
Marcel Gansfusz
2ee90cd0d7 added tesseract to Dockerfile 2025-11-04 15:45:37 +01:00
Marcel Gansfusz
d42bab5b19 changed the fetch in js to be relative (no explicit URL; just a path); removed version from docker compose 2025-11-04 14:55:04 +01:00
Marcel Gansfusz
c3a87ceee6 changed style of greeting file 2025-10-31 17:48:40 +01:00
Marcel Gansfusz
6f2d373292 updated greeting file to represent new censoring mechanism 2025-10-31 16:18:43 +01:00
Marcel Gansfusz
a37206d6a4 added logging statement 2025-10-30 15:48:51 +01:00
Marcel Gansfusz
6bd75bf93f removed .nvim; added log statements 2025-10-30 15:31:00 +01:00
Marcel Gansfusz
5bc24a32d5 removed __pycache__ 2025-10-30 15:09:16 +01:00
Marcel Gansfusz
a9233926e5 added logging statements 2025-10-30 14:45:53 +01:00
Marcel Gansfusz
90235d2788 Made the database reconnect when the connection is broken 2025-10-30 13:03:02 +01:00
Marcel Gansfusz
da316a9351 changed from string paths to pathlib 2025-10-29 12:14:32 +01:00
Marcel Gansfusz
e6727daf8e I forgot 2025-10-28 19:32:33 +01:00
Marcel Gansfusz
d6508c739d in-between state before converting to pathlib 2025-10-28 19:32:01 +01:00
Marcel Gansfusz
856c401c06 moved DOCKERFILE to Dockerfile 2025-10-27 18:17:43 +01:00
Marcel Gansfusz
4da77c95d1 finished writing compatibility with docker; untested 2025-10-24 21:19:36 +02:00
Marcel Gansfusz
98742107b2 changed structure for docker usage 2025-10-24 21:02:42 +02:00
b9eb5e8bd4 Merge pull request 'improve_censoring_speed' (#1) from improve_censoring_speed into main
Reviewed-on: #1
2025-10-23 15:43:40 +02:00
Marcel Gansfusz
5c6a8dfba2 fixed bug in JS that blocked showing prof suggestions when nothing is entered in the field 2025-10-23 15:40:45 +02:00
Marcel Gansfusz
c30d69d205 added back option to run OCR 2025-10-23 00:06:25 +02:00
Marcel Gansfusz
56d3468889 changed the censoring mode to built-in censoring with pymupdf 2025-10-22 23:26:33 +02:00
26 changed files with 845 additions and 192 deletions

4
.gitignore vendored

@@ -5,3 +5,7 @@ app/dest
app.log
init.log
app/__pycache__/
mariadb/*
unizeug
.mypy_cache
.nvim


@@ -1,2 +0,0 @@
# remote_path="/srv/http/"
# remote_path="dev@10.0.0.25:/var/www/html/"

31
Dockerfile Normal file

@@ -0,0 +1,31 @@
FROM python:3.13-rc-alpine
WORKDIR /usr/src/
COPY requirements.txt /usr/src/requirements.txt
COPY entrypoint.sh /usr/src/entrypoint.sh
RUN apk add --no-cache \
gcc \
g++ \
musl-dev \
python3-dev \
libffi-dev \
openssl-dev \
cargo \
make \
mariadb-connector-c-dev \
jpeg-dev \
zlib-dev \
freetype-dev \
lcms2-dev \
openjpeg-dev \
tiff-dev \
tk-dev \
tcl-dev \
libwebp-dev \
tesseract-ocr \
tesseract-ocr-data-deu
RUN python -m ensurepip --upgrade
RUN pip install setuptools wheel
RUN pip install -r requirements.txt
WORKDIR /python
CMD /bin/sh /usr/src/entrypoint.sh
# ENTRYPOINT ["/usr/src/entrypoint.sh"]
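
The tesseract-ocr and tesseract-ocr-data-deu packages are what the OCR censoring mode needs at runtime: PyMuPDF rasterizes a page and hands the pixmap to Tesseract. A minimal sketch of that call, assuming an input.pdf exists and the Alpine packages above put the German language data under /usr/share/tessdata/:

import pymupdf

doc = pymupdf.open("input.pdf")
out = pymupdf.open()
for page in doc:
    pix = page.get_pixmap(dpi=400)       # rasterize the page
    pdf_bytes = pix.pdfocr_tobytes(      # run Tesseract OCR on the pixmap
        language="deu",
        tessdata="/usr/share/tessdata/",
    )
    out.insert_pdf(pymupdf.Document(stream=pdf_bytes))
out.save("ocr.pdf")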

Binary file not shown.

Binary file not shown.

Binary image changed (7.4 KiB → 7.4 KiB)

Binary image changed (4.2 KiB → 4.2 KiB)

Binary image changed (15 KiB → 15 KiB)

Binary image changed (78 KiB → 78 KiB)

Binary image changed (8.0 KiB → 8.0 KiB)

Binary image changed (28 KiB → 28 KiB)

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary image changed (89 KiB → 103 KiB)


@@ -134,11 +134,10 @@
</div>
<input
type="checkbox"
name="censor"
name="ocr"
id="sec_censor"
value="True"
checked
/><label for="sec_censor">Zensieren</label><br /><br />
/><label for="sec_censor">OCR</label><br /><br />
<button type="submit" id="send">Senden</button>
</form>
</div>
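
The renamed checkbox is read server side as a plain form field. A minimal sketch of the receiving end, assuming a FastAPI app; since an unticked checkbox sends nothing, the client-side JS (further down) appends ocr=False as a fallback:

from typing import Annotated
from fastapi import FastAPI, Form

app = FastAPI()

@app.post("/submit/")
async def submit(ocr: Annotated[str, Form()]):
    run_ocr = ocr == "True"   # checkbox posts value="True" only when ticked
    return {"ocr": run_ocr}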


@@ -2,11 +2,16 @@ from os.path import isdir
from stat import S_ISDIR, S_ISREG
import re
import pathlib
import os
# from base64 import decodebytes
import json
import mariadb
import logging
from pathlib import Path
import schedule
import time
import pytz
CATEGORIES = [
"Prüfungen",
@@ -18,8 +23,9 @@ CATEGORIES = [
"Multimedia",
]
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
unizeug_path = "/home/wildarch/web/fet_unizeug/unizeug/"
unizeug_path = os.environ.get("UNIZEUG_PATH", "./unizeug")
APP_ROOT_PATH = Path(os.environ.get("APP_ROOT_PATH", "./app"))
FILES_IN_PROGRESS = APP_ROOT_PATH / "files/"
log = logging.getLogger(__name__)
logging.basicConfig(
filename="init.log",
@@ -31,7 +37,10 @@ info = log.info
error = log.error
db = mariadb.connect(
host="localhost", user="wildserver", password="DBPassword", database="Unizeug"
host=os.environ.get("DB_HOST", "db"),
user=os.environ.get("DB_USER", "user"),
password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
database=os.environ.get("DB_DATABASE", "unizeug"),
)
c = db.cursor()
try:
@@ -62,12 +71,31 @@ except mariadb.OperationalError:
c.execute(
"CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id))"
)
try:
c.execute(
"CREATE TABLE FIP(id UUID DEFAULT(UUID()), filename VARCHAR(256), filetype VARCHAR(8),initTimeStamp DATETIME, PRIMARY KEY(id))"
)
except mariadb.OperationalError:
pass
db.commit()
def remove_old_FIP_entrys():
cur = db.cursor(dictionary=True)
cur.execute(
"SELECT id,filename FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24 "
)
files = cur.fetchall()
info(f"Remove Files: {files}")
for file in files:
c.execute("DELETE FROM FIP WHERE id=?", (file["id"],))
os.remove(FILES_IN_PROGRESS / file["filename"])
db.commit()
def get_dirstruct():
# with open("app/pwfile.json", "r") as f:
# cred = json.load(f)
# cred = json.load(f)
# ssh = paramiko.SSHClient()
# print(cred["sftpurl"])
# ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
@@ -76,7 +104,7 @@ def get_dirstruct():
# ssh.connect(cred["sftpurl"], username=cred["sftpuser"], password=cred["sftpPW"])
# sftp = ssh.open_sftp()
# folders = sftp.listdir_attr(unizeug_path)
folders=pathlib.Path(unizeug_path)
folders = pathlib.Path(unizeug_path)
for entry in folders.iterdir():
if entry is None:
continue
@@ -135,6 +163,7 @@ def get_dirstruct():
(lid, pid, idx, subcat.name),
)
db.commit()
remove_old_FIP_entrys()
def link_prof(firstname, lastname, lid):
@@ -161,3 +190,8 @@ def link_prof(firstname, lastname, lid):
if __name__ == "__main__":
get_dirstruct()
info("Database updated")
schedule.every().day.at("04:00", "Europe/Vienna").do(get_dirstruct)
while True:
schedule.run_pending()
time.sleep(1)
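
The scanner process now stays alive and re-reads the directory structure once a day. A minimal sketch of that pattern, assuming the schedule and pytz packages from requirements.txt; refresh() stands in for get_dirstruct():

import time
import schedule

def refresh():
    print("rescanning directory structure")

refresh()  # run once at startup
# then every day at 04:00 Vienna time (timezone support needs pytz)
schedule.every().day.at("04:00", "Europe/Vienna").do(refresh)

while True:
    schedule.run_pending()
    time.sleep(1)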


@@ -14,35 +14,49 @@ import asyncio
# import fastapi
from fastapi.staticfiles import StaticFiles
import pymupdf
# import fitz as pymupdf
import json
import re
import os
import signal
import mariadb
import sys
import filetype
import datetime
import logging
import inspect
import pathlib
from pathlib import Path
from starlette.types import HTTPExceptionHandler
log = logging.getLogger(__name__)
logging.basicConfig(
filename="app.log",
filename=os.environ.get("APP_LOG_PATH"),
level=logging.INFO,
format="[%(asctime)s, %(filename)s:%(lineno)s -> %(funcName)10s() ]%(levelname)s: %(message)s",
format="[%(asctime)s, %(filename)s:%(lineno)s -> %(funcName)10s()] %(levelname)s: %(message)s",
)
debug = log.debug
info = log.info
error = log.error
critical = log.critical
def exception_handler(etype, value, tb):
log.exception(f"Uncought Exception: {value}")
sys.excepthook = exception_handler
db = mariadb.connect(
host="localhost", user="wildserver", password="DBPassword", database="Unizeug"
host=os.environ.get("DB_HOST", "db"),
user=os.environ.get("DB_USER", "user"),
password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
database=os.environ.get("DB_DATABASE", "unizeug"),
)
info("App Started")
@@ -51,8 +65,6 @@ info("App Started")
# startup()
app = FastAPI()
app.mount("/favicon", StaticFiles(directory="./favicon"), name="favicon")
app.mount("/static", StaticFiles(directory="./static"), name="static")
CATEGORIES = [
@@ -64,15 +76,18 @@ CATEGORIES = [
"Zusammenfassungen",
"Multimedia",
]
APP_ROOT_PATH = Path(os.environ.get("APP_ROOT_PATH", "./app"))
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
SUBCAT_CATEGORIES_I = [1, 2, 3]
EX_DATE_CATEGORIES = ["Prüfungen", "Klausuren"]
EX_DATE_CATEGORIES_I = [0, 1]
UNIZEUG_PATH = "./app/dest/"
FILES_IN_PROGRESS = "./app/files/"
EMPTYFILE = "./app/graphics/empty.pdf"
UNSUPPORTEDFILE = "./app/graphics/unsupported.pdf"
GREETINGFILE = "./app/graphics/greeting.pdf"
UNIZEUG_PATH = Path(os.environ.get("UNIZEUG_PATH", "./app/dest"))
FILES_IN_PROGRESS = APP_ROOT_PATH / "files/"
EMPTYFILE = APP_ROOT_PATH / "graphics/empty.pdf"
UNSUPPORTEDFILE = APP_ROOT_PATH / "graphics/unsupported.pdf"
GREETINGFILE = APP_ROOT_PATH / "graphics/greeting.pdf"
FAVICON = APP_ROOT_PATH / "favicon"
STATIC_FILES = APP_ROOT_PATH / "static"
# cur = db.cursor()
@@ -112,12 +127,40 @@ def _sql_quarry(
)
def sql_connector_is_active(connector: mariadb.Connection) -> bool:
try:
connector.ping()
except mariadb.Error as e:
return False
return True
def sql_connect(connector: mariadb.Connection) -> mariadb.Connection:
try:
connector = mariadb.connect(
host=os.environ.get("DB_HOST", "db"),
user=os.environ.get("DB_USER", "user"),
password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
database=os.environ.get("DB_DATABASE", "Unizeug"),
)
except mariadb.Error as e:
critical(
f"Cannot reconnect to Database {os.environ.get('DB_DATABASE', 'Unizeug')} on {os.environ.get('DB_HOST', 'db')}. Got Mariadb Error: {e}"
)
os.kill(os.getpid(), signal.SIGTERM)
raise HTTPException(500, detail="Database failed")
return connector
def sql(
querry: str,
data: Tuple[str | int, ...] | str | int = (),
return_result: bool = True,
commit: bool = False,
) -> List[Tuple]:
global db
if not sql_connector_is_active(db):
db = sql_connect(db)
cur = db.cursor(dictionary=False)
return _sql_quarry(cur, querry, data, return_result, commit)
@@ -128,6 +171,10 @@ def sqlT(
return_result: bool = True,
commit: bool = False,
) -> List[Dict]:
global db
if not sql_connector_is_active(db):
db = sql_connect(db)
cur = db.cursor(dictionary=True)
return _sql_quarry(cur, querry, data, return_result, commit)
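
The wrappers above ping the connection before every query and reconnect when the link has died, so a restarted database container no longer takes the app down. A minimal sketch of the pattern, assuming a reachable MariaDB and the mariadb connector package; query() stands in for the app's sql()/sqlT() helpers:

import os
import mariadb

def connect() -> mariadb.Connection:
    return mariadb.connect(
        host=os.environ.get("DB_HOST", "db"),
        user=os.environ.get("DB_USER", "user"),
        password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
        database=os.environ.get("DB_DATABASE", "Unizeug"),
    )

db = connect()

def query(sql: str, params=()):
    global db
    try:
        db.ping()          # raises mariadb.Error if the connection is dead
    except mariadb.Error:
        db = connect()     # transparently open a fresh session
    cur = db.cursor()
    cur.execute(sql, params)
    return cur.fetchall()
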
@@ -146,10 +193,22 @@ def sqlT(
# )
app.mount(
"/favicon",
StaticFiles(directory=os.environ.get("FAVICON_PATH", FAVICON)),
name="favicon",
)
app.mount(
"/static",
StaticFiles(directory=os.environ.get("STATIC_PATH", STATIC_FILES)),
name="static",
)
@app.get("/")
async def get_index():
"""gives the Index.html file"""
return FileResponse("./index.html")
return FileResponse(APP_ROOT_PATH / "index.html")
@app.get("/files/{file_id}")
@@ -176,7 +235,7 @@ async def get_file(file_id: str):
# status_code=500, detail="Somethings wrong with the database"
# )
# filename = cur.fetchone()[0]
return FileResponse(FILES_IN_PROGRESS + filename)
return FileResponse(FILES_IN_PROGRESS / filename)
@app.get("/search/lva")
@@ -222,6 +281,9 @@ async def search_lva(
)
# res += cur.fetchall()
res = remove_duplicates(res + zw)
info(
f"LVA Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
)
if searchlim == 0:
return res
else:
@@ -256,6 +318,9 @@ async def search_profs(
)
# res += cur.fetchall()
res = remove_duplicates(res + zw)
info(
f"Prof Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
)
if searchlim == 0:
return res
else:
@@ -296,6 +361,9 @@ async def search_subcats(
)
# res += cur.fetchall()
res = remove_duplicates(res + rest)
info(
f"Subcatrgory Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
)
if searchlim == 0:
return res
else:
@@ -352,7 +420,7 @@ async def create_upload_file(files: List[UploadFile], c2pdf: bool = True):
content = doc.tobytes()
if ft != "dir":
filename = make_filename_unique(filename)
locpath = FILES_IN_PROGRESS + filename
locpath = FILES_IN_PROGRESS / filename
# locpaths.append(locpath)
# cur = db.cursor()
# try:
@@ -412,14 +480,14 @@ async def get_submission(
pagescales: Annotated[
str, Form()
], # Scales of Pages # Annotated[List[Dict[str, float]], Form()],
censor: Annotated[str, Form()],
ocr: Annotated[str, Form()],
):
"""handles submission"""
print(
f"lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, censor: {censor}"
f"lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, ocr: {ocr}"
)
info(
f"lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, censor: {censor}"
f"Got Submission: lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, ocr: {ocr}"
)
rects_p = json.loads(rects)
scales_p = json.loads(pagescales)
@@ -436,7 +504,7 @@ async def get_submission(
error(f"User tried to upload a file without specifying the {th[1]}")
raise HTTPException(400, f"You need to specify a {th[1]}")
filepath = "./app/files/" + res[0][0]
filepath = FILES_IN_PROGRESS / res[0][0]
# except mariadb.Error as e:
# print(f"Mariadb Error: {e}")
# raise HTTPException(
@@ -452,15 +520,24 @@ async def get_submission(
censor_status_datas[fileId] = {}
if fileId not in censor_status_update_events:
censor_status_update_events[fileId] = asyncio.Event()
await asyncio.to_thread(
censor_pdf,
filepath,
dest,
rects_p,
scales_p,
False if censor == "False" else True,
fileId,
)
if ocr == "True":
await asyncio.to_thread(
censor_pdf_ocr,
filepath,
dest,
rects_p,
scales_p,
fileId,
)
else:
await asyncio.to_thread(
censor_pdf,
filepath,
dest,
rects_p,
scales_p,
fileId,
)
# return {"done": "ok"}
# print(dest)
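
Both censoring variants are blocking PyMuPDF work, so the handler pushes them off the event loop with asyncio.to_thread and only branches on the ocr flag. A minimal sketch with a stand-in worker in place of censor_pdf/censor_pdf_ocr:

import asyncio
import time

def blocking_censor(mode: str) -> None:
    time.sleep(1)                  # stands in for the CPU-bound PDF work
    print(f"{mode} censoring done")

async def handle_submission(ocr: str) -> None:
    mode = "ocr" if ocr == "True" else "plain"
    await asyncio.to_thread(blocking_censor, mode)  # keeps the loop free

asyncio.run(handle_submission("True"))
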
@@ -487,6 +564,7 @@ async def get_censor_status(file_id: str):
async def yield_censor_status(file_id: str):
"""Internal function to yield updates to the stream"""
while True:
await censor_status_update_events[file_id].wait()
censor_status_update_events[file_id].clear()
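
The status endpoint is a server-sent-event stream driven by an asyncio.Event: the censoring thread updates a shared dict and sets the event, and the generator wakes up and pushes one censorUpdate message per change. A minimal sketch, assuming a FastAPI app; status and event are stand-ins for one entry of censor_status_datas / censor_status_update_events:

import asyncio
import json
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()
status = {"page": 0, "pages": 0, "done": False}
event = asyncio.Event()

async def yield_status():
    while True:
        await event.wait()      # sleep until the worker signals progress
        event.clear()
        yield f"event: censorUpdate\ndata: {json.dumps(status)}\n\n"
        if status["done"]:
            return

@app.get("/get_censor_status_demo")
async def get_status():
    return StreamingResponse(yield_status(), media_type="text/event-stream")
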
@@ -498,15 +576,63 @@ async def yield_censor_status(file_id: str):
def censor_pdf(
path: str,
destpath: str,
path: os.PathLike,
destpath: os.PathLike,
rects: List[List[List[float]]],
scales: List[Dict[str, float]],
file_id: str,
):
"""Censors pdf and saves the file to the given Destpath.
Args:
path: path to the pdf document
destpath: Path where the result is supposed to be saved to
rects: Coordinates of rectangles to be placed on the pdf document
scales: Scales of the rects coordinates for the pdf document
secure: whether or not the pdf document is supposed to be converted into an image (and back) to make sure the censoring is irreversible
Returns:
None
"""
info(f"started Censoring for file {path} to be saved to {destpath}")
doc = pymupdf.open(path)
page = doc[0]
npage = doc.page_count
for i in range(npage):
page = doc[i]
if i < len(rects) and rects[i] != []:
print(i)
wfac = page.rect.width / scales[i]["width"]
hfac = page.rect.height / scales[i]["height"]
for rect in rects[i]:
prect = pymupdf.Rect(
rect[0] * wfac,
rect[1] * hfac,
(rect[0] + rect[2]) * wfac,
(rect[1] + rect[3]) * hfac,
)
page.add_redact_annot(
prect,
fill=(0, 0, 0),
)
page.apply_redactions()
censor_status_datas[file_id]["page"] = i + 1
censor_status_datas[file_id]["pages"] = npage
censor_status_datas[file_id]["done"] = False
censor_status_update_events[file_id].set()
doc.set_metadata({})
doc.save(destpath, garbage=4, deflate=True, clean=True)
censor_status_datas[file_id]["done"] = True
censor_status_update_events[file_id].set()
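
The non-OCR path now uses PyMuPDF redaction annotations, which delete the underlying text instead of merely drawing a box over it. A minimal standalone sketch, assuming input.pdf exists and one rectangle given as [x, y, width, height] in page coordinates:

import pymupdf

doc = pymupdf.open("input.pdf")
page = doc[0]
x, y, w, h = 72, 72, 200, 20                 # area to black out
rect = pymupdf.Rect(x, y, x + w, y + h)
page.add_redact_annot(rect, fill=(0, 0, 0))  # mark the region
page.apply_redactions()                      # drop the text underneath
doc.set_metadata({})                         # also strip document metadata
doc.save("censored.pdf", garbage=4, deflate=True, clean=True)
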
def censor_pdf_ocr(
path: os.PathLike,
destpath: os.PathLike,
rects: List[List[List[float]]],
scales: List[Dict[str, float]],
secure: bool,
file_id: str,
):
"""Censors pdf and runs OCR
If Secure is True the file is converted to Pixels and then recreated; else the censored sections are just covering the text below and can be easily removed with e.g. Inkscape
The file is converted to Pixels and then recreated.
Saves the file to the given Destpath.
Args:
path: path to the pdf document
@@ -517,15 +643,11 @@ def censor_pdf(
Returns:
None
"""
info(f"started Censoring in OCR Mode for file {path} to be saved to {destpath}")
doc = pymupdf.open(path)
output = pymupdf.open()
page = doc[0]
# width = page.rect.width
# height = page.rect.height
# print(width, height)
npage = doc.page_count
# pages = []
# tasks = []
for i in range(npage):
page = doc[i]
if i < len(rects) and rects[i] != []:
@@ -544,49 +666,41 @@ def censor_pdf(
color=(0, 0, 0),
fill=(0, 0, 0),
)
if secure:
censor_status_datas[file_id]["page"] = i + 1
censor_status_datas[file_id]["pages"] = npage
censor_status_datas[file_id]["done"] = False
censor_status_update_events[file_id].set()
# pages.append(page)
# This costs us dearly
censor_status_datas[file_id]["page"] = i + 1
censor_status_datas[file_id]["pages"] = npage
censor_status_datas[file_id]["done"] = False
censor_status_update_events[file_id].set()
# This costs us dearly
try:
bitmap = page.get_pixmap(dpi=400)
pdf_bytes = bitmap.pdfocr_tobytes(
language="deu",
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files
)
output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
# End of the costly part
# tasks.append(asyncio.create_task(censor_page(page)))
print(f"Page {i + 1}/{npage}: CENSORING DONE")
else:
output.insert_pdf(doc, i, i)
# if secure:
# pages_bytes: List[bytes] = []
# censor_page(pages[0])
# with multiprocessing.Pool(npage) as p:
# pages_bytes = p.map(censor_page, pages)
# pages_bytes = p.map(test_function, [1, 2, 3, 4])
# for pdf_bytes in pages_bytes:
# output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
# with concurrent.futures.ThreadPoolExecutor() as executor:
# futures = []
# for page in pages:
# futures.append(executor.submit(censor_page, page))
# for future in futures:
# output.insert_pdf(pymupdf.Document(stream=future.result()))
#
# for task in tasks:
# output.insert_pdf(pymupdf.Document(stream=await task))
# print("CENSORING DONE")
except RuntimeError as e:
error(
f"Error in OCR for document: {destpath}. Error: {e}. Falling back to standard mode."
)
if i < len(rects) and rects[i] != []:
for rect in rects[i]:
prect = pymupdf.Rect(
rect[0] * wfac,
rect[1] * hfac,
(rect[0] + rect[2]) * wfac,
(rect[1] + rect[3]) * hfac,
)
page.add_redact_annot(
prect,
fill=(0, 0, 0),
)
page.apply_redactions()
output.insert_pdf(page.parent, from_page=page.number, to_page=page.number)
# End of the costly part
print(f"Page {i + 1}/{npage}: CENSORING DONE")
output.save(destpath)
if secure:
censor_status_datas[file_id]["done"] = True
censor_status_update_events[file_id].set()
# censor_finished_flags[file_id].set()
censor_status_datas[file_id]["done"] = True
censor_status_update_events[file_id].set()
def test_function(i: int) -> bytes:
@@ -638,21 +752,22 @@ def make_savepath(
ex_date: str,
fname: str,
ftype: str,
) -> str:
) -> os.PathLike:
"""Generates the path, the file is saved to after the upload process is finished. It creates all nessecery directories."""
info(f"Started to make Savepath for '{fname}' in '{lva}' with prof '{prof}'.")
lv = get_lvpath(lva)
lvpath = lv[1] + "/"
lvpath = Path(lv[1])
pf = get_profpath(prof, lv[0])
pfpath = pf[1] + "/"
catpath = CATEGORIES[int(cat)] + "/"
scpath = ""
pfpath = Path(pf[1])
catpath = Path(CATEGORIES[int(cat)])
scpath: str | os.PathLike = ""
if int(cat) in SUBCAT_CATEGORIES_I and subcat != "":
sc = get_subcatpath(subcat, int(cat), pf[0], lv[0])
scpath = sc[1] + "/"
scpath = Path(sc[1])
if int(cat) == 6:
savepath = UNIZEUG_PATH + lv[1] + "_Multimedia_only/" + pfpath
savepath = UNIZEUG_PATH / (lv[1] + "_Multimedia_only/") / pfpath
else:
savepath = UNIZEUG_PATH + lvpath + pfpath + catpath + scpath
savepath = UNIZEUG_PATH / lvpath / pfpath / catpath / scpath
os.makedirs(savepath, exist_ok=True)
filename = sem + "_"
if int(cat) in EX_DATE_CATEGORIES_I:
@@ -670,14 +785,16 @@ def make_savepath(
filename += fname
file = filename + "." + ftype
destpath = pathlib.Path(savepath + file)
destpath = savepath / file
i = 0
while destpath.is_file():
info(f"{destpath} already exists.")
file = filename + f"_{i}." + ftype
i += 1
destpath = pathlib.Path(savepath + file)
destpath = savepath / file
destpath.touch()
return savepath + file
info(f"Path for file to be saved generated as: {savepath / file}")
return savepath / file
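
With pathlib, the save path is composed with the / operator and collisions are resolved by probing destpath.is_file(). A minimal sketch with hypothetical example values in place of the database lookups:

import os
from pathlib import Path

UNIZEUG_PATH = Path(os.environ.get("UNIZEUG_PATH", "./app/dest"))

savepath = UNIZEUG_PATH / "SomeLVA" / "SomeProf" / "Klausuren"
os.makedirs(savepath, exist_ok=True)

filename = "2025W_test"
destpath = savepath / (filename + ".pdf")
i = 0
while destpath.is_file():                    # never overwrite an upload
    destpath = savepath / f"{filename}_{i}.pdf"
    i += 1
destpath.touch()                             # reserve the name immediately
print(destpath)
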
def get_lvpath(lva: str) -> Tuple[int, str]:
@@ -870,10 +987,10 @@ async def save_files_to_folder(files: List[UploadFile]) -> str:
if filename == "":
filename = "None"
filename = make_filename_unique(filename)
os.mkdir(FILES_IN_PROGRESS + filename)
os.mkdir(FILES_IN_PROGRESS / filename)
for idx, file in enumerate(files):
fn = file.filename if file.filename is not None else "None" + str(idx)
with open(FILES_IN_PROGRESS + filename + "/" + fn, "wb") as f:
with open(FILES_IN_PROGRESS / filename / fn, "wb") as f:
f.write(await file.read())
return filename
@@ -901,13 +1018,13 @@ async def remove_old_FIP_entrys():
info(f"Remove Files: {files}")
for file in files:
sql("DELETE FROM FIP WHERE id=?", (file["id"]), return_result=False)
os.remove(FILES_IN_PROGRESS + file["filename"])
os.remove(FILES_IN_PROGRESS / file["filename"])
# sql(
# "DELETE FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24",
# return_result=False,
# )
db.commit()
return FileResponse("./index.html")
return FileResponse(APP_ROOT_PATH / "index.html")
def delete_from_FIP(uuid: str):
@@ -915,4 +1032,4 @@ def delete_from_FIP(uuid: str):
if len(res) < 1:
raise HTTPException(500, "I am trying to delete a file that dose not exist")
sql("DELETE FROM FIP WHERE id=?", (uuid,), return_result=False, commit=True)
os.remove(FILES_IN_PROGRESS + res[0]["filename"])
os.remove(FILES_IN_PROGRESS / res[0]["filename"])
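
The files-in-progress table gives every upload a UUID and a timestamp, and both the app and the scanner purge rows (and the files behind them) once they are older than 24 hours. A minimal sketch of that cleanup, assuming a connected mariadb handle and the FIP table created in init.py:

import os
from pathlib import Path

FILES_IN_PROGRESS = Path("./app/files")

def remove_stale(db):
    cur = db.cursor(dictionary=True)
    cur.execute(
        "SELECT id, filename FROM FIP "
        "WHERE HOUR(TIMEDIFF(NOW(), initTimeStamp)) > 24"
    )
    for row in cur.fetchall():
        cur.execute("DELETE FROM FIP WHERE id=?", (row["id"],))
        os.remove(FILES_IN_PROGRESS / row["filename"])  # drop the orphan
    db.commit()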


@@ -276,17 +276,17 @@ function submitPdf(eve) {
formdata.append("fileId", doc.fID);
//formdata.append("filename", doc.filename);
formdata.append("ftype", doc.filetype);
if (!formdata.has("censor")) {
formdata.append("censor", "False");
if (!formdata.has("ocr")) {
formdata.append("ocr", "False");
}
console.log(formdata);
submitForm(formdata);
}
async function submitForm(formData) {
var updateEventSource = null;
try {
const updateEventSource = new EventSource(
"http://127.0.0.1:8000/get_censor_status/" + doc.fID,
);
updateEventSource = new EventSource("/get_censor_status/" + doc.fID);
modal.style.display = "flex";
// console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
updateEventSource.addEventListener("censorUpdate", function(eve) {
@@ -295,11 +295,19 @@ async function submitForm(formData) {
upload_status.innerText =
"Censoring Page " + data.page + "/" + data.pages;
});
const response = await fetch("http://127.0.0.1:8000/submit", {
} catch (error) {
console.error(
"Error getting EventSource for updating censoring page count: " + error,
);
}
try {
const response = await fetch("/submit/", {
method: "POST",
body: formData,
});
updateEventSource.close();
if (updateEventSource !== null) {
updateEventSource.close();
}
modal.style.display = "none";
//let responseJSON=await response.json();
if (response.ok) {
@@ -320,7 +328,7 @@ async function submitForm(formData) {
window.alert("Error: " + (await response.json())["detail"]);
}
} catch (error) {
console.error("Error" + error);
console.error("Error submitting: " + error);
}
}
function uploadPdf(eve) {
@@ -338,7 +346,7 @@ function uploadPdf(eve) {
}
async function uploadFile(formData) {
try {
const response = await fetch("http://127.0.0.1:8000/uploadfile", {
const response = await fetch("/uploadfile/", {
method: "POST",
body: formData,
});
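
Because the frontend now talks to relative paths, any client behind the same host works; the SSE stream can even be consumed outside the browser. A minimal sketch using requests (already in requirements.txt); the host, port, and file id are placeholders:

import requests

url = "http://localhost:80/get_censor_status/some-file-id"
with requests.get(url, stream=True, timeout=60) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if line and line.startswith("data:"):
            print(line[5:].strip())   # JSON with page/pages/done fields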


@@ -1,4 +1,4 @@
var url = "http://127.0.0.1:8000/search/";
var url = "/search/";
var lid = null;
var pid = null;
var activeAutocompletion = null;
@@ -21,7 +21,7 @@ function autocomplete(inp, type) {
i,
apirq,
iname,
val = this.value;
val = inp.value;
/*close any already open lists of autocompleted values*/
closeAllLists();
if (!val && type === "lva" && pid === null) {
@@ -56,7 +56,7 @@ function autocomplete(inp, type) {
a.setAttribute("id", this.id + "autocomplete-list");
a.setAttribute("class", "autocomplete-items");
/*append the DIV element as a child of the autocomplete container:*/
this.parentNode.appendChild(a);
inp.parentNode.appendChild(a);
/*for each item in the array...*/
//await response;
if (response.ok) {

67
compose.yml Normal file

@@ -0,0 +1,67 @@
services:
app:
container_name: python-app
# command: python -m uvicorn app.main:app --host 0.0.0.0 --port 80
build:
context: .
dockerfile: Dockerfile
volumes:
- ./app:/python
- ./unizeug:/unizeug:source
ports:
- 80:80
restart: unless-stopped
environment:
ENTRY_COMMAND: python -m uvicorn main:app --host 0.0.0.0 --port 80
APP_LOG_PATH: /python/app.log
APP_ROOT_PATH: /python
UNIZEUG_PATH: /unizeug
DB_HOST: db
DB_USER: app
DB_PASSWORD: DBPassword
DB_DATABASE: Unizeug
TZ: "Europe/Vienna"
depends_on:
- db
- scaner
db:
container_name: db
image: mariadb
restart: unless-stopped
environment:
MARIADB_ROOT_PASSWORD: DBPassword
MARIADB_USER: app
UNIZEUG_PATH: /unizeug
MARIADB_PASSWORD: DBPassword
MARIADB_DATABASE: Unizeug
TZ: "Europe/Vienna"
healthcheck:
test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
start_period: 10s
interval: 10s
timeout: 5s
retries: 3
volumes:
- ./mariadb:/var/lib/mysql
scaner:
container_name: python-scaner
# command: python /python/init.py
build:
context: .
dockerfile: Dockerfile
volumes:
- ./app:/python
- ./unizeug:/unizeug:source
restart: unless-stopped
environment:
ENTRY_COMMAND: python /python/init.py
UNIZEUG_PATH: /unizeug
APP_ROOT_PATH: /python
DB_HOST: db
DB_USER: app
DB_PASSWORD: DBPassword
DB_DATABASE: Unizeug
TZ: "Europe/Vienna"
depends_on:
- db

4
entrypoint.sh Executable file

@@ -0,0 +1,4 @@
#!/bin/sh
if [[ -n "$ENTRY_COMMAND" ]]; then
/bin/sh -c "$ENTRY_COMMAND"
fi


@@ -44,10 +44,12 @@ pypdf==5.2.0
pytesseract==0.3.13
python-dotenv==1.0.1
python-multipart==0.0.20
pytz==2025.2
PyYAML==6.0.2
requests==2.32.3
rich==13.9.4
rich-toolkit==0.13.2
schedule==1.2.2
shellingham==1.5.4
sniffio==1.3.1
starlette==0.45.3