Compare commits

...

6 Commits

Author SHA1 Message Date
Marcel Gansfusz
e6727daf8e i forgor 2025-10-28 19:32:33 +01:00
Marcel Gansfusz
d6508c739d in between state before converting to pathlib 2025-10-28 19:32:01 +01:00
Marcel Gansfusz
856c401c06 moved DOCKERFILE to Dockerfile 2025-10-27 18:17:43 +01:00
Marcel Gansfusz
4da77c95d1 finished writeing compatibility with docker; untested 2025-10-24 21:19:36 +02:00
Marcel Gansfusz
98742107b2 changed structure for docker usage 2025-10-24 21:02:42 +02:00
b9eb5e8bd4 Merge pull request 'improve_censoring_speed' (#1) from improve_censoring_speed into main
Reviewed-on: #1
2025-10-23 15:43:40 +02:00
22 changed files with 180 additions and 33 deletions

2
.gitignore vendored
View File

@@ -5,3 +5,5 @@ app/dest
app.log app.log
init.log init.log
app/__pycache__/ app/__pycache__/
mariadb/*
unizeug

29
Dockerfile Normal file
View File

@@ -0,0 +1,29 @@
# Image shared by the compose services that run the Python code.
# The application source is not copied into the image: compose.yml bind-mounts
# it at /python, and the command to run is passed in via $ENTRY_COMMAND, which
# entrypoint.sh executes.

# Use the released 3.13 image rather than the stale release-candidate tag.
FROM python:3.13-alpine

WORKDIR /usr/src/

# Compiler toolchain and development headers, installed before any COPY so
# this expensive layer stays cached when requirements.txt or entrypoint.sh
# change. Presumably needed to build wheels that have no musl binaries
# (e.g. the MariaDB connector and imaging libraries) -- TODO confirm which
# of these are still required.
RUN apk add --no-cache \
    cargo \
    freetype-dev \
    g++ \
    gcc \
    jpeg-dev \
    lcms2-dev \
    libffi-dev \
    libwebp-dev \
    make \
    mariadb-connector-c-dev \
    musl-dev \
    openjpeg-dev \
    openssl-dev \
    python3-dev \
    tcl-dev \
    tiff-dev \
    tk-dev \
    zlib-dev

# Copy only the dependency manifest first so the install layer is rebuilt
# solely when the pinned requirements change.
COPY requirements.txt /usr/src/requirements.txt

# One layer for the whole Python dependency install; --no-cache-dir keeps
# pip's download cache out of the image.
RUN python -m ensurepip --upgrade \
    && pip install --no-cache-dir setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt

# Copied last: editing the entrypoint must not invalidate the layers above.
COPY entrypoint.sh /usr/src/entrypoint.sh

# Mount point of the application source at runtime.
WORKDIR /python

# Exec form: no extra `/bin/sh -c` wrapper in front of the entrypoint script.
# The script is run through /bin/sh explicitly, so it does not depend on the
# executable bit surviving the COPY.
CMD ["/bin/sh", "/usr/src/entrypoint.sh"]
# ENTRYPOINT ["/usr/src/entrypoint.sh"]

Binary file not shown.

View File

Before

Width:  |  Height:  |  Size: 7.4 KiB

After

Width:  |  Height:  |  Size: 7.4 KiB

View File

Before

Width:  |  Height:  |  Size: 4.2 KiB

After

Width:  |  Height:  |  Size: 4.2 KiB

View File

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View File

Before

Width:  |  Height:  |  Size: 78 KiB

After

Width:  |  Height:  |  Size: 78 KiB

View File

Before

Width:  |  Height:  |  Size: 8.0 KiB

After

Width:  |  Height:  |  Size: 8.0 KiB

View File

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

@@ -2,12 +2,17 @@ from os.path import isdir
from stat import S_ISDIR, S_ISREG from stat import S_ISDIR, S_ISREG
import re import re
import pathlib import pathlib
import os
# from base64 import decodebytes # from base64 import decodebytes
import json import json
import mariadb import mariadb
import logging import logging
import schedule
import time
import pytz
CATEGORIES = [ CATEGORIES = [
"Prüfungen", "Prüfungen",
"Klausuren", "Klausuren",
@@ -18,7 +23,7 @@ CATEGORIES = [
"Multimedia", "Multimedia",
] ]
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"] SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
unizeug_path = "/home/wildarch/web/fet_unizeug/unizeug/" unizeug_path = os.environ.get("UNIZEUG_PATH", "./unizeug")
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
logging.basicConfig( logging.basicConfig(
@@ -31,7 +36,10 @@ info = log.info
error = log.error error = log.error
db = mariadb.connect( db = mariadb.connect(
host="localhost", user="wildserver", password="DBPassword", database="Unizeug" host=os.environ.get("DB_HOST", "db"),
user=os.environ.get("DB_USER", "user"),
password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
database=os.environ.get("DB_DATABASE", "unizeug"),
) )
c = db.cursor() c = db.cursor()
try: try:
@@ -62,6 +70,12 @@ except mariadb.OperationalError:
c.execute( c.execute(
"CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id))" "CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id))"
) )
try:
c.execute(
"CREATE TABLE FIP(id UUID DEFAULT(UUID()), filename VARCHAR(256), filetype VARCHAR(8),initTimeStamp DATETIME, PRIMARY KEY(id))"
)
except mariadb.OperationalError:
pass
db.commit() db.commit()
@@ -161,3 +175,8 @@ def link_prof(firstname, lastname, lid):
if __name__ == "__main__": if __name__ == "__main__":
get_dirstruct() get_dirstruct()
info("Database updated")
schedule.every().day.at("04:00", "Europe/Vienna").do(get_dirstruct)
while True:
schedule.run_pending()
time.sleep(1)

View File

@@ -3,7 +3,7 @@ from typing import List, Dict, Tuple, Sequence
from starlette.responses import StreamingResponse from starlette.responses import StreamingResponse
from annotated_types import IsDigit from annotated_types import IsDigit
from fastapi import FastAPI, File, HTTPException, UploadFile, Request, Form from fastapi import FastAPI, File, HTTPException, Path, UploadFile, Request, Form
from fastapi.responses import FileResponse from fastapi.responses import FileResponse
# import multiprocessing # import multiprocessing
@@ -25,17 +25,16 @@ import mariadb
import filetype import filetype
import datetime
import logging import logging
import inspect
import pathlib import pathlib
from pathlib import Path
from starlette.types import HTTPExceptionHandler from starlette.types import HTTPExceptionHandler
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
logging.basicConfig( logging.basicConfig(
filename="app.log", filename=os.environ.get("APP_LOG_PATH"),
level=logging.INFO, level=logging.INFO,
format="[%(asctime)s, %(filename)s:%(lineno)s -> %(funcName)10s() ]%(levelname)s: %(message)s", format="[%(asctime)s, %(filename)s:%(lineno)s -> %(funcName)10s() ]%(levelname)s: %(message)s",
) )
@@ -44,7 +43,10 @@ info = log.info
error = log.error error = log.error
db = mariadb.connect( db = mariadb.connect(
host="localhost", user="wildserver", password="DBPassword", database="Unizeug" host=os.environ.get("DB_HOST", "db"),
user=os.environ.get("DB_USER", "user"),
password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
database=os.environ.get("DB_DATABASE", "unizeug"),
) )
info("App Started") info("App Started")
@@ -53,8 +55,6 @@ info("App Started")
# startup() # startup()
app = FastAPI() app = FastAPI()
app.mount("/favicon", StaticFiles(directory="./favicon"), name="favicon")
app.mount("/static", StaticFiles(directory="./static"), name="static")
CATEGORIES = [ CATEGORIES = [
@@ -66,15 +66,18 @@ CATEGORIES = [
"Zusammenfassungen", "Zusammenfassungen",
"Multimedia", "Multimedia",
] ]
APP_ROOT_PATH = Path(os.environ.get("APP_ROOT_PATH", "./app"))
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"] SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
SUBCAT_CATEGORIES_I = [1, 2, 3] SUBCAT_CATEGORIES_I = [1, 2, 3]
EX_DATE_CATEGORIES = ["Prüfungen", "Klausuren"] EX_DATE_CATEGORIES = ["Prüfungen", "Klausuren"]
EX_DATE_CATEGORIES_I = [0, 1] EX_DATE_CATEGORIES_I = [0, 1]
UNIZEUG_PATH = "./app/dest/" UNIZEUG_PATH = Path(os.environ.get("UNIZEUG_PATH", "./app/dest"))
FILES_IN_PROGRESS = "./app/files/" FILES_IN_PROGRESS = APP_ROOT_PATH / "files/"
EMPTYFILE = "./app/graphics/empty.pdf" EMPTYFILE = APP_ROOT_PATH / "graphics/empty.pdf"
UNSUPPORTEDFILE = "./app/graphics/unsupported.pdf" UNSUPPORTEDFILE = APP_ROOT_PATH / "graphics/unsupported.pdf"
GREETINGFILE = "./app/graphics/greeting.pdf" GREETINGFILE = APP_ROOT_PATH / "graphics/greeting.pdf"
FAVICON = APP_ROOT_PATH / "favicon"
STATIC_FILES = APP_ROOT_PATH / "static"
# cur = db.cursor() # cur = db.cursor()
@@ -148,10 +151,22 @@ def sqlT(
# ) # )
app.mount(
"/favicon",
StaticFiles(directory=os.environ.get("FAVICON_PATH", FAVICON)),
name="favicon",
)
app.mount(
"/static",
StaticFiles(directory=os.environ.get("STATIC_PATH", STATIC_FILES)),
name="static",
)
@app.get("/") @app.get("/")
async def get_index(): async def get_index():
"""gives the Index.html file""" """gives the Index.html file"""
return FileResponse("./index.html") return FileResponse(APP_ROOT_PATH / "index.html")
@app.get("/files/{file_id}") @app.get("/files/{file_id}")
@@ -224,6 +239,9 @@ async def search_lva(
) )
# res += cur.fetchall() # res += cur.fetchall()
res = remove_duplicates(res + zw) res = remove_duplicates(res + zw)
info(
f"LVA Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
)
if searchlim == 0: if searchlim == 0:
return res return res
else: else:
@@ -258,6 +276,9 @@ async def search_profs(
) )
# res += cur.fetchall() # res += cur.fetchall()
res = remove_duplicates(res + zw) res = remove_duplicates(res + zw)
info(
f"Prof Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
)
if searchlim == 0: if searchlim == 0:
return res return res
else: else:
@@ -298,6 +319,9 @@ async def search_subcats(
) )
# res += cur.fetchall() # res += cur.fetchall()
res = remove_duplicates(res + rest) res = remove_duplicates(res + rest)
info(
f"Subcatrgory Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
)
if searchlim == 0: if searchlim == 0:
return res return res
else: else:
@@ -354,7 +378,7 @@ async def create_upload_file(files: List[UploadFile], c2pdf: bool = True):
content = doc.tobytes() content = doc.tobytes()
if ft != "dir": if ft != "dir":
filename = make_filename_unique(filename) filename = make_filename_unique(filename)
locpath = FILES_IN_PROGRESS + filename locpath = FILES_IN_PROGRESS / filename
# locpaths.append(locpath) # locpaths.append(locpath)
# cur = db.cursor() # cur = db.cursor()
# try: # try:
@@ -438,7 +462,7 @@ async def get_submission(
error(f"User tried to upload a file without specifying the {th[1]}") error(f"User tried to upload a file without specifying the {th[1]}")
raise HTTPException(400, f"You need to specify a {th[1]}") raise HTTPException(400, f"You need to specify a {th[1]}")
filepath = "./app/files/" + res[0][0] filepath = FILES_IN_PROGRESS / res[0][0]
# except mariadb.Error as e: # except mariadb.Error as e:
# print(f"Mariadb Error: {e}") # print(f"Mariadb Error: {e}")
# raise HTTPException( # raise HTTPException(
@@ -665,7 +689,7 @@ def make_savepath(
ex_date: str, ex_date: str,
fname: str, fname: str,
ftype: str, ftype: str,
) -> str: ) -> os.PathLike:
"""Generates the path, the file is saved to after the upload process is finished. It creates all nessecery directories.""" """Generates the path, the file is saved to after the upload process is finished. It creates all nessecery directories."""
lv = get_lvpath(lva) lv = get_lvpath(lva)
lvpath = lv[1] + "/" lvpath = lv[1] + "/"
@@ -677,9 +701,9 @@ def make_savepath(
sc = get_subcatpath(subcat, int(cat), pf[0], lv[0]) sc = get_subcatpath(subcat, int(cat), pf[0], lv[0])
scpath = sc[1] + "/" scpath = sc[1] + "/"
if int(cat) == 6: if int(cat) == 6:
savepath = UNIZEUG_PATH + lv[1] + "_Multimedia_only/" + pfpath savepath = UNIZEUG_PATH / (lv[1] + "_Multimedia_only/") / pfpath
else: else:
savepath = UNIZEUG_PATH + lvpath + pfpath + catpath + scpath savepath = UNIZEUG_PATH / lvpath / pfpath / catpath / scpath
os.makedirs(savepath, exist_ok=True) os.makedirs(savepath, exist_ok=True)
filename = sem + "_" filename = sem + "_"
if int(cat) in EX_DATE_CATEGORIES_I: if int(cat) in EX_DATE_CATEGORIES_I:
@@ -697,14 +721,14 @@ def make_savepath(
filename += fname filename += fname
file = filename + "." + ftype file = filename + "." + ftype
destpath = pathlib.Path(savepath + file) destpath = savepath / file
i = 0 i = 0
while destpath.is_file(): while destpath.is_file():
file = filename + f"_{i}." + ftype file = filename + f"_{i}." + ftype
i += 1 i += 1
destpath = pathlib.Path(savepath + file) destpath = savepath / file
destpath.touch() destpath.touch()
return savepath + file return savepath / file
def get_lvpath(lva: str) -> Tuple[int, str]: def get_lvpath(lva: str) -> Tuple[int, str]:
@@ -897,10 +921,10 @@ async def save_files_to_folder(files: List[UploadFile]) -> str:
if filename == "": if filename == "":
filename = "None" filename = "None"
filename = make_filename_unique(filename) filename = make_filename_unique(filename)
os.mkdir(FILES_IN_PROGRESS + filename) os.mkdir(FILES_IN_PROGRESS / filename)
for idx, file in enumerate(files): for idx, file in enumerate(files):
fn = file.filename if file.filename is not None else "None" + str(idx) fn = file.filename if file.filename is not None else "None" + str(idx)
with open(FILES_IN_PROGRESS + filename + "/" + fn, "wb") as f: with open(FILES_IN_PROGRESS / filename / fn, "wb") as f:
f.write(await file.read()) f.write(await file.read())
return filename return filename
@@ -928,13 +952,13 @@ async def remove_old_FIP_entrys():
info(f"Remove Files: {files}") info(f"Remove Files: {files}")
for file in files: for file in files:
sql("DELETE FROM FIP WHERE id=?", (file["id"]), return_result=False) sql("DELETE FROM FIP WHERE id=?", (file["id"]), return_result=False)
os.remove(FILES_IN_PROGRESS + file["filename"]) os.remove(FILES_IN_PROGRESS / file["filename"])
# sql( # sql(
# "DELETE FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24", # "DELETE FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24",
# return_result=False, # return_result=False,
# ) # )
db.commit() db.commit()
return FileResponse("./index.html") return FileResponse(APP_ROOT_PATH / "/index.html")
def delete_from_FIP(uuid: str): def delete_from_FIP(uuid: str):

View File

@@ -285,7 +285,7 @@ function submitPdf(eve) {
async function submitForm(formData) { async function submitForm(formData) {
try { try {
const updateEventSource = new EventSource( const updateEventSource = new EventSource(
"http://127.0.0.1:8000/get_censor_status/" + doc.fID, window.location + "get_censor_status/" + doc.fID,
); );
modal.style.display = "flex"; modal.style.display = "flex";
// console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID); // console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
@@ -295,7 +295,7 @@ async function submitForm(formData) {
upload_status.innerText = upload_status.innerText =
"Censoring Page " + data.page + "/" + data.pages; "Censoring Page " + data.page + "/" + data.pages;
}); });
const response = await fetch("http://127.0.0.1:8000/submit", { const response = await fetch(window.location + "submit", {
method: "POST", method: "POST",
body: formData, body: formData,
}); });
@@ -338,7 +338,7 @@ function uploadPdf(eve) {
} }
async function uploadFile(formData) { async function uploadFile(formData) {
try { try {
const response = await fetch("http://127.0.0.1:8000/uploadfile", { const response = await fetch(window.location + "uploadfile", {
method: "POST", method: "POST",
body: formData, body: formData,
}); });

View File

@@ -1,4 +1,4 @@
var url = "http://127.0.0.1:8000/search/"; var url = window.location + "search/";
var lid = null; var lid = null;
var pid = null; var pid = null;
var activeAutocompletion = null; var activeAutocompletion = null;

67
compose.yml Normal file
View File

@@ -0,0 +1,67 @@
# Compose stack: web app, MariaDB, and the directory scanner.
# The obsolete top-level `version` key is omitted; the Compose Specification
# ignores it and current Docker Compose releases warn when it is present.
#
# Credentials can be overridden from the shell environment or an .env file:
#   DB_PASSWORD       password of the application DB user (default: DBPassword)
#   DB_ROOT_PASSWORD  MariaDB root password               (default: DBPassword)
# NOTE(review): the defaults are kept only for backward compatibility with an
# already-initialised ./mariadb data directory -- change them for any real
# deployment.
services:
  app:
    container_name: python-app
    # command: python -m uvicorn app.main:app --host 0.0.0.0 --port 80
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      - ./app:/python
      # `:source` is not a valid bind-mount mode in the short syntax
      # (valid modes are e.g. ro, rw, z, Z), so the suffix is dropped.
      - ./unizeug:/unizeug
    ports:
      - "80:80"
    restart: unless-stopped
    environment:
      # Executed by entrypoint.sh inside the container.
      ENTRY_COMMAND: python -m uvicorn main:app --host 0.0.0.0 --port 80
      APP_LOG_PATH: /python/app.log
      APP_ROOT_PATH: /python
      UNIZEUG_PATH: /unizeug
      DB_HOST: db
      DB_USER: app
      DB_PASSWORD: ${DB_PASSWORD:-DBPassword}
      DB_DATABASE: Unizeug
      TZ: "Europe/Vienna"
    depends_on:
      # The app opens its DB connection at import time, so wait until the
      # healthcheck defined on `db` passes instead of merely until it starts.
      db:
        condition: service_healthy
      scaner:
        condition: service_started

  db:
    container_name: db
    # NOTE(review): the image is unpinned. Pin it to the major version that
    # created ./mariadb once that is known, to avoid surprise upgrades.
    image: mariadb
    restart: unless-stopped
    environment:
      MARIADB_ROOT_PASSWORD: ${DB_ROOT_PASSWORD:-DBPassword}
      MARIADB_USER: app
      MARIADB_PASSWORD: ${DB_PASSWORD:-DBPassword}
      MARIADB_DATABASE: Unizeug
      TZ: "Europe/Vienna"
    healthcheck:
      test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 3
    volumes:
      - ./mariadb:/var/lib/mysql

  scaner:
    container_name: python-scaner
    # command: python /python/init.py
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      - ./app:/python
      - ./unizeug:/unizeug
    restart: unless-stopped
    environment:
      # Executed by entrypoint.sh inside the container.
      ENTRY_COMMAND: python /python/init.py
      UNIZEUG_PATH: /unizeug
      DB_HOST: db
      DB_USER: app
      DB_PASSWORD: ${DB_PASSWORD:-DBPassword}
      DB_DATABASE: Unizeug
      TZ: "Europe/Vienna"
    depends_on:
      # Same reasoning as for `app`: wait for a healthy database.
      db:
        condition: service_healthy

4
entrypoint.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Container entrypoint: runs the command given in $ENTRY_COMMAND.
#
# The command string is handed to `/bin/sh -c`, so it may contain arguments
# and shell syntax. `exec` replaces this script with that shell instead of
# keeping it around as an idle parent; the exit status of the command is the
# exit status of the container either way.
#
# POSIX `[ ... ]` is used instead of `[[ ... ]]`, which is a bash extension
# and not guaranteed to exist under a `#!/bin/sh` shebang.
if [ -n "$ENTRY_COMMAND" ]; then
    exec /bin/sh -c "$ENTRY_COMMAND"
fi

# Reached only when ENTRY_COMMAND is unset or empty. Exit status stays 0 as
# before, but the reason is now reported instead of exiting silently.
echo "entrypoint.sh: ENTRY_COMMAND is not set; nothing to run" >&2

View File

@@ -44,10 +44,12 @@ pypdf==5.2.0
pytesseract==0.3.13 pytesseract==0.3.13
python-dotenv==1.0.1 python-dotenv==1.0.1
python-multipart==0.0.20 python-multipart==0.0.20
pytz==2025.2
PyYAML==6.0.2 PyYAML==6.0.2
requests==2.32.3 requests==2.32.3
rich==13.9.4 rich==13.9.4
rich-toolkit==0.13.2 rich-toolkit==0.13.2
schedule==1.2.2
shellingham==1.5.4 shellingham==1.5.4
sniffio==1.3.1 sniffio==1.3.1
starlette==0.45.3 starlette==0.45.3