Compare commits

..

6 Commits

Author SHA1 Message Date
Marcel Gansfusz
e6727daf8e i forgor 2025-10-28 19:32:33 +01:00
Marcel Gansfusz
d6508c739d in between state before converting to pathlib 2025-10-28 19:32:01 +01:00
Marcel Gansfusz
856c401c06 moved DOCKERFILE to Dockerfile 2025-10-27 18:17:43 +01:00
Marcel Gansfusz
4da77c95d1 finished writeing compatibility with docker; untested 2025-10-24 21:19:36 +02:00
Marcel Gansfusz
98742107b2 changed structure for docker usage 2025-10-24 21:02:42 +02:00
b9eb5e8bd4 Merge pull request 'improve_censoring_speed' (#1) from improve_censoring_speed into main
Reviewed-on: #1
2025-10-23 15:43:40 +02:00
22 changed files with 180 additions and 33 deletions

2
.gitignore vendored
View File

@@ -5,3 +5,5 @@ app/dest
app.log
init.log
app/__pycache__/
mariadb/*
unizeug

29
Dockerfile Normal file
View File

@@ -0,0 +1,29 @@
# Image shared by the services in compose.yml. The process to start is chosen
# at runtime through the ENTRY_COMMAND environment variable, which
# entrypoint.sh executes.
#
# Python 3.13 has a stable release, so the pre-release "-rc" tag is not used.
FROM python:3.13-alpine

WORKDIR /usr/src/

# Compiler toolchain and development headers for Python packages that have to
# be built from source on Alpine (presumably the MariaDB connector and
# imaging libraries from requirements.txt -- confirm before trimming this list).
# Installed before any COPY so that editing requirements.txt or entrypoint.sh
# does not invalidate this expensive layer.
RUN apk add --no-cache \
        cargo \
        freetype-dev \
        g++ \
        gcc \
        jpeg-dev \
        lcms2-dev \
        libffi-dev \
        libwebp-dev \
        make \
        mariadb-connector-c-dev \
        musl-dev \
        openjpeg-dev \
        openssl-dev \
        python3-dev \
        tcl-dev \
        tiff-dev \
        tk-dev \
        zlib-dev

# Only the dependency manifest is copied before installing, so the dependency
# layer stays cached until requirements.txt itself changes.
# --no-cache-dir keeps pip's download cache out of the image.
COPY requirements.txt /usr/src/requirements.txt
RUN python -m ensurepip --upgrade \
    && pip install --no-cache-dir setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt

COPY entrypoint.sh /usr/src/entrypoint.sh

# compose.yml bind-mounts the application code to /python.
WORKDIR /python

# Exec form: no extra "/bin/sh -c" wrapper is put in front of the entrypoint
# script.
CMD ["/bin/sh", "/usr/src/entrypoint.sh"]
# ENTRYPOINT ["/usr/src/entrypoint.sh"]

Binary file not shown.

View File

Before

Width:  |  Height:  |  Size: 7.4 KiB

After

Width:  |  Height:  |  Size: 7.4 KiB

View File

Before

Width:  |  Height:  |  Size: 4.2 KiB

After

Width:  |  Height:  |  Size: 4.2 KiB

View File

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View File

Before

Width:  |  Height:  |  Size: 78 KiB

After

Width:  |  Height:  |  Size: 78 KiB

View File

Before

Width:  |  Height:  |  Size: 8.0 KiB

After

Width:  |  Height:  |  Size: 8.0 KiB

View File

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

@@ -2,12 +2,17 @@ from os.path import isdir
from stat import S_ISDIR, S_ISREG
import re
import pathlib
import os
# from base64 import decodebytes
import json
import mariadb
import logging
import schedule
import time
import pytz
CATEGORIES = [
"Prüfungen",
"Klausuren",
@@ -18,7 +23,7 @@ CATEGORIES = [
"Multimedia",
]
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
unizeug_path = "/home/wildarch/web/fet_unizeug/unizeug/"
unizeug_path = os.environ.get("UNIZEUG_PATH", "./unizeug")
log = logging.getLogger(__name__)
logging.basicConfig(
@@ -31,7 +36,10 @@ info = log.info
error = log.error
db = mariadb.connect(
host="localhost", user="wildserver", password="DBPassword", database="Unizeug"
host=os.environ.get("DB_HOST", "db"),
user=os.environ.get("DB_USER", "user"),
password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
database=os.environ.get("DB_DATABASE", "unizeug"),
)
c = db.cursor()
try:
@@ -62,12 +70,18 @@ except mariadb.OperationalError:
c.execute(
"CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id))"
)
# Create the FIP table (presumably "files in progress" -- confirm) if it is
# missing. IF NOT EXISTS makes the statement a no-op on an already initialised
# database, so any OperationalError that still occurs is a genuine failure and
# is logged instead of being silently discarded.
try:
    c.execute(
        "CREATE TABLE IF NOT EXISTS FIP(id UUID DEFAULT(UUID()), filename VARCHAR(256), filetype VARCHAR(8),initTimeStamp DATETIME, PRIMARY KEY(id))"
    )
except mariadb.OperationalError as e:
    error(f"Could not create table FIP: {e}")
db.commit()
def get_dirstruct():
# with open("app/pwfile.json", "r") as f:
# cred = json.load(f)
# cred = json.load(f)
# ssh = paramiko.SSHClient()
# print(cred["sftpurl"])
# ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
@@ -76,7 +90,7 @@ def get_dirstruct():
# ssh.connect(cred["sftpurl"], username=cred["sftpuser"], password=cred["sftpPW"])
# sftp = ssh.open_sftp()
# folders = sftp.listdir_attr(unizeug_path)
folders=pathlib.Path(unizeug_path)
folders = pathlib.Path(unizeug_path)
for entry in folders.iterdir():
if entry is None:
continue
@@ -161,3 +175,8 @@ def link_prof(firstname, lastname, lid):
if __name__ == "__main__":
get_dirstruct()
info("Database updated")
schedule.every().day.at("04:00", "Europe/Vienna").do(get_dirstruct)
while True:
schedule.run_pending()
time.sleep(1)

View File

@@ -3,7 +3,7 @@ from typing import List, Dict, Tuple, Sequence
from starlette.responses import StreamingResponse
from annotated_types import IsDigit
from fastapi import FastAPI, File, HTTPException, UploadFile, Request, Form
from fastapi import FastAPI, File, HTTPException, Path, UploadFile, Request, Form
from fastapi.responses import FileResponse
# import multiprocessing
@@ -25,17 +25,16 @@ import mariadb
import filetype
import datetime
import logging
import inspect
import pathlib
from pathlib import Path
from starlette.types import HTTPExceptionHandler
log = logging.getLogger(__name__)
logging.basicConfig(
filename="app.log",
filename=os.environ.get("APP_LOG_PATH"),
level=logging.INFO,
format="[%(asctime)s, %(filename)s:%(lineno)s -> %(funcName)10s() ]%(levelname)s: %(message)s",
)
@@ -44,7 +43,10 @@ info = log.info
error = log.error
db = mariadb.connect(
host="localhost", user="wildserver", password="DBPassword", database="Unizeug"
host=os.environ.get("DB_HOST", "db"),
user=os.environ.get("DB_USER", "user"),
password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
database=os.environ.get("DB_DATABASE", "unizeug"),
)
info("App Started")
@@ -53,8 +55,6 @@ info("App Started")
# startup()
app = FastAPI()
app.mount("/favicon", StaticFiles(directory="./favicon"), name="favicon")
app.mount("/static", StaticFiles(directory="./static"), name="static")
CATEGORIES = [
@@ -66,15 +66,18 @@ CATEGORIES = [
"Zusammenfassungen",
"Multimedia",
]
APP_ROOT_PATH = Path(os.environ.get("APP_ROOT_PATH", "./app"))
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
SUBCAT_CATEGORIES_I = [1, 2, 3]
EX_DATE_CATEGORIES = ["Prüfungen", "Klausuren"]
EX_DATE_CATEGORIES_I = [0, 1]
UNIZEUG_PATH = "./app/dest/"
FILES_IN_PROGRESS = "./app/files/"
EMPTYFILE = "./app/graphics/empty.pdf"
UNSUPPORTEDFILE = "./app/graphics/unsupported.pdf"
GREETINGFILE = "./app/graphics/greeting.pdf"
UNIZEUG_PATH = Path(os.environ.get("UNIZEUG_PATH", "./app/dest"))
FILES_IN_PROGRESS = APP_ROOT_PATH / "files/"
EMPTYFILE = APP_ROOT_PATH / "graphics/empty.pdf"
UNSUPPORTEDFILE = APP_ROOT_PATH / "graphics/unsupported.pdf"
GREETINGFILE = APP_ROOT_PATH / "graphics/greeting.pdf"
FAVICON = APP_ROOT_PATH / "favicon"
STATIC_FILES = APP_ROOT_PATH / "static"
# cur = db.cursor()
@@ -148,10 +151,22 @@ def sqlT(
# )
app.mount(
"/favicon",
StaticFiles(directory=os.environ.get("FAVICON_PATH", FAVICON)),
name="favicon",
)
app.mount(
"/static",
StaticFiles(directory=os.environ.get("STATIC_PATH", STATIC_FILES)),
name="static",
)
@app.get("/")
async def get_index():
"""gives the Index.html file"""
return FileResponse("./index.html")
return FileResponse(APP_ROOT_PATH / "index.html")
@app.get("/files/{file_id}")
@@ -224,6 +239,9 @@ async def search_lva(
)
# res += cur.fetchall()
res = remove_duplicates(res + zw)
# Log exactly what is returned: a limit of 0 means "no limit", so slice with
# None (a stop of -1 would drop the last hit from the log).
info(
    f"LVA Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else None)]}"
)
if searchlim == 0:
return res
else:
@@ -258,6 +276,9 @@ async def search_profs(
)
# res += cur.fetchall()
res = remove_duplicates(res + zw)
# Log exactly what is returned: a limit of 0 means "no limit", so slice with
# None (a stop of -1 would drop the last hit from the log).
info(
    f"Prof Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else None)]}"
)
if searchlim == 0:
return res
else:
@@ -298,6 +319,9 @@ async def search_subcats(
)
# res += cur.fetchall()
res = remove_duplicates(res + rest)
# Log exactly what is returned: a limit of 0 means "no limit", so slice with
# None (a stop of -1 would drop the last hit from the log).
info(
    f"Subcatrgory Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else None)]}"
)
if searchlim == 0:
return res
else:
@@ -354,7 +378,7 @@ async def create_upload_file(files: List[UploadFile], c2pdf: bool = True):
content = doc.tobytes()
if ft != "dir":
filename = make_filename_unique(filename)
locpath = FILES_IN_PROGRESS + filename
locpath = FILES_IN_PROGRESS / filename
# locpaths.append(locpath)
# cur = db.cursor()
# try:
@@ -438,7 +462,7 @@ async def get_submission(
error(f"User tried to upload a file without specifying the {th[1]}")
raise HTTPException(400, f"You need to specify a {th[1]}")
filepath = "./app/files/" + res[0][0]
filepath = FILES_IN_PROGRESS / res[0][0]
# except mariadb.Error as e:
# print(f"Mariadb Error: {e}")
# raise HTTPException(
@@ -665,7 +689,7 @@ def make_savepath(
ex_date: str,
fname: str,
ftype: str,
) -> str:
) -> os.PathLike:
"""Generates the path, the file is saved to after the upload process is finished. It creates all nessecery directories."""
lv = get_lvpath(lva)
lvpath = lv[1] + "/"
@@ -677,9 +701,9 @@ def make_savepath(
sc = get_subcatpath(subcat, int(cat), pf[0], lv[0])
scpath = sc[1] + "/"
if int(cat) == 6:
savepath = UNIZEUG_PATH + lv[1] + "_Multimedia_only/" + pfpath
savepath = UNIZEUG_PATH / (lv[1] + "_Multimedia_only/") / pfpath
else:
savepath = UNIZEUG_PATH + lvpath + pfpath + catpath + scpath
savepath = UNIZEUG_PATH / lvpath / pfpath / catpath / scpath
os.makedirs(savepath, exist_ok=True)
filename = sem + "_"
if int(cat) in EX_DATE_CATEGORIES_I:
@@ -697,14 +721,14 @@ def make_savepath(
filename += fname
file = filename + "." + ftype
destpath = pathlib.Path(savepath + file)
destpath = savepath / file
i = 0
while destpath.is_file():
file = filename + f"_{i}." + ftype
i += 1
destpath = pathlib.Path(savepath + file)
destpath = savepath / file
destpath.touch()
return savepath + file
return savepath / file
def get_lvpath(lva: str) -> Tuple[int, str]:
@@ -897,10 +921,10 @@ async def save_files_to_folder(files: List[UploadFile]) -> str:
if filename == "":
filename = "None"
filename = make_filename_unique(filename)
os.mkdir(FILES_IN_PROGRESS + filename)
os.mkdir(FILES_IN_PROGRESS / filename)
for idx, file in enumerate(files):
fn = file.filename if file.filename is not None else "None" + str(idx)
with open(FILES_IN_PROGRESS + filename + "/" + fn, "wb") as f:
with open(FILES_IN_PROGRESS / filename / fn, "wb") as f:
f.write(await file.read())
return filename
@@ -928,13 +952,13 @@ async def remove_old_FIP_entrys():
info(f"Remove Files: {files}")
for file in files:
sql("DELETE FROM FIP WHERE id=?", (file["id"]), return_result=False)
os.remove(FILES_IN_PROGRESS + file["filename"])
os.remove(FILES_IN_PROGRESS / file["filename"])
# sql(
# "DELETE FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24",
# return_result=False,
# )
db.commit()
return FileResponse("./index.html")
return FileResponse(APP_ROOT_PATH / "/index.html")
def delete_from_FIP(uuid: str):

View File

@@ -285,7 +285,7 @@ function submitPdf(eve) {
async function submitForm(formData) {
try {
const updateEventSource = new EventSource(
"http://127.0.0.1:8000/get_censor_status/" + doc.fID,
window.location + "get_censor_status/" + doc.fID,
);
modal.style.display = "flex";
// console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
@@ -295,7 +295,7 @@ async function submitForm(formData) {
upload_status.innerText =
"Censoring Page " + data.page + "/" + data.pages;
});
const response = await fetch("http://127.0.0.1:8000/submit", {
const response = await fetch(window.location + "submit", {
method: "POST",
body: formData,
});
@@ -338,7 +338,7 @@ function uploadPdf(eve) {
}
async function uploadFile(formData) {
try {
const response = await fetch("http://127.0.0.1:8000/uploadfile", {
const response = await fetch(window.location + "uploadfile", {
method: "POST",
body: formData,
});

View File

@@ -1,4 +1,4 @@
var url = "http://127.0.0.1:8000/search/";
// Base URL of the search endpoint, resolved relative to the page URL so a
// query string or "#fragment" in the address bar cannot end up inside it.
var url = new URL("search/", window.location.href).href;
var lid = null;
var pid = null;
var activeAutocompletion = null;

67
compose.yml Normal file
View File

@@ -0,0 +1,67 @@
# Compose stack: the web app, its MariaDB database and the directory scanner.
# Both Python services are built from the same Dockerfile and differ only in
# ENTRY_COMMAND, which entrypoint.sh executes.
#
# The top-level "version" key is omitted: the Compose Specification treats it
# as obsolete and ignores it.
#
# NOTE(review): the database credentials below are committed in plain text.
# Move them to an untracked .env file or Compose secrets before deploying.
services:
  app:
    container_name: python-app
    # command: python -m uvicorn app.main:app --host 0.0.0.0 --port 80
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      - ./app:/python
      # No access-mode suffix: ":source" is not a valid mount mode, so the
      # default (read-write) is used.
      - ./unizeug:/unizeug
    ports:
      - "80:80"
    restart: unless-stopped
    environment:
      ENTRY_COMMAND: python -m uvicorn main:app --host 0.0.0.0 --port 80
      APP_LOG_PATH: /python/app.log
      APP_ROOT_PATH: /python
      UNIZEUG_PATH: /unizeug
      DB_HOST: db
      DB_USER: app
      DB_PASSWORD: DBPassword
      DB_DATABASE: Unizeug
      TZ: "Europe/Vienna"
    depends_on:
      # The app opens its database connection at import time, so wait until
      # the db healthcheck passes rather than only until the container starts.
      db:
        condition: service_healthy
      scaner:
        condition: service_started

  db:
    container_name: db
    image: mariadb
    restart: unless-stopped
    environment:
      MARIADB_ROOT_PASSWORD: DBPassword
      MARIADB_USER: app
      MARIADB_PASSWORD: DBPassword
      MARIADB_DATABASE: Unizeug
      TZ: "Europe/Vienna"
    healthcheck:
      test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 3
    volumes:
      - ./mariadb:/var/lib/mysql

  scaner:
    container_name: python-scaner
    # command: python /python/init.py
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      - ./app:/python
      # No access-mode suffix: ":source" is not a valid mount mode, so the
      # default (read-write) is used.
      # NOTE(review): the scanner appears to only read this tree; consider ":ro".
      - ./unizeug:/unizeug
    restart: unless-stopped
    environment:
      ENTRY_COMMAND: python /python/init.py
      UNIZEUG_PATH: /unizeug
      DB_HOST: db
      DB_USER: app
      DB_PASSWORD: DBPassword
      DB_DATABASE: Unizeug
      TZ: "Europe/Vienna"
    depends_on:
      # init.py connects to the database as soon as it starts.
      db:
        condition: service_healthy

4
entrypoint.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Container entrypoint: runs the command given in the ENTRY_COMMAND
# environment variable (set per service in compose.yml).

# POSIX "[ ]" test instead of "[[ ]]", which plain sh is not required to support.
if [ -n "$ENTRY_COMMAND" ]; then
    # exec replaces this wrapper shell instead of leaving it waiting on a
    # child, and the command's exit status becomes the script's exit status.
    exec /bin/sh -c "$ENTRY_COMMAND"
fi

# Nothing to run is a configuration error; report it instead of exiting
# silently with status 0.
echo "entrypoint.sh: ENTRY_COMMAND is not set; nothing to run" >&2
exit 1

View File

@@ -44,10 +44,12 @@ pypdf==5.2.0
pytesseract==0.3.13
python-dotenv==1.0.1
python-multipart==0.0.20
pytz==2025.2
PyYAML==6.0.2
requests==2.32.3
rich==13.9.4
rich-toolkit==0.13.2
schedule==1.2.2
shellingham==1.5.4
sniffio==1.3.1
starlette==0.45.3