Intermediate state before converting to pathlib

This commit is contained in:
Marcel Gansfusz
2025-10-28 19:32:01 +01:00
parent 856c401c06
commit d6508c739d
9 changed files with 104 additions and 51 deletions

1
.gitignore vendored
View File

@@ -5,3 +5,4 @@ app/dest
app.log app.log
init.log init.log
app/__pycache__/ app/__pycache__/
mariadb/*

View File

@@ -1,4 +1,29 @@
FROM python:3 FROM python:3.13-rc-alpine
WORKDIR /usr/src/ WORKDIR /usr/src/
COPY requirements.txt /usr/src/requirements.txt COPY requirements.txt /usr/src/requirements.txt
COPY entrypoint.sh /usr/src/entrypoint.sh
RUN apk add --no-cache \
gcc \
g++ \
musl-dev \
python3-dev \
libffi-dev \
openssl-dev \
cargo \
make \
mariadb-connector-c-dev \
jpeg-dev \
zlib-dev \
freetype-dev \
lcms2-dev \
openjpeg-dev \
tiff-dev \
tk-dev \
tcl-dev \
libwebp-dev
RUN python -m ensurepip --upgrade
RUN pip install setuptools wheel
RUN pip install -r requirements.txt RUN pip install -r requirements.txt
WORKDIR /python
CMD /bin/sh /usr/src/entrypoint.sh
# ENTRYPOINT ["/usr/src/entrypoint.sh"]

Binary file not shown.

View File

@@ -10,7 +10,8 @@ import mariadb
import logging import logging
import schedule import schedule
import time import time
import pytz
CATEGORIES = [ CATEGORIES = [
"Prüfungen", "Prüfungen",
@@ -22,7 +23,7 @@ CATEGORIES = [
"Multimedia", "Multimedia",
] ]
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"] SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
unizeug_path = os.environ.get("UNIZEUG_PATH","./unizeug") unizeug_path = os.environ.get("UNIZEUG_PATH", "./unizeug")
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
logging.basicConfig( logging.basicConfig(
@@ -39,7 +40,6 @@ db = mariadb.connect(
user=os.environ.get("DB_USER", "user"), user=os.environ.get("DB_USER", "user"),
password=os.environ.get("DB_PASSWORD", "DBPASSWORD"), password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
database=os.environ.get("DB_DATABASE", "unizeug"), database=os.environ.get("DB_DATABASE", "unizeug"),
) )
c = db.cursor() c = db.cursor()
try: try:
@@ -71,7 +71,9 @@ c.execute(
"CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id))" "CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id))"
) )
try: try:
c.execute("CREATE TABLE FIP(id UUID DEFAULT(UUID()), filename VARCHAR(256), filetype VARCHAR(8),initTimeStamp DATETIME, PRIMARY KEY(id))") c.execute(
"CREATE TABLE FIP(id UUID DEFAULT(UUID()), filename VARCHAR(256), filetype VARCHAR(8),initTimeStamp DATETIME, PRIMARY KEY(id))"
)
except mariadb.OperationalError: except mariadb.OperationalError:
pass pass
db.commit() db.commit()
@@ -79,7 +81,7 @@ db.commit()
def get_dirstruct(): def get_dirstruct():
# with open("app/pwfile.json", "r") as f: # with open("app/pwfile.json", "r") as f:
# cred = json.load(f) # cred = json.load(f)
# ssh = paramiko.SSHClient() # ssh = paramiko.SSHClient()
# print(cred["sftpurl"]) # print(cred["sftpurl"])
# ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
@@ -88,7 +90,7 @@ def get_dirstruct():
# ssh.connect(cred["sftpurl"], username=cred["sftpuser"], password=cred["sftpPW"]) # ssh.connect(cred["sftpurl"], username=cred["sftpuser"], password=cred["sftpPW"])
# sftp = ssh.open_sftp() # sftp = ssh.open_sftp()
# folders = sftp.listdir_attr(unizeug_path) # folders = sftp.listdir_attr(unizeug_path)
folders=pathlib.Path(unizeug_path) folders = pathlib.Path(unizeug_path)
for entry in folders.iterdir(): for entry in folders.iterdir():
if entry is None: if entry is None:
continue continue
@@ -173,7 +175,8 @@ def link_prof(firstname, lastname, lid):
if __name__ == "__main__": if __name__ == "__main__":
get_dirstruct() get_dirstruct()
schedule.every.day.at("04:00","Europe/Vienna").do(get_dirstruct) info("Database updated")
schedule.every().day.at("04:00", "Europe/Vienna").do(get_dirstruct)
while True: while True:
schedule.run_pending() schedule.run_pending()
time.sleep(1) time.sleep(1)

View File

@@ -3,7 +3,7 @@ from typing import List, Dict, Tuple, Sequence
from starlette.responses import StreamingResponse from starlette.responses import StreamingResponse
from annotated_types import IsDigit from annotated_types import IsDigit
from fastapi import FastAPI, File, HTTPException, UploadFile, Request, Form from fastapi import FastAPI, File, HTTPException, Path, UploadFile, Request, Form
from fastapi.responses import FileResponse from fastapi.responses import FileResponse
# import multiprocessing # import multiprocessing
@@ -28,6 +28,7 @@ import filetype
import logging import logging
import pathlib import pathlib
from pathlib import Path
from starlette.types import HTTPExceptionHandler from starlette.types import HTTPExceptionHandler
@@ -54,16 +55,6 @@ info("App Started")
# startup() # startup()
app = FastAPI() app = FastAPI()
app.mount(
"/favicon",
StaticFiles(directory=os.environ.get("FAVICON_PATH", ".app/favicon")),
name="favicon",
)
app.mount(
"/static",
StaticFiles(directory=os.environ.get("STATIC_PATH", "./static")),
name="static",
)
CATEGORIES = [ CATEGORIES = [
@@ -75,16 +66,18 @@ CATEGORIES = [
"Zusammenfassungen", "Zusammenfassungen",
"Multimedia", "Multimedia",
] ]
APP_ROOT_PATH = os.environ.get("APP_ROOT_PATH", "./app") APP_ROOT_PATH = Path(os.environ.get("APP_ROOT_PATH", "./app"))
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"] SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
SUBCAT_CATEGORIES_I = [1, 2, 3] SUBCAT_CATEGORIES_I = [1, 2, 3]
EX_DATE_CATEGORIES = ["Prüfungen", "Klausuren"] EX_DATE_CATEGORIES = ["Prüfungen", "Klausuren"]
EX_DATE_CATEGORIES_I = [0, 1] EX_DATE_CATEGORIES_I = [0, 1]
UNIZEUG_PATH = os.environ.get("UNIZEUG_PATH", "./app/dest") UNIZEUG_PATH = Path(os.environ.get("UNIZEUG_PATH", "./app/dest"))
FILES_IN_PROGRESS = f"{APP_ROOT_PATH}/files/" FILES_IN_PROGRESS = APP_ROOT_PATH / "files/"
EMPTYFILE = f"{APP_ROOT_PATH}/graphics/empty.pdf" EMPTYFILE = APP_ROOT_PATH / "graphics/empty.pdf"
UNSUPPORTEDFILE = f"{APP_ROOT_PATH}/graphics/unsupported.pdf" UNSUPPORTEDFILE = APP_ROOT_PATH / "graphics/unsupported.pdf"
GREETINGFILE = f"{APP_ROOT_PATH}/graphics/greeting.pdf" GREETINGFILE = APP_ROOT_PATH / "graphics/greeting.pdf"
FAVICON = APP_ROOT_PATH / "favicon"
STATIC_FILES = APP_ROOT_PATH / "static"
# cur = db.cursor() # cur = db.cursor()
@@ -158,10 +151,22 @@ def sqlT(
# ) # )
app.mount(
"/favicon",
StaticFiles(directory=os.environ.get("FAVICON_PATH", FAVICON)),
name="favicon",
)
app.mount(
"/static",
StaticFiles(directory=os.environ.get("STATIC_PATH", STATIC_FILES)),
name="static",
)
@app.get("/") @app.get("/")
async def get_index(): async def get_index():
"""gives the Index.html file""" """gives the Index.html file"""
return FileResponse(f"{APP_ROOT_PATH}/index.html") return FileResponse(APP_ROOT_PATH / "index.html")
@app.get("/files/{file_id}") @app.get("/files/{file_id}")
@@ -234,6 +239,9 @@ async def search_lva(
) )
# res += cur.fetchall() # res += cur.fetchall()
res = remove_duplicates(res + zw) res = remove_duplicates(res + zw)
info(
f"LVA Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
)
if searchlim == 0: if searchlim == 0:
return res return res
else: else:
@@ -268,6 +276,9 @@ async def search_profs(
) )
# res += cur.fetchall() # res += cur.fetchall()
res = remove_duplicates(res + zw) res = remove_duplicates(res + zw)
info(
f"Prof Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
)
if searchlim == 0: if searchlim == 0:
return res return res
else: else:
@@ -308,6 +319,9 @@ async def search_subcats(
) )
# res += cur.fetchall() # res += cur.fetchall()
res = remove_duplicates(res + rest) res = remove_duplicates(res + rest)
info(
f"Subcatrgory Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
)
if searchlim == 0: if searchlim == 0:
return res return res
else: else:
@@ -364,7 +378,7 @@ async def create_upload_file(files: List[UploadFile], c2pdf: bool = True):
content = doc.tobytes() content = doc.tobytes()
if ft != "dir": if ft != "dir":
filename = make_filename_unique(filename) filename = make_filename_unique(filename)
locpath = FILES_IN_PROGRESS + filename locpath = FILES_IN_PROGRESS / filename
# locpaths.append(locpath) # locpaths.append(locpath)
# cur = db.cursor() # cur = db.cursor()
# try: # try:
@@ -448,7 +462,7 @@ async def get_submission(
error(f"User tried to upload a file without specifying the {th[1]}") error(f"User tried to upload a file without specifying the {th[1]}")
raise HTTPException(400, f"You need to specify a {th[1]}") raise HTTPException(400, f"You need to specify a {th[1]}")
filepath = "./app/files/" + res[0][0] filepath = FILES_IN_PROGRESS / res[0][0]
# except mariadb.Error as e: # except mariadb.Error as e:
# print(f"Mariadb Error: {e}") # print(f"Mariadb Error: {e}")
# raise HTTPException( # raise HTTPException(
@@ -675,7 +689,7 @@ def make_savepath(
ex_date: str, ex_date: str,
fname: str, fname: str,
ftype: str, ftype: str,
) -> str: ) -> os.PathLike:
"""Generates the path the file is saved to after the upload process is finished. It creates all necessary directories.""" """Generates the path the file is saved to after the upload process is finished. It creates all necessary directories."""
lv = get_lvpath(lva) lv = get_lvpath(lva)
lvpath = lv[1] + "/" lvpath = lv[1] + "/"
@@ -687,9 +701,9 @@ def make_savepath(
sc = get_subcatpath(subcat, int(cat), pf[0], lv[0]) sc = get_subcatpath(subcat, int(cat), pf[0], lv[0])
scpath = sc[1] + "/" scpath = sc[1] + "/"
if int(cat) == 6: if int(cat) == 6:
savepath = UNIZEUG_PATH + lv[1] + "_Multimedia_only/" + pfpath savepath = UNIZEUG_PATH / (lv[1] + "_Multimedia_only/") / pfpath
else: else:
savepath = UNIZEUG_PATH + lvpath + pfpath + catpath + scpath savepath = UNIZEUG_PATH / lvpath / pfpath / catpath / scpath
os.makedirs(savepath, exist_ok=True) os.makedirs(savepath, exist_ok=True)
filename = sem + "_" filename = sem + "_"
if int(cat) in EX_DATE_CATEGORIES_I: if int(cat) in EX_DATE_CATEGORIES_I:
@@ -707,14 +721,14 @@ def make_savepath(
filename += fname filename += fname
file = filename + "." + ftype file = filename + "." + ftype
destpath = pathlib.Path(savepath + file) destpath = savepath / file
i = 0 i = 0
while destpath.is_file(): while destpath.is_file():
file = filename + f"_{i}." + ftype file = filename + f"_{i}." + ftype
i += 1 i += 1
destpath = pathlib.Path(savepath + file) destpath = savepath / file
destpath.touch() destpath.touch()
return savepath + file return savepath / file
def get_lvpath(lva: str) -> Tuple[int, str]: def get_lvpath(lva: str) -> Tuple[int, str]:
@@ -907,10 +921,10 @@ async def save_files_to_folder(files: List[UploadFile]) -> str:
if filename == "": if filename == "":
filename = "None" filename = "None"
filename = make_filename_unique(filename) filename = make_filename_unique(filename)
os.mkdir(FILES_IN_PROGRESS + filename) os.mkdir(FILES_IN_PROGRESS / filename)
for idx, file in enumerate(files): for idx, file in enumerate(files):
fn = file.filename if file.filename is not None else "None" + str(idx) fn = file.filename if file.filename is not None else "None" + str(idx)
with open(FILES_IN_PROGRESS + filename + "/" + fn, "wb") as f: with open(FILES_IN_PROGRESS / filename / fn, "wb") as f:
f.write(await file.read()) f.write(await file.read())
return filename return filename
@@ -938,13 +952,13 @@ async def remove_old_FIP_entrys():
info(f"Remove Files: {files}") info(f"Remove Files: {files}")
for file in files: for file in files:
sql("DELETE FROM FIP WHERE id=?", (file["id"]), return_result=False) sql("DELETE FROM FIP WHERE id=?", (file["id"]), return_result=False)
os.remove(FILES_IN_PROGRESS + file["filename"]) os.remove(FILES_IN_PROGRESS / file["filename"])
# sql( # sql(
# "DELETE FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24", # "DELETE FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24",
# return_result=False, # return_result=False,
# ) # )
db.commit() db.commit()
return FileResponse("./index.html") return FileResponse(APP_ROOT_PATH / "/index.html")
def delete_from_FIP(uuid: str): def delete_from_FIP(uuid: str):

View File

@@ -285,7 +285,7 @@ function submitPdf(eve) {
async function submitForm(formData) { async function submitForm(formData) {
try { try {
const updateEventSource = new EventSource( const updateEventSource = new EventSource(
"http://127.0.0.1:8000/get_censor_status/" + doc.fID, window.location + "get_censor_status/" + doc.fID,
); );
modal.style.display = "flex"; modal.style.display = "flex";
// console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID); // console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
@@ -295,7 +295,7 @@ async function submitForm(formData) {
upload_status.innerText = upload_status.innerText =
"Censoring Page " + data.page + "/" + data.pages; "Censoring Page " + data.page + "/" + data.pages;
}); });
const response = await fetch("http://127.0.0.1:8000/submit", { const response = await fetch(window.location + "submit", {
method: "POST", method: "POST",
body: formData, body: formData,
}); });
@@ -338,7 +338,7 @@ function uploadPdf(eve) {
} }
async function uploadFile(formData) { async function uploadFile(formData) {
try { try {
const response = await fetch("http://127.0.0.1:8000/uploadfile", { const response = await fetch(window.location + "uploadfile", {
method: "POST", method: "POST",
body: formData, body: formData,
}); });

View File

@@ -1,4 +1,4 @@
var url = "http://127.0.0.1:8000/search/"; var url = window.location + "search/";
var lid = null; var lid = null;
var pid = null; var pid = null;
var activeAutocompletion = null; var activeAutocompletion = null;

View File

@@ -2,17 +2,18 @@ version: "3"
services: services:
app: app:
container_name: python-app container_name: python-app
command: python -m uvicorn app.main:app --host 0.0.0.0 --port 80 # command: python -m uvicorn app.main:app --host 0.0.0.0 --port 80
biuld: build:
context: . context: .
dockerfile: DOCKERFILE dockerfile: Dockerfile
volumes: volumes:
- ./app:/python - ./app:/python
- ./unizeug:/unizeug - ./unizeug:/unizeug:source
ports: ports:
- 80:80 - 80:80
restart: unless-stopped restart: unless-stopped
environment: environment:
ENTRY_COMMAND: python -m uvicorn main:app --host 0.0.0.0 --port 80
APP_LOG_PATH: /python/app.log APP_LOG_PATH: /python/app.log
APP_ROOT_PATH: /python APP_ROOT_PATH: /python
UNIZEUG_PATH: /unizeug UNIZEUG_PATH: /unizeug
@@ -30,24 +31,32 @@ services:
image: mariadb image: mariadb
restart: unless-stopped restart: unless-stopped
environment: environment:
MARAIDB_ROOT_PASSWORD: DBPassword MARIADB_ROOT_PASSWORD: DBPassword
MARIADB_USER: app MARIADB_USER: app
UNIZEUG_PATH: /unizeug
MARIADB_PASSWORD: DBPassword MARIADB_PASSWORD: DBPassword
MARIADB_DATABASE: Unizeug MARIADB_DATABASE: Unizeug
TZ: "Europe/Vienna" TZ: "Europe/Vienna"
healthcheck:
test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
start_period: 10s
interval: 10s
timeout: 5s
retries: 3
volumes: volumes:
- ./mariadb:/var/lib/mysql - ./mariadb:/var/lib/mysql
scaner: scaner:
container_name: python-scaner container_name: python-scaner
command: python /python/init.py # command: python /python/init.py
biuld: build:
context: . context: .
dockerfile: DOCKERFILE dockerfile: Dockerfile
volumes: volumes:
- ./app:/python - ./app:/python
- ./unizeug - ./unizeug:/unizeug:source
restart: unless-stopped restart: unless-stopped
environment: environment:
ENTRY_COMMAND: python /python/init.py
UNIZEUG_PATH: /unizeug UNIZEUG_PATH: /unizeug
DB_HOST: db DB_HOST: db
DB_USER: app DB_USER: app

View File

@@ -44,6 +44,7 @@ pypdf==5.2.0
pytesseract==0.3.13 pytesseract==0.3.13
python-dotenv==1.0.1 python-dotenv==1.0.1
python-multipart==0.0.20 python-multipart==0.0.20
pytz==2025.2
PyYAML==6.0.2 PyYAML==6.0.2
requests==2.32.3 requests==2.32.3
rich==13.9.4 rich==13.9.4