Compare commits
24 Commits
improve_ce
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6d5c390350 | ||
|
|
e13d06d4a7 | ||
|
|
547411ba03 | ||
|
|
cdd26e0bc3 | ||
|
|
f7c73a0c5a | ||
|
|
8e74848397 | ||
|
|
8704aee82e | ||
|
|
594ac1fa00 | ||
|
|
2ee90cd0d7 | ||
|
|
d42bab5b19 | ||
|
|
c3a87ceee6 | ||
|
|
6f2d373292 | ||
|
|
a37206d6a4 | ||
|
|
6bd75bf93f | ||
|
|
5bc24a32d5 | ||
|
|
a9233926e5 | ||
|
|
90235d2788 | ||
|
|
da316a9351 | ||
|
|
e6727daf8e | ||
|
|
d6508c739d | ||
|
|
856c401c06 | ||
|
|
4da77c95d1 | ||
|
|
98742107b2 | ||
| b9eb5e8bd4 |
4
.gitignore
vendored
@@ -5,3 +5,7 @@ app/dest
|
|||||||
app.log
|
app.log
|
||||||
init.log
|
init.log
|
||||||
app/__pycache__/
|
app/__pycache__/
|
||||||
|
mariadb/*
|
||||||
|
unizeug
|
||||||
|
.mypy_cache
|
||||||
|
.nvim
|
||||||
|
|||||||
@@ -1,2 +0,0 @@
|
|||||||
# remote_path="/srv/http/"
|
|
||||||
# remote_path="dev@10.0.0.25:/var/www/html/"
|
|
||||||
31
Dockerfile
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# Image used for the services that compose.yml builds from this Dockerfile.
# The container does nothing by itself: entrypoint.sh runs whatever command
# is supplied in the ENTRY_COMMAND environment variable.
# NOTE(review): the base tag is a release-candidate build; consider pinning a
# final 3.13 tag once the dependencies are confirmed to work on it.
FROM python:3.13-rc-alpine
WORKDIR /usr/src/
COPY requirements.txt /usr/src/requirements.txt
COPY entrypoint.sh /usr/src/entrypoint.sh
# Compilers and development headers needed to build the Python requirements
# from source on Alpine, plus Tesseract with the "deu" language data.
RUN apk add --no-cache \
    gcc \
    g++ \
    musl-dev \
    python3-dev \
    libffi-dev \
    openssl-dev \
    cargo \
    make \
    mariadb-connector-c-dev \
    jpeg-dev \
    zlib-dev \
    freetype-dev \
    lcms2-dev \
    openjpeg-dev \
    tiff-dev \
    tk-dev \
    tcl-dev \
    libwebp-dev \
    tesseract-ocr \
    tesseract-ocr-data-deu
RUN python -m ensurepip --upgrade
# --no-cache-dir keeps pip's download/wheel cache out of the image layers.
RUN pip install --no-cache-dir setuptools wheel
RUN pip install --no-cache-dir -r requirements.txt
WORKDIR /python
# Exec form: the entrypoint script is started directly instead of through an
# additional implicit "/bin/sh -c" wrapper.
CMD ["/bin/sh", "/usr/src/entrypoint.sh"]
# ENTRYPOINT ["/usr/src/entrypoint.sh"]
|
||||||
|
Before Width: | Height: | Size: 7.4 KiB After Width: | Height: | Size: 7.4 KiB |
|
Before Width: | Height: | Size: 4.2 KiB After Width: | Height: | Size: 4.2 KiB |
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 78 KiB After Width: | Height: | Size: 78 KiB |
|
Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB |
|
Before Width: | Height: | Size: 28 KiB After Width: | Height: | Size: 28 KiB |
|
Before Width: | Height: | Size: 89 KiB After Width: | Height: | Size: 103 KiB |
42
app/init.py
@@ -2,11 +2,16 @@ from os.path import isdir
|
|||||||
from stat import S_ISDIR, S_ISREG
|
from stat import S_ISDIR, S_ISREG
|
||||||
import re
|
import re
|
||||||
import pathlib
|
import pathlib
|
||||||
|
import os
|
||||||
|
|
||||||
# from base64 import decodebytes
|
# from base64 import decodebytes
|
||||||
import json
|
import json
|
||||||
import mariadb
|
import mariadb
|
||||||
import logging
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
import schedule
|
||||||
|
import time
|
||||||
|
import pytz
|
||||||
|
|
||||||
CATEGORIES = [
|
CATEGORIES = [
|
||||||
"Prüfungen",
|
"Prüfungen",
|
||||||
@@ -18,8 +23,9 @@ CATEGORIES = [
|
|||||||
"Multimedia",
|
"Multimedia",
|
||||||
]
|
]
|
||||||
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
|
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
|
||||||
unizeug_path = "/home/wildarch/web/fet_unizeug/unizeug/"
|
unizeug_path = os.environ.get("UNIZEUG_PATH", "./unizeug")
|
||||||
|
APP_ROOT_PATH = Path(os.environ.get("APP_ROOT_PATH", "./app"))
|
||||||
|
FILES_IN_PROGRESS = APP_ROOT_PATH / "files/"
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
filename="init.log",
|
filename="init.log",
|
||||||
@@ -31,7 +37,10 @@ info = log.info
|
|||||||
error = log.error
|
error = log.error
|
||||||
|
|
||||||
db = mariadb.connect(
|
db = mariadb.connect(
|
||||||
host="localhost", user="wildserver", password="DBPassword", database="Unizeug"
|
host=os.environ.get("DB_HOST", "db"),
|
||||||
|
user=os.environ.get("DB_USER", "user"),
|
||||||
|
password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
|
||||||
|
database=os.environ.get("DB_DATABASE", "unizeug"),
|
||||||
)
|
)
|
||||||
c = db.cursor()
|
c = db.cursor()
|
||||||
try:
|
try:
|
||||||
@@ -62,9 +71,28 @@ except mariadb.OperationalError:
|
|||||||
c.execute(
|
c.execute(
|
||||||
"CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id))"
|
"CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id))"
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
c.execute(
|
||||||
|
"CREATE TABLE FIP(id UUID DEFAULT(UUID()), filename VARCHAR(256), filetype VARCHAR(8),initTimeStamp DATETIME, PRIMARY KEY(id))"
|
||||||
|
)
|
||||||
|
except mariadb.OperationalError:
|
||||||
|
pass
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def remove_old_FIP_entrys():
    """Purge stale files-in-progress from the FIP table and from disk.

    Selects every FIP row for which ``HOUR(TIMEDIFF(NOW(), initTimeStamp))``
    exceeds 24, deletes the row and removes the matching file below
    ``FILES_IN_PROGRESS``. All deletions are committed together at the end.

    A file that is already missing on disk is logged and skipped, so one
    stale row cannot abort the whole cleanup (and with it the caller).
    """
    cur = db.cursor(dictionary=True)
    cur.execute(
        "SELECT id,filename FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24 "
    )
    files = cur.fetchall()
    info(f"Remove Files: {files}")
    for file in files:
        c.execute("DELETE FROM FIP WHERE id=?", (file["id"],))
        try:
            os.remove(FILES_IN_PROGRESS / file["filename"])
        except FileNotFoundError:
            # The row is gone either way; only the on-disk cleanup was a no-op.
            error(
                f"File {file['filename']} listed in FIP was not found in {FILES_IN_PROGRESS}"
            )
    db.commit()
|
||||||
|
|
||||||
|
|
||||||
def get_dirstruct():
|
def get_dirstruct():
|
||||||
# with open("app/pwfile.json", "r") as f:
|
# with open("app/pwfile.json", "r") as f:
|
||||||
# cred = json.load(f)
|
# cred = json.load(f)
|
||||||
@@ -76,7 +104,7 @@ def get_dirstruct():
|
|||||||
# ssh.connect(cred["sftpurl"], username=cred["sftpuser"], password=cred["sftpPW"])
|
# ssh.connect(cred["sftpurl"], username=cred["sftpuser"], password=cred["sftpPW"])
|
||||||
# sftp = ssh.open_sftp()
|
# sftp = ssh.open_sftp()
|
||||||
# folders = sftp.listdir_attr(unizeug_path)
|
# folders = sftp.listdir_attr(unizeug_path)
|
||||||
folders=pathlib.Path(unizeug_path)
|
folders = pathlib.Path(unizeug_path)
|
||||||
for entry in folders.iterdir():
|
for entry in folders.iterdir():
|
||||||
if entry is None:
|
if entry is None:
|
||||||
continue
|
continue
|
||||||
@@ -135,6 +163,7 @@ def get_dirstruct():
|
|||||||
(lid, pid, idx, subcat.name),
|
(lid, pid, idx, subcat.name),
|
||||||
)
|
)
|
||||||
db.commit()
|
db.commit()
|
||||||
|
remove_old_FIP_entrys()
|
||||||
|
|
||||||
|
|
||||||
def link_prof(firstname, lastname, lid):
|
def link_prof(firstname, lastname, lid):
|
||||||
@@ -161,3 +190,8 @@ def link_prof(firstname, lastname, lid):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
get_dirstruct()
|
get_dirstruct()
|
||||||
|
info("Database updated")
|
||||||
|
schedule.every().day.at("04:00", "Europe/Vienna").do(get_dirstruct)
|
||||||
|
while True:
|
||||||
|
schedule.run_pending()
|
||||||
|
time.sleep(1)
|
||||||
|
|||||||
164
app/main.py
@@ -21,30 +21,42 @@ import re
|
|||||||
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import signal
|
||||||
import mariadb
|
import mariadb
|
||||||
|
import sys
|
||||||
|
|
||||||
import filetype
|
import filetype
|
||||||
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import inspect
|
|
||||||
import pathlib
|
import pathlib
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from starlette.types import HTTPExceptionHandler
|
from starlette.types import HTTPExceptionHandler
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
filename="app.log",
|
filename=os.environ.get("APP_LOG_PATH"),
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
format="[%(asctime)s, %(filename)s:%(lineno)s -> %(funcName)10s() ]%(levelname)s: %(message)s",
|
format="[%(asctime)s, %(filename)s:%(lineno)s -> %(funcName)10s()] %(levelname)s: %(message)s",
|
||||||
)
|
)
|
||||||
debug = log.debug
|
debug = log.debug
|
||||||
info = log.info
|
info = log.info
|
||||||
error = log.error
|
error = log.error
|
||||||
|
critical = log.critical
|
||||||
|
|
||||||
|
|
||||||
|
def exception_handler(etype, value, tb):
    """Log an uncaught exception, including its traceback.

    Has the ``sys.excepthook`` signature.

    Args:
        etype: Class of the uncaught exception.
        value: The exception instance.
        tb: Traceback object belonging to the exception.
    """
    # The hook runs outside any ``except`` block, so the exception triple is
    # passed explicitly; ``log.exception`` would find no active exception
    # and therefore log no traceback.
    log.error(f"Uncought Exception: {value}", exc_info=(etype, value, tb))
|
||||||
|
|
||||||
|
|
||||||
|
sys.excepthook = exception_handler
|
||||||
|
|
||||||
db = mariadb.connect(
|
db = mariadb.connect(
|
||||||
host="localhost", user="wildserver", password="DBPassword", database="Unizeug"
|
host=os.environ.get("DB_HOST", "db"),
|
||||||
|
user=os.environ.get("DB_USER", "user"),
|
||||||
|
password=os.environ.get("DB_PASSWORD", "DBPASSWORD"),
|
||||||
|
database=os.environ.get("DB_DATABASE", "unizeug"),
|
||||||
)
|
)
|
||||||
|
|
||||||
info("App Started")
|
info("App Started")
|
||||||
@@ -53,8 +65,6 @@ info("App Started")
|
|||||||
|
|
||||||
# startup()
|
# startup()
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
app.mount("/favicon", StaticFiles(directory="./favicon"), name="favicon")
|
|
||||||
app.mount("/static", StaticFiles(directory="./static"), name="static")
|
|
||||||
|
|
||||||
|
|
||||||
CATEGORIES = [
|
CATEGORIES = [
|
||||||
@@ -66,15 +76,18 @@ CATEGORIES = [
|
|||||||
"Zusammenfassungen",
|
"Zusammenfassungen",
|
||||||
"Multimedia",
|
"Multimedia",
|
||||||
]
|
]
|
||||||
|
APP_ROOT_PATH = Path(os.environ.get("APP_ROOT_PATH", "./app"))
|
||||||
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
|
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
|
||||||
SUBCAT_CATEGORIES_I = [1, 2, 3]
|
SUBCAT_CATEGORIES_I = [1, 2, 3]
|
||||||
EX_DATE_CATEGORIES = ["Prüfungen", "Klausuren"]
|
EX_DATE_CATEGORIES = ["Prüfungen", "Klausuren"]
|
||||||
EX_DATE_CATEGORIES_I = [0, 1]
|
EX_DATE_CATEGORIES_I = [0, 1]
|
||||||
UNIZEUG_PATH = "./app/dest/"
|
UNIZEUG_PATH = Path(os.environ.get("UNIZEUG_PATH", "./app/dest"))
|
||||||
FILES_IN_PROGRESS = "./app/files/"
|
FILES_IN_PROGRESS = APP_ROOT_PATH / "files/"
|
||||||
EMPTYFILE = "./app/graphics/empty.pdf"
|
EMPTYFILE = APP_ROOT_PATH / "graphics/empty.pdf"
|
||||||
UNSUPPORTEDFILE = "./app/graphics/unsupported.pdf"
|
UNSUPPORTEDFILE = APP_ROOT_PATH / "graphics/unsupported.pdf"
|
||||||
GREETINGFILE = "./app/graphics/greeting.pdf"
|
GREETINGFILE = APP_ROOT_PATH / "graphics/greeting.pdf"
|
||||||
|
FAVICON = APP_ROOT_PATH / "favicon"
|
||||||
|
STATIC_FILES = APP_ROOT_PATH / "static"
|
||||||
|
|
||||||
|
|
||||||
# cur = db.cursor()
|
# cur = db.cursor()
|
||||||
@@ -114,12 +127,40 @@ def _sql_quarry(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def sql_connector_is_active(connector: mariadb.Connection) -> bool:
    """Tell whether a MariaDB connection still responds to a ping.

    Args:
        connector: The connection to probe.

    Returns:
        ``True`` when ``connector.ping()`` completes, ``False`` when it
        raises ``mariadb.Error``.
    """
    try:
        connector.ping()
    except mariadb.Error:
        return False
    else:
        return True
|
||||||
|
|
||||||
|
|
||||||
|
def sql_connect(connector: mariadb.Connection) -> mariadb.Connection:
    """Open a new MariaDB connection configured from the DB_* environment.

    Args:
        connector: The previous connection object. Its value is not read;
            the caller replaces it with the returned connection.

    Returns:
        The newly opened connection.

    Raises:
        HTTPException: With status 500 when connecting fails. Before raising,
            the failure is logged as critical and SIGTERM is sent to the
            current process.
    """
    settings = {
        "host": os.environ.get("DB_HOST", "db"),
        "user": os.environ.get("DB_USER", "user"),
        "password": os.environ.get("DB_PASSWORD", "DBPASSWORD"),
        "database": os.environ.get("DB_DATABASE", "Unizeug"),
    }
    try:
        return mariadb.connect(**settings)
    except mariadb.Error as e:
        critical(
            f"Cannot reconnect to Database {os.environ.get('DB_DATABASE', 'Unizeug')} on {os.environ.get('DB_HOST', 'db')}. Got Mariadb Error: {e}"
        )
        # Ask the own process to shut down; the request in flight still gets a 500.
        os.kill(os.getpid(), signal.SIGTERM)
        raise HTTPException(500, detail="Database failed")
|
||||||
|
|
||||||
|
|
||||||
def sql(
|
def sql(
|
||||||
querry: str,
|
querry: str,
|
||||||
data: Tuple[str | int, ...] | str | int = (),
|
data: Tuple[str | int, ...] | str | int = (),
|
||||||
return_result: bool = True,
|
return_result: bool = True,
|
||||||
commit: bool = False,
|
commit: bool = False,
|
||||||
) -> List[Tuple]:
|
) -> List[Tuple]:
|
||||||
|
global db
|
||||||
|
if not sql_connector_is_active(db):
|
||||||
|
db = sql_connect(db)
|
||||||
cur = db.cursor(dictionary=False)
|
cur = db.cursor(dictionary=False)
|
||||||
return _sql_quarry(cur, querry, data, return_result, commit)
|
return _sql_quarry(cur, querry, data, return_result, commit)
|
||||||
|
|
||||||
@@ -130,6 +171,10 @@ def sqlT(
|
|||||||
return_result: bool = True,
|
return_result: bool = True,
|
||||||
commit: bool = False,
|
commit: bool = False,
|
||||||
) -> List[Dict]:
|
) -> List[Dict]:
|
||||||
|
global db
|
||||||
|
if not sql_connector_is_active(db):
|
||||||
|
db = sql_connect(db)
|
||||||
|
|
||||||
cur = db.cursor(dictionary=True)
|
cur = db.cursor(dictionary=True)
|
||||||
return _sql_quarry(cur, querry, data, return_result, commit)
|
return _sql_quarry(cur, querry, data, return_result, commit)
|
||||||
|
|
||||||
@@ -148,10 +193,22 @@ def sqlT(
|
|||||||
# )
|
# )
|
||||||
|
|
||||||
|
|
||||||
|
app.mount(
|
||||||
|
"/favicon",
|
||||||
|
StaticFiles(directory=os.environ.get("FAVICON_PATH", FAVICON)),
|
||||||
|
name="favicon",
|
||||||
|
)
|
||||||
|
app.mount(
|
||||||
|
"/static",
|
||||||
|
StaticFiles(directory=os.environ.get("STATIC_PATH", STATIC_FILES)),
|
||||||
|
name="static",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def get_index():
|
async def get_index():
|
||||||
"""gives the Index.html file"""
|
"""gives the Index.html file"""
|
||||||
return FileResponse("./index.html")
|
return FileResponse(APP_ROOT_PATH / "index.html")
|
||||||
|
|
||||||
|
|
||||||
@app.get("/files/{file_id}")
|
@app.get("/files/{file_id}")
|
||||||
@@ -178,7 +235,7 @@ async def get_file(file_id: str):
|
|||||||
# status_code=500, detail="Somethings wrong with the database"
|
# status_code=500, detail="Somethings wrong with the database"
|
||||||
# )
|
# )
|
||||||
# filename = cur.fetchone()[0]
|
# filename = cur.fetchone()[0]
|
||||||
return FileResponse(FILES_IN_PROGRESS + filename)
|
return FileResponse(FILES_IN_PROGRESS / filename)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/search/lva")
|
@app.get("/search/lva")
|
||||||
@@ -224,6 +281,9 @@ async def search_lva(
|
|||||||
)
|
)
|
||||||
# res += cur.fetchall()
|
# res += cur.fetchall()
|
||||||
res = remove_duplicates(res + zw)
|
res = remove_duplicates(res + zw)
|
||||||
|
info(
|
||||||
|
f"LVA Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
|
||||||
|
)
|
||||||
if searchlim == 0:
|
if searchlim == 0:
|
||||||
return res
|
return res
|
||||||
else:
|
else:
|
||||||
@@ -258,6 +318,9 @@ async def search_profs(
|
|||||||
)
|
)
|
||||||
# res += cur.fetchall()
|
# res += cur.fetchall()
|
||||||
res = remove_duplicates(res + zw)
|
res = remove_duplicates(res + zw)
|
||||||
|
info(
|
||||||
|
f"Prof Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
|
||||||
|
)
|
||||||
if searchlim == 0:
|
if searchlim == 0:
|
||||||
return res
|
return res
|
||||||
else:
|
else:
|
||||||
@@ -298,6 +361,9 @@ async def search_subcats(
|
|||||||
)
|
)
|
||||||
# res += cur.fetchall()
|
# res += cur.fetchall()
|
||||||
res = remove_duplicates(res + rest)
|
res = remove_duplicates(res + rest)
|
||||||
|
info(
|
||||||
|
f"Subcatrgory Search: {searchterm}; Result: {res[: (searchlim if searchlim != 0 else -1)]}"
|
||||||
|
)
|
||||||
if searchlim == 0:
|
if searchlim == 0:
|
||||||
return res
|
return res
|
||||||
else:
|
else:
|
||||||
@@ -354,7 +420,7 @@ async def create_upload_file(files: List[UploadFile], c2pdf: bool = True):
|
|||||||
content = doc.tobytes()
|
content = doc.tobytes()
|
||||||
if ft != "dir":
|
if ft != "dir":
|
||||||
filename = make_filename_unique(filename)
|
filename = make_filename_unique(filename)
|
||||||
locpath = FILES_IN_PROGRESS + filename
|
locpath = FILES_IN_PROGRESS / filename
|
||||||
# locpaths.append(locpath)
|
# locpaths.append(locpath)
|
||||||
# cur = db.cursor()
|
# cur = db.cursor()
|
||||||
# try:
|
# try:
|
||||||
@@ -421,7 +487,7 @@ async def get_submission(
|
|||||||
f"lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, ocr: {ocr}"
|
f"lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, ocr: {ocr}"
|
||||||
)
|
)
|
||||||
info(
|
info(
|
||||||
f"lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, ocr: {ocr}"
|
f"Got Submission: lva: {lva}, prof: {prof}, fname {fname}, stype: {stype}, subcat: {subcat}, sem: {sem}, ex_date: {ex_date}, rects: {rects}, pagescales: {pagescales}, ocr: {ocr}"
|
||||||
)
|
)
|
||||||
rects_p = json.loads(rects)
|
rects_p = json.loads(rects)
|
||||||
scales_p = json.loads(pagescales)
|
scales_p = json.loads(pagescales)
|
||||||
@@ -438,7 +504,7 @@ async def get_submission(
|
|||||||
error(f"User tried to upload a file without specifying the {th[1]}")
|
error(f"User tried to upload a file without specifying the {th[1]}")
|
||||||
raise HTTPException(400, f"You need to specify a {th[1]}")
|
raise HTTPException(400, f"You need to specify a {th[1]}")
|
||||||
|
|
||||||
filepath = "./app/files/" + res[0][0]
|
filepath = FILES_IN_PROGRESS / res[0][0]
|
||||||
# except mariadb.Error as e:
|
# except mariadb.Error as e:
|
||||||
# print(f"Mariadb Error: {e}")
|
# print(f"Mariadb Error: {e}")
|
||||||
# raise HTTPException(
|
# raise HTTPException(
|
||||||
@@ -510,8 +576,8 @@ async def yield_censor_status(file_id: str):
|
|||||||
|
|
||||||
|
|
||||||
def censor_pdf(
|
def censor_pdf(
|
||||||
path: str,
|
path: os.PathLike,
|
||||||
destpath: str,
|
destpath: os.PathLike,
|
||||||
rects: List[List[List[float]]],
|
rects: List[List[List[float]]],
|
||||||
scales: List[Dict[str, float]],
|
scales: List[Dict[str, float]],
|
||||||
file_id: str,
|
file_id: str,
|
||||||
@@ -526,6 +592,7 @@ def censor_pdf(
|
|||||||
Returns:
|
Returns:
|
||||||
None
|
None
|
||||||
"""
|
"""
|
||||||
|
info(f"started Censoring for file {path} to be saved to {destpath}")
|
||||||
doc = pymupdf.open(path)
|
doc = pymupdf.open(path)
|
||||||
page = doc[0]
|
page = doc[0]
|
||||||
npage = doc.page_count
|
npage = doc.page_count
|
||||||
@@ -558,8 +625,8 @@ def censor_pdf(
|
|||||||
|
|
||||||
|
|
||||||
def censor_pdf_ocr(
|
def censor_pdf_ocr(
|
||||||
path: str,
|
path: os.PathLike,
|
||||||
destpath: str,
|
destpath: os.PathLike,
|
||||||
rects: List[List[List[float]]],
|
rects: List[List[List[float]]],
|
||||||
scales: List[Dict[str, float]],
|
scales: List[Dict[str, float]],
|
||||||
file_id: str,
|
file_id: str,
|
||||||
@@ -576,6 +643,7 @@ def censor_pdf_ocr(
|
|||||||
Returns:
|
Returns:
|
||||||
None
|
None
|
||||||
"""
|
"""
|
||||||
|
info(f"started Censoring in OCR Mode for file {path} to be saved to {destpath}")
|
||||||
doc = pymupdf.open(path)
|
doc = pymupdf.open(path)
|
||||||
output = pymupdf.open()
|
output = pymupdf.open()
|
||||||
page = doc[0]
|
page = doc[0]
|
||||||
@@ -603,12 +671,31 @@ def censor_pdf_ocr(
|
|||||||
censor_status_datas[file_id]["done"] = False
|
censor_status_datas[file_id]["done"] = False
|
||||||
censor_status_update_events[file_id].set()
|
censor_status_update_events[file_id].set()
|
||||||
# THis Costs us dearly
|
# THis Costs us dearly
|
||||||
|
try:
|
||||||
bitmap = page.get_pixmap(dpi=400)
|
bitmap = page.get_pixmap(dpi=400)
|
||||||
pdf_bytes = bitmap.pdfocr_tobytes(
|
pdf_bytes = bitmap.pdfocr_tobytes(
|
||||||
language="deu",
|
language="deu",
|
||||||
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files
|
tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files
|
||||||
)
|
)
|
||||||
output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
|
output.insert_pdf(pymupdf.Document(stream=pdf_bytes))
|
||||||
|
except RuntimeError as e:
|
||||||
|
error(
|
||||||
|
f"Error in OCR for document: {destpath}. Error: {e}. Falling back to standard mode."
|
||||||
|
)
|
||||||
|
if i < len(rects) and rects[i] != []:
|
||||||
|
for rect in rects[i]:
|
||||||
|
prect = pymupdf.Rect(
|
||||||
|
rect[0] * wfac,
|
||||||
|
rect[1] * hfac,
|
||||||
|
(rect[0] + rect[2]) * wfac,
|
||||||
|
(rect[1] + rect[3]) * hfac,
|
||||||
|
)
|
||||||
|
page.add_redact_annot(
|
||||||
|
prect,
|
||||||
|
fill=(0, 0, 0),
|
||||||
|
)
|
||||||
|
page.apply_redactions()
|
||||||
|
output.insert_pdf(page.parent, from_page=page.number, to_page=page.number)
|
||||||
# End of the costly part
|
# End of the costly part
|
||||||
print(f"Page {i + 1}/{npage}: CENSORING DONE")
|
print(f"Page {i + 1}/{npage}: CENSORING DONE")
|
||||||
output.save(destpath)
|
output.save(destpath)
|
||||||
@@ -665,21 +752,22 @@ def make_savepath(
|
|||||||
ex_date: str,
|
ex_date: str,
|
||||||
fname: str,
|
fname: str,
|
||||||
ftype: str,
|
ftype: str,
|
||||||
) -> str:
|
) -> os.PathLike:
|
||||||
"""Generates the path, the file is saved to after the upload process is finished. It creates all nessecery directories."""
|
"""Generates the path, the file is saved to after the upload process is finished. It creates all nessecery directories."""
|
||||||
|
info(f"Started to make Savepath for '{fname}' in '{lva}' with prof '{prof}'.")
|
||||||
lv = get_lvpath(lva)
|
lv = get_lvpath(lva)
|
||||||
lvpath = lv[1] + "/"
|
lvpath = Path(lv[1])
|
||||||
pf = get_profpath(prof, lv[0])
|
pf = get_profpath(prof, lv[0])
|
||||||
pfpath = pf[1] + "/"
|
pfpath = Path(pf[1])
|
||||||
catpath = CATEGORIES[int(cat)] + "/"
|
catpath = Path(CATEGORIES[int(cat)])
|
||||||
scpath = ""
|
scpath: str | os.PathLike = ""
|
||||||
if int(cat) in SUBCAT_CATEGORIES_I and subcat != "":
|
if int(cat) in SUBCAT_CATEGORIES_I and subcat != "":
|
||||||
sc = get_subcatpath(subcat, int(cat), pf[0], lv[0])
|
sc = get_subcatpath(subcat, int(cat), pf[0], lv[0])
|
||||||
scpath = sc[1] + "/"
|
scpath = Path(sc[1])
|
||||||
if int(cat) == 6:
|
if int(cat) == 6:
|
||||||
savepath = UNIZEUG_PATH + lv[1] + "_Multimedia_only/" + pfpath
|
savepath = UNIZEUG_PATH / (lv[1] + "_Multimedia_only/") / pfpath
|
||||||
else:
|
else:
|
||||||
savepath = UNIZEUG_PATH + lvpath + pfpath + catpath + scpath
|
savepath = UNIZEUG_PATH / lvpath / pfpath / catpath / scpath
|
||||||
os.makedirs(savepath, exist_ok=True)
|
os.makedirs(savepath, exist_ok=True)
|
||||||
filename = sem + "_"
|
filename = sem + "_"
|
||||||
if int(cat) in EX_DATE_CATEGORIES_I:
|
if int(cat) in EX_DATE_CATEGORIES_I:
|
||||||
@@ -697,14 +785,16 @@ def make_savepath(
|
|||||||
filename += fname
|
filename += fname
|
||||||
file = filename + "." + ftype
|
file = filename + "." + ftype
|
||||||
|
|
||||||
destpath = pathlib.Path(savepath + file)
|
destpath = savepath / file
|
||||||
i = 0
|
i = 0
|
||||||
while destpath.is_file():
|
while destpath.is_file():
|
||||||
|
info(f"{destpath} already exists.")
|
||||||
file = filename + f"_{i}." + ftype
|
file = filename + f"_{i}." + ftype
|
||||||
i += 1
|
i += 1
|
||||||
destpath = pathlib.Path(savepath + file)
|
destpath = savepath / file
|
||||||
destpath.touch()
|
destpath.touch()
|
||||||
return savepath + file
|
info(f"Path for file to be saved generated as: {savepath / file}")
|
||||||
|
return savepath / file
|
||||||
|
|
||||||
|
|
||||||
def get_lvpath(lva: str) -> Tuple[int, str]:
|
def get_lvpath(lva: str) -> Tuple[int, str]:
|
||||||
@@ -897,10 +987,10 @@ async def save_files_to_folder(files: List[UploadFile]) -> str:
|
|||||||
if filename == "":
|
if filename == "":
|
||||||
filename = "None"
|
filename = "None"
|
||||||
filename = make_filename_unique(filename)
|
filename = make_filename_unique(filename)
|
||||||
os.mkdir(FILES_IN_PROGRESS + filename)
|
os.mkdir(FILES_IN_PROGRESS / filename)
|
||||||
for idx, file in enumerate(files):
|
for idx, file in enumerate(files):
|
||||||
fn = file.filename if file.filename is not None else "None" + str(idx)
|
fn = file.filename if file.filename is not None else "None" + str(idx)
|
||||||
with open(FILES_IN_PROGRESS + filename + "/" + fn, "wb") as f:
|
with open(FILES_IN_PROGRESS / filename / fn, "wb") as f:
|
||||||
f.write(await file.read())
|
f.write(await file.read())
|
||||||
return filename
|
return filename
|
||||||
|
|
||||||
@@ -928,13 +1018,13 @@ async def remove_old_FIP_entrys():
|
|||||||
info(f"Remove Files: {files}")
|
info(f"Remove Files: {files}")
|
||||||
for file in files:
|
for file in files:
|
||||||
sql("DELETE FROM FIP WHERE id=?", (file["id"]), return_result=False)
|
sql("DELETE FROM FIP WHERE id=?", (file["id"]), return_result=False)
|
||||||
os.remove(FILES_IN_PROGRESS + file["filename"])
|
os.remove(FILES_IN_PROGRESS / file["filename"])
|
||||||
# sql(
|
# sql(
|
||||||
# "DELETE FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24",
|
# "DELETE FROM FIP WHERE HOUR(TIMEDIFF(NOW(),initTimeStamp)) > 24",
|
||||||
# return_result=False,
|
# return_result=False,
|
||||||
# )
|
# )
|
||||||
db.commit()
|
db.commit()
|
||||||
return FileResponse("./index.html")
|
return FileResponse(APP_ROOT_PATH / "index.html")
|
||||||
|
|
||||||
|
|
||||||
def delete_from_FIP(uuid: str):
|
def delete_from_FIP(uuid: str):
|
||||||
@@ -942,4 +1032,4 @@ def delete_from_FIP(uuid: str):
|
|||||||
if len(res) < 1:
|
if len(res) < 1:
|
||||||
raise HTTPException(500, "I am trying to delete a file that dose not exist")
|
raise HTTPException(500, "I am trying to delete a file that dose not exist")
|
||||||
sql("DELETE FROM FIP WHERE id=?", (uuid,), return_result=False, commit=True)
|
sql("DELETE FROM FIP WHERE id=?", (uuid,), return_result=False, commit=True)
|
||||||
os.remove(FILES_IN_PROGRESS + res[0]["filename"])
|
os.remove(FILES_IN_PROGRESS / res[0]["filename"])
|
||||||
|
|||||||
@@ -283,10 +283,10 @@ function submitPdf(eve) {
|
|||||||
submitForm(formdata);
|
submitForm(formdata);
|
||||||
}
|
}
|
||||||
async function submitForm(formData) {
|
async function submitForm(formData) {
|
||||||
|
var updateEventSource = null;
|
||||||
try {
|
try {
|
||||||
const updateEventSource = new EventSource(
|
updateEventSource = new EventSource("/get_censor_status/" + doc.fID);
|
||||||
"http://127.0.0.1:8000/get_censor_status/" + doc.fID,
|
|
||||||
);
|
|
||||||
modal.style.display = "flex";
|
modal.style.display = "flex";
|
||||||
// console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
|
// console.log("http://127.0.0.1:8000/get_censor_status/" + doc.fID);
|
||||||
updateEventSource.addEventListener("censorUpdate", function(eve) {
|
updateEventSource.addEventListener("censorUpdate", function(eve) {
|
||||||
@@ -295,11 +295,19 @@ async function submitForm(formData) {
|
|||||||
upload_status.innerText =
|
upload_status.innerText =
|
||||||
"Censoring Page " + data.page + "/" + data.pages;
|
"Censoring Page " + data.page + "/" + data.pages;
|
||||||
});
|
});
|
||||||
const response = await fetch("http://127.0.0.1:8000/submit", {
|
} catch {
|
||||||
|
console.error(
|
||||||
|
"Error geting eventsource for updating censoring page count: " + error,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const response = await fetch("/submit/", {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
body: formData,
|
body: formData,
|
||||||
});
|
});
|
||||||
|
if (updateEventSource !== null) {
|
||||||
updateEventSource.close();
|
updateEventSource.close();
|
||||||
|
}
|
||||||
modal.style.display = "none";
|
modal.style.display = "none";
|
||||||
//let responseJSON=await response.json();
|
//let responseJSON=await response.json();
|
||||||
if (response.ok) {
|
if (response.ok) {
|
||||||
@@ -320,7 +328,7 @@ async function submitForm(formData) {
|
|||||||
window.alert("Error: " + (await response.json())["detail"]);
|
window.alert("Error: " + (await response.json())["detail"]);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error" + error);
|
console.error("Error submitting: " + error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
function uploadPdf(eve) {
|
function uploadPdf(eve) {
|
||||||
@@ -338,7 +346,7 @@ function uploadPdf(eve) {
|
|||||||
}
|
}
|
||||||
async function uploadFile(formData) {
|
async function uploadFile(formData) {
|
||||||
try {
|
try {
|
||||||
const response = await fetch("http://127.0.0.1:8000/uploadfile", {
|
const response = await fetch("/uploadfile/", {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
body: formData,
|
body: formData,
|
||||||
});
|
});
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
var url = "http://127.0.0.1:8000/search/";
|
var url = "/search/";
|
||||||
var lid = null;
|
var lid = null;
|
||||||
var pid = null;
|
var pid = null;
|
||||||
var activeAutocompletion = null;
|
var activeAutocompletion = null;
|
||||||
67
compose.yml
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
# Compose stack with three services:
#   app    - built from the local Dockerfile, runs uvicorn via ENTRY_COMMAND
#   db     - MariaDB, data kept in ./mariadb
#   scaner - built from the same Dockerfile, runs init.py via ENTRY_COMMAND
services:
  app:
    container_name: python-app
    # command: python -m uvicorn app.main:app --host 0.0.0.0 --port 80
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      # The application source is bind-mounted, not copied into the image.
      - ./app:/python
      # NOTE(review): "source" is not one of the documented short-syntax
      # access modes (rw, ro, z, Z) - confirm this option is accepted.
      - ./unizeug:/unizeug:source
    ports:
      - 80:80
    restart: unless-stopped
    environment:
      # Executed by entrypoint.sh inside the container.
      ENTRY_COMMAND: python -m uvicorn main:app --host 0.0.0.0 --port 80
      APP_LOG_PATH: /python/app.log
      APP_ROOT_PATH: /python
      UNIZEUG_PATH: /unizeug
      # DB_* must match the MARIADB_* values of the db service below.
      DB_HOST: db
      DB_USER: app
      DB_PASSWORD: DBPassword
      DB_DATABASE: Unizeug
      TZ: "Europe/Vienna"

    depends_on:
      - db
      - scaner
  db:
    container_name: db
    image: mariadb
    restart: unless-stopped
    environment:
      MARIADB_ROOT_PASSWORD: DBPassword
      MARIADB_USER: app
      # NOTE(review): no /unizeug volume is mounted into this service;
      # confirm whether this variable is needed here.
      UNIZEUG_PATH: /unizeug
      MARIADB_PASSWORD: DBPassword
      MARIADB_DATABASE: Unizeug
      TZ: "Europe/Vienna"
    healthcheck:
      test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 3
    volumes:
      # Database files are persisted on the host.
      - ./mariadb:/var/lib/mysql
  scaner:
    container_name: python-scaner
    # command: python /python/init.py
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      - ./app:/python
      # NOTE(review): same non-standard ":source" suffix as in the app service.
      - ./unizeug:/unizeug:source
    restart: unless-stopped
    environment:
      # Executed by entrypoint.sh inside the container.
      ENTRY_COMMAND: python /python/init.py
      UNIZEUG_PATH: /unizeug
      APP_ROOT_PATH: /python
      DB_HOST: db
      DB_USER: app
      DB_PASSWORD: DBPassword
      DB_DATABASE: Unizeug
      TZ: "Europe/Vienna"
    depends_on:
      - db
|
||||||
4
entrypoint.sh
Executable file
@@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/sh
# Container entrypoint: run the command given in ENTRY_COMMAND, if any.
# With ENTRY_COMMAND unset or empty the script exits without doing anything.

# POSIX "[ ... ]" instead of "[[ ... ]]", which is not guaranteed under /bin/sh.
if [ -n "$ENTRY_COMMAND" ]; then
    # exec replaces this shell, so the command's exit status becomes the
    # script's exit status and no idle parent shell stays around.
    exec /bin/sh -c "$ENTRY_COMMAND"
fi
|
||||||
@@ -44,10 +44,12 @@ pypdf==5.2.0
|
|||||||
pytesseract==0.3.13
|
pytesseract==0.3.13
|
||||||
python-dotenv==1.0.1
|
python-dotenv==1.0.1
|
||||||
python-multipart==0.0.20
|
python-multipart==0.0.20
|
||||||
|
pytz==2025.2
|
||||||
PyYAML==6.0.2
|
PyYAML==6.0.2
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
rich==13.9.4
|
rich==13.9.4
|
||||||
rich-toolkit==0.13.2
|
rich-toolkit==0.13.2
|
||||||
|
schedule==1.2.2
|
||||||
shellingham==1.5.4
|
shellingham==1.5.4
|
||||||
sniffio==1.3.1
|
sniffio==1.3.1
|
||||||
starlette==0.45.3
|
starlette==0.45.3
|
||||||
|
|||||||