diff --git a/.gitignore b/.gitignore index 20fbbdc..585c4f0 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ app/files/ app/pwfile.json app/dest app.log +init.log app/__pycache__/ diff --git a/app/__pycache__/main.cpython-313.pyc b/app/__pycache__/main.cpython-313.pyc index 25c1d46..95361c3 100644 Binary files a/app/__pycache__/main.cpython-313.pyc and b/app/__pycache__/main.cpython-313.pyc differ diff --git a/app/init.py b/app/init.py index 50a598f..3a1feaa 100644 --- a/app/init.py +++ b/app/init.py @@ -1,10 +1,12 @@ -import paramiko +from os.path import isdir from stat import S_ISDIR, S_ISREG import re +import pathlib # from base64 import decodebytes import json import mariadb +import logging CATEGORIES = [ "Prüfungen", @@ -16,7 +18,18 @@ CATEGORIES = [ "Multimedia", ] SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"] -unizeug_path = "/mnt/save/daten/Unizeug/" +unizeug_path = "/home/wildarch/web/fet_unizeug/unizeug/" + +log = logging.getLogger(__name__) +logging.basicConfig( + filename="init.log", + level=logging.INFO, + format="[%(asctime)s, %(filename)s:%(lineno)s -> %(funcName)10s() ]%(levelname)s: %(message)s", +) +debug = log.debug +info = log.info +error = log.error + db = mariadb.connect( host="localhost", user="wildserver", password="DBPassword", database="Unizeug" ) @@ -53,29 +66,31 @@ db.commit() def get_dirstruct(): - with open("app/pwfile.json", "r") as f: - cred = json.load(f) - ssh = paramiko.SSHClient() - print(cred["sftpurl"]) - ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + # with open("app/pwfile.json", "r") as f: + # cred = json.load(f) + # ssh = paramiko.SSHClient() + # print(cred["sftpurl"]) + # ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # key=paramiko.RSAKey(data=decodebytes(bytes(cred["key"],"utf-8"))) # ssh.get_host_keys().add(cred["sftpurl"], 'ssh-rsa', key) - ssh.connect(cred["sftpurl"], username=cred["sftpuser"], password=cred["sftpPW"]) - sftp = ssh.open_sftp() - folders = sftp.listdir_attr(unizeug_path) - for entry in folders: + # ssh.connect(cred["sftpurl"], username=cred["sftpuser"], password=cred["sftpPW"]) + # sftp = ssh.open_sftp() + # folders = sftp.listdir_attr(unizeug_path) + folders=pathlib.Path(unizeug_path) + for entry in folders.iterdir(): if entry is None: continue - if not S_ISDIR(entry.st_mode): + if not entry.is_dir(): continue - fname = str(entry.filename) + fname = str(entry.name) regex = re.compile(r"Multimedia_only") if regex.search(fname): continue # print(fname) lvid = re.search(r"[a-zA-Z0-9]{3}\.[a-zA-Z0-9]{3}", fname) - print(lvid) + # print(lvid) if lvid is None: + error(f"Didnt Find LVA ID in Directory {fname}") continue lvid = lvid.group()[:3] + lvid.group()[4:] # name = fname[:-8] @@ -89,39 +104,35 @@ def get_dirstruct(): cur.execute("SELECT id FROM LVAs WHERE lvid=?", (lvid,)) lid = cur.fetchone()[0] db.commit() - for profsdir in sftp.listdir_attr(unizeug_path + fname + "/"): - if profsdir is None or not S_ISDIR(profsdir.st_mode): + for profsdir in entry.iterdir(): + if profsdir is None: + continue + if not profsdir.is_dir(): continue # print(profsdir.filename) try: - lastname, firstname = re.split(r"[_\-\s]", str(profsdir.filename)) + lastname, firstname = re.split(r"[_\-\s]", str(profsdir.name)) pid = link_prof(firstname, lastname, lid) except ValueError: - print(f"{name} is broken") + error(f"Couldnt get Profs from {fname}") continue - for cat in sftp.listdir_attr( - unizeug_path + fname + "/" + profsdir.filename + "/" - ): - if cat is None or not S_ISDIR(cat.st_mode): + for cat in profsdir.iterdir(): + if cat is None: continue - if cat.filename not in SUBCAT_CATEGORIES: + if not cat.is_dir(): continue - idx = CATEGORIES.index(cat.filename) - for subcat in sftp.listdir_attr( - unizeug_path - + fname - + "/" - + profsdir.filename - + "/" - + cat.filename - + "/" - ): - if subcat is None or not S_ISDIR(subcat.st_mode): + if cat.name not in SUBCAT_CATEGORIES: + continue + idx = CATEGORIES.index(cat.name) + for subcat in cat.iterdir(): + if subcat is None: + continue + if not subcat.is_dir(): continue cur = db.cursor() cur.execute( "INSERT INTO SubCats (LId,PId,cat,name) VALUES(?,?,?,?)", - (lid, pid, idx, subcat.filename), + (lid, pid, idx, subcat.name), ) db.commit() diff --git a/app/init_ssh.py b/app/init_ssh.py new file mode 100644 index 0000000..50a598f --- /dev/null +++ b/app/init_ssh.py @@ -0,0 +1,152 @@ +import paramiko +from stat import S_ISDIR, S_ISREG +import re + +# from base64 import decodebytes +import json +import mariadb + +CATEGORIES = [ + "Prüfungen", + "Klausuren", + "Übungen", + "Labore", + "Unterlagen", + "Zusammenfassungen", + "Multimedia", +] +SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"] +unizeug_path = "/mnt/save/daten/Unizeug/" +db = mariadb.connect( + host="localhost", user="wildserver", password="DBPassword", database="Unizeug" +) +c = db.cursor() +try: + c.execute("DROP TABLE LVAs") +except mariadb.OperationalError: + pass +c.execute( + "CREATE TABLE LVAs(id BIGINT(20) unsigned NOT NULL AUTO_INCREMENT,lvid VARCHAR(6), lvname VARCHAR(256), lvpath VARCHAR(256),PRIMARY KEY(id))" +) +try: + c.execute("DROP TABLE Profs") +except mariadb.OperationalError: + pass +c.execute( + "CREATE TABLE Profs(id BIGINT(20) unsigned NOT NULL AUTO_INCREMENT,name VARCHAR(256),PRIMARY KEY(id))" +) +try: + c.execute("DROP TABLE LPLink") +except mariadb.OperationalError: + pass +c.execute( + "CREATE TABLE LPLink(id BIGINT(20) unsigned NOT NULL AUTO_INCREMENT,LId bigint(20),PId bigint(20),PRIMARY KEY(id))" +) +try: + c.execute("DROP TABLE SubCats") +except mariadb.OperationalError: + pass +c.execute( + "CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id))" +) +db.commit() + + +def get_dirstruct(): + with open("app/pwfile.json", "r") as f: + cred = json.load(f) + ssh = paramiko.SSHClient() + print(cred["sftpurl"]) + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + # key=paramiko.RSAKey(data=decodebytes(bytes(cred["key"],"utf-8"))) + # ssh.get_host_keys().add(cred["sftpurl"], 'ssh-rsa', key) + ssh.connect(cred["sftpurl"], username=cred["sftpuser"], password=cred["sftpPW"]) + sftp = ssh.open_sftp() + folders = sftp.listdir_attr(unizeug_path) + for entry in folders: + if entry is None: + continue + if not S_ISDIR(entry.st_mode): + continue + fname = str(entry.filename) + regex = re.compile(r"Multimedia_only") + if regex.search(fname): + continue + # print(fname) + lvid = re.search(r"[a-zA-Z0-9]{3}\.[a-zA-Z0-9]{3}", fname) + print(lvid) + if lvid is None: + continue + lvid = lvid.group()[:3] + lvid.group()[4:] + # name = fname[:-8] + name = re.sub(r"[a-zA-Z0-9]{3}\.[a-zA-Z0-9]{3}", "", fname) + # print(name) + # print(lvid) + cur = db.cursor() + cur.execute( + "INSERT INTO LVAs (lvid, lvname, lvpath) VALUES(?,?,?)", (lvid, name, fname) + ) + cur.execute("SELECT id FROM LVAs WHERE lvid=?", (lvid,)) + lid = cur.fetchone()[0] + db.commit() + for profsdir in sftp.listdir_attr(unizeug_path + fname + "/"): + if profsdir is None or not S_ISDIR(profsdir.st_mode): + continue + # print(profsdir.filename) + try: + lastname, firstname = re.split(r"[_\-\s]", str(profsdir.filename)) + pid = link_prof(firstname, lastname, lid) + except ValueError: + print(f"{name} is broken") + continue + for cat in sftp.listdir_attr( + unizeug_path + fname + "/" + profsdir.filename + "/" + ): + if cat is None or not S_ISDIR(cat.st_mode): + continue + if cat.filename not in SUBCAT_CATEGORIES: + continue + idx = CATEGORIES.index(cat.filename) + for subcat in sftp.listdir_attr( + unizeug_path + + fname + + "/" + + profsdir.filename + + "/" + + cat.filename + + "/" + ): + if subcat is None or not S_ISDIR(subcat.st_mode): + continue + cur = db.cursor() + cur.execute( + "INSERT INTO SubCats (LId,PId,cat,name) VALUES(?,?,?,?)", + (lid, pid, idx, subcat.filename), + ) + db.commit() + + +def link_prof(firstname, lastname, lid): + cur = db.cursor() + cur.execute("SELECT id from Profs WHERE name=?", (lastname + " " + firstname,)) + res = cur.fetchone() + if res is not None: + cur.execute("INSERT INTO LPLink (LId,PId) VALUES(?,?)", (lid, res[0])) + db.commit() + return res[0] + cur.execute("SELECT id from Profs WHERE name=?", (firstname + " " + lastname,)) + res = cur.fetchone() + if res is not None: + cur.execute("INSERT INTO LPLink (LId,PId) VALUES(?,?)", (lid, res[0])) + db.commit() + return res[0] + cur.execute("INSERT INTO Profs (name) VALUES(?)", (lastname + " " + firstname,)) + cur.execute("SELECT id FROM Profs WHERE name=?", (lastname + " " + firstname,)) + res = cur.fetchone() + cur.execute("INSERT INTO LPLink (LId,PId) VALUES(?,?)", (lid, res[0])) + db.commit() + return res[0] + + +if __name__ == "__main__": + get_dirstruct() diff --git a/app/main.py b/app/main.py index bc4ef07..78118a8 100644 --- a/app/main.py +++ b/app/main.py @@ -461,6 +461,7 @@ async def censor_pdf( ): """Censors pdf and runs OCR If Secure is True the file is converted to Pixels and then recreated; else the censored sections are just covering the text below and can be easiliy removed with e.g. Inkscape + Saves the file to the given Destpath. Args: path: path to the pdf document destpath: Path where the result is supposed to be saved to @@ -614,8 +615,6 @@ def make_savepath( destpath = pathlib.Path(savepath + file) i = 0 - info(destpath) - info(destpath.is_file()) while destpath.is_file(): file = filename + f"_{i}." + ftype i += 1