Finished the script that scrapes the directory structure. There is still a lot of work to do to get everything in order.

This commit is contained in:
Marcel Gansfusz
2025-02-19 17:47:43 +01:00
parent f969fec15e
commit 2cf7bda010
4 changed files with 146 additions and 32 deletions

View File

@@ -1,16 +1,57 @@
import paramiko
from stat import S_ISDIR, S_ISREG
import re
from base64 import decodebytes
# from base64 import decodebytes
import json
import mariadb
# Document categories. A category's position in this list is its numeric id:
# get_dirstruct() stores CATEGORIES.index(<directory name>) in SubCats.cat,
# so reordering the list changes the meaning of the stored ids.
CATEGORIES = [
"Prüfungen",
"Klausuren",
"Übungen",
"Labore",
"Unterlagen",
"Zusammenfassungen",
"Multimedia",
]
# Only category directories with one of these names are scanned for
# sub-category folders; all other category directories are skipped.
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
# Root directory listed over SFTP. The trailing slash matters because child
# paths are built by plain string concatenation.
unizeug_path = "/mnt/save/daten/Unizeug/"
# Module-level MariaDB connection shared by the code in this file.
# NOTE(review): the database credentials are hard-coded here, while the SFTP
# credentials are read from app/pwfile.json — consider loading these from
# configuration as well.
db = mariadb.connect(
host="localhost", user="wildserver", password="DBPassword", database="Unizeug"
)
c=db.cursor()
c.execute("DROP TABLE LVAs")
c.execute("CREATE TABLE LVAs(id BIGINT(20) unsigned NOT NULL AUTO_INCREMENT,lvid MEDIUMINT unsigned, lvname VARCHAR(256), lvpath VARCHAR(256),PRIMARY KEY(id))")
c = db.cursor()
# Rebuild the scraper's tables from scratch every time this module is loaded.
# Each entry pairs a table name with the exact DDL used to recreate it.
_TABLE_DDL = (
    (
        "LVAs",
        "CREATE TABLE LVAs(id BIGINT(20) unsigned NOT NULL AUTO_INCREMENT,lvid VARCHAR(6), lvname VARCHAR(256), lvpath VARCHAR(256),PRIMARY KEY(id))",
    ),
    (
        "Profs",
        "CREATE TABLE Profs(id BIGINT(20) unsigned NOT NULL AUTO_INCREMENT,name VARCHAR(256),PRIMARY KEY(id))",
    ),
    (
        "LPLink",
        "CREATE TABLE LPLink(id BIGINT(20) unsigned NOT NULL AUTO_INCREMENT,LId bigint(20),PId bigint(20),PRIMARY KEY(id))",
    ),
    (
        "SubCats",
        "CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id))",
    ),
)
for _table, _ddl in _TABLE_DDL:
    # IF EXISTS makes the drop a no-op when the table is missing, so no
    # exception has to be swallowed and genuine database errors still surface.
    # The table names are the fixed literals above, never external input.
    c.execute(f"DROP TABLE IF EXISTS {_table}")
    c.execute(_ddl)
db.commit()
def get_dirstruct():
with open("app/pwfile.json", "r") as f:
cred = json.load(f)
@@ -21,27 +62,89 @@ def get_dirstruct():
# ssh.get_host_keys().add(cred["sftpurl"], 'ssh-rsa', key)
ssh.connect(cred["sftpurl"], username=cred["sftpuser"], password=cred["sftpPW"])
sftp = ssh.open_sftp()
folders = sftp.listdir_attr("/mnt/save/daten/Unizeug/")
folders = sftp.listdir_attr(unizeug_path)
for entry in folders:
if entry is None:
continue
if S_ISDIR(entry.st_mode):
fname = str(entry.filename)
regex=re.compile(r"Multimedia_only")
if regex.search(fname):
if not S_ISDIR(entry.st_mode):
continue
fname = str(entry.filename)
regex = re.compile(r"Multimedia_only")
if regex.search(fname):
continue
# print(fname)
lvid = re.search(r"\d{3}.\d{3}", fname)
# print(lvid)
if lvid is None:
continue
lvid = int(lvid.group()[:3] + lvid.group()[4:])
name = fname[:-8]
# print(name)
# print(lvid)
cur = db.cursor()
cur.execute(
"INSERT INTO LVAs (lvid, lvname, lvpath) VALUES(?,?,?)", (lvid, name, fname)
)
cur.execute("SELECT id FROM LVAs WHERE lvid=?", (lvid,))
lid = cur.fetchone()[0]
db.commit()
for profsdir in sftp.listdir_attr(unizeug_path + fname + "/"):
if profsdir is None or not S_ISDIR(profsdir.st_mode):
continue
# print(fname)
lvid = re.search(r"\d{3}.\d{3}", fname)
# print(lvid)
if lvid is None:
# print(profsdir.filename)
try:
lastname, firstname = re.split(r"[_\-\s]", str(profsdir.filename))
pid = link_prof(firstname, lastname, lid)
except ValueError:
print(f"{name} is broken")
continue
lvid=int(lvid.group()[:3]+lvid.group()[4:])
name=fname[:-8]
print(name)
print(lvid)
cur=db.cursor()
cur.execute("INSERT INTO LVAs (lvid, lvname, lvpath) VALUES(?,?,?)",(lvid,name,fname))
db.commit()
for cat in sftp.listdir_attr(
unizeug_path + fname + "/" + profsdir.filename + "/"
):
if cat is None or not S_ISDIR(cat.st_mode):
continue
if cat.filename not in SUBCAT_CATEGORIES:
continue
idx = CATEGORIES.index(cat.filename)
for subcat in sftp.listdir_attr(
unizeug_path
+ fname
+ "/"
+ profsdir.filename
+ "/"
+ cat.filename
+ "/"
):
if subcat is None or not S_ISDIR(subcat.st_mode):
continue
cur = db.cursor()
cur.execute(
"INSERT INTO SubCats (LId,PId,cat,name) VALUES(?,?,?,?)",
(lid, pid, idx, subcat.filename),
)
db.commit()
def link_prof(firstname, lastname, lid):
    """Link a lecturer to a course, creating the lecturer row if necessary.

    The lecturer is looked up in ``Profs`` by name, first as
    ``"<lastname> <firstname>"`` and then as ``"<firstname> <lastname>"``.
    If neither spelling exists, a new ``Profs`` row is inserted using the
    ``"<lastname> <firstname>"`` form. In every case one ``(lid, pid)`` row is
    added to ``LPLink`` and the transaction is committed on the module-level
    ``db`` connection.

    Args:
        firstname: Lecturer's first name.
        lastname: Lecturer's last name.
        lid: Id of the course row to link (get_dirstruct passes ``LVAs.id``).

    Returns:
        The ``Profs.id`` of the existing or newly created lecturer.
    """
    cur = db.cursor()
    canonical = lastname + " " + firstname

    pid = None
    # Accept either name order for an existing lecturer; the canonical
    # "<lastname> <firstname>" spelling is tried first.
    for candidate in (canonical, firstname + " " + lastname):
        cur.execute("SELECT id from Profs WHERE name=?", (candidate,))
        row = cur.fetchone()
        if row is not None:
            pid = row[0]
            break

    if pid is None:
        cur.execute("INSERT INTO Profs (name) VALUES(?)", (canonical,))
        # Take the generated AUTO_INCREMENT id from the cursor instead of
        # re-selecting by name, which would fail with a TypeError if the
        # stored name does not compare equal to the inserted one.
        pid = cur.lastrowid

    cur.execute("INSERT INTO LPLink (LId,PId) VALUES(?,?)", (lid, pid))
    db.commit()
    return pid
if __name__ == "__main__":

View File

@@ -20,6 +20,16 @@ app.mount("/static", StaticFiles(directory="./static"), name="static")
db = mariadb.connect(
host="localhost", user="wildserver", password="DBPassword", database="Unizeug"
)
# Document categories. The upload form submits the category as a number in
# its `stype` field (e.g. Prüfung => 0), which matches the position in this
# list.
CATEGORIES = [
"Prüfungen",
"Klausuren",
"Übungen",
"Labore",
"Unterlagen",
"Zusammenfassungen",
"Multimedia",
]
# NOTE(review): presumably the categories that have sub-category folders;
# its usage is not visible in this part of the file — confirm.
SUBCAT_CATEGORIES = ["Klausuren", "Übungen", "Labore"]
# cur = db.cursor()
@@ -91,7 +101,7 @@ async def get_submittion(
fname: Annotated[str, Form()], # Path to pdf File
fileId: Annotated[str, Form()],
sem: Annotated[str, Form()], # Semester eg. 2024W
stype: Annotated[str, Form()], # Type of File eg. Prüfung
stype: Annotated[str, Form()], # Type of File eg. Prüfung=>0
ex_date: Annotated[str, Form()], # Date of Exam only when type is exam
rects: Annotated[
str, Form()

View File

@@ -1,6 +1,7 @@
CREATE DATABASE Unizeug;
USE Unizeug;
CREATE TABLE LVAs(id BIGINT(20) unsigned NOT NULL AUTO_INCREMENT,lvid MEDIUMINT unsigned, lvname VARCHAR(256), lvpath VARCHAR(256),PRIMARY KEY(id));
CREATE TABLE Porfs(id BIGINT(20) unsigned NOT NULL AUTO_INCREMENT,name VARCHAR(256),PRIMARY KEY(id));
CREATE TABLE LPLink(id BIGINT(20) unsigned NOT NULL AUTO_INCREMENT,LVAID bigint(20),ProfID bigint(20),PRIMARY KEY(id));
CREATE TABLE LVAs(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,lvid VARCHAR(6), lvname VARCHAR(256), lvpath VARCHAR(256),PRIMARY KEY(id));
CREATE TABLE Profs(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,name VARCHAR(256),PRIMARY KEY(id));
CREATE TABLE LPLink(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId bigint(20),PId bigint(20),PRIMARY KEY(id));
CREATE TABLE SubCats(id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,LId BIGINT(20),PId BIGINT(20),cat TINYINT UNSIGNED,name VARCHAR(256), PRIMARY KEY(id));
CREATE TABLE FIP(id UUID DEFAULT(UUID()), filename VARCHAR(256), PRIMARY KEY(id));

View File

@@ -34,19 +34,19 @@
<input type="text" id="name" name="fname" placeholder="Prüfung" /><br />
<label for="sem">Semester:</label>
<input type="text" id="sem" name="sem" placeholder="2024W" /><br />
<input type="radio" id="pruefung" name="stype" value="pruefung" checked="checked" />
<input type="radio" id="pruefung" name="stype" value="0" checked="checked" />
<label for="pruefung">Prüfung</label><br />
<input type="radio" id="klausur" name="stype" value="klausur" />
<input type="radio" id="klausur" name="stype" value="1" />
<label for="klausur">Klausur</label><br />
<input type="radio" id="uebung" name="stype" value="uebung" />
<input type="radio" id="uebung" name="stype" value="2" />
<label for="uebung">Übung</label><br />
<input type="radio" id="labor" name="stype" value="labor" />
<input type="radio" id="labor" name="stype" value="3" />
<label for="labor">Labor</label><br />
<input type="radio" id="unterlagen" name="stype" value="unterlagen" />
<input type="radio" id="unterlagen" name="stype" value="4" />
<label for="unterlagen">Unterlagen</label><br />
<input type="radio" id="zusammenfassungen" name="stype" value="zusammenfassungen" />
<label for="zusammenfassungen">Zusammenfassungen</label><br />
<input type="radio" id="multimedia" name="stype" value="multimedia" />
<input type="radio" id="zusammenfassungen" name="stype" value="5" />
<label for="zusammenfassungen">Zusammenfassung</label><br />
<input type="radio" id="multimedia" name="stype" value="6" />
<label for="multimedia">Multimedia</label><br />
<label for="date">Datum</label>
<input type="date" id="date" name="ex_date" /><br />