multiple added features including tokens

This commit is contained in:
www
2020-08-30 11:25:12 +00:00
parent 5458633cd3
commit 4c8ef46c6a
8 changed files with 283 additions and 61 deletions

3
.gitignore vendored
View File

@@ -1,4 +1,5 @@
node_modules/
*.yaml
*.html
*.png
*.png
.env/

44
fetch_curricula.js Normal file
View File

@@ -0,0 +1,44 @@
const fs = require("fs");
const {read_html, du_unizeug, fetch_page,merge_folders_courselist} = require("./nodelib/lib");
const tissparse = require("./nodelib/tissparse");
const YAML = require("yaml");
/**
 * Fetch one curriculum page, attach the matching local folders to its courses
 * and store both the raw HTML and the merged course list.
 *
 * @param {string} link - URL of the curriculum page to fetch.
 * @param {string} targetfile - Path of the YAML output file; the raw HTML is
 *   written next to it as `${targetfile}.html`.
 * @returns {Promise<void>} Resolves after both writes finished; write errors
 *   are logged, not thrown.
 */
async function load_curriculum(link, targetfile) {
  const html = await fetch_page(link, 'div.ui-datatable-tablewrapper');
  if (!html) {
    // Without a page body there is nothing to parse or to write.
    console.log(`no content fetched for ${link}, skipping ${targetfile}`);
    return;
  }
  const folders = du_unizeug();

  console.log(`writing to ${targetfile}.html`);
  try {
    await fs.promises.writeFile(targetfile + ".html", html);
  } catch (err) {
    console.log(err);
  }

  const courselist = merge_folders_courselist(tissparse.courselist(html), folders);
  console.log(`writing to ${targetfile}`);
  try {
    await fs.promises.writeFile(targetfile, YAML.stringify(courselist["tab2"]));
  } catch (err) {
    console.log(err);
  }
}
// Entry point: read the curriculum index and fetch every entry that has a link.
const file = fs.readFileSync('./curricula.yaml', 'utf8');
const curricula = YAML.parse(file);
for (const key of Object.keys(curricula || {})) {
  const entry = curricula[key] || {};
  const link = entry["link"];
  if (!link) {
    console.log(`no link configured for ${key}, skipping`);
    continue;
  }
  console.log(`loading link: ${link}`);
  console.log(`Target file data/${key}.yaml`);
  // All curricula are loaded concurrently; a failure of one is only logged.
  load_curriculum(link, `data/${key}.yaml`).catch((err) => console.log(err));
}

// Ad-hoc run against a single curriculum page; the merged list is built but not stored.
fetch_page('https://tiss.tuwien.ac.at/curriculum/public/curriculum.xhtml?dswid=3493&dsrid=603&key=58908','div.ui-datatable-tablewrapper').then((html) => {
  const folders = du_unizeug();
  // load_curriculum reads the parser result as courselist["tab2"], so it is an
  // object and cannot be iterated with forEach; use the shared merge helper.
  const courselist = merge_folders_courselist(tissparse.courselist(html), folders);
  //console.log(YAML.stringify(courselist))
  return courselist;
}).catch((err) => console.log(err));

View File

@@ -11,21 +11,62 @@ import re
from functools import partial
import yaml
import slugify
from flask_mail import Mail,Message
from zipfile import ZipFile
from .token import TokenCollection
from flask import request
# Initialize application
app = Flask(__name__)
# Mail settings stored in the Flask config before `Mail(app)` is created below.
app.config['MAIL_SERVER']= "buran.htu.tuwien.ac.at"
app.config['MAIL_DEFAULT_SENDER']="andis@fet.at"
# NOTE(review): presumably tells the freezer to leave generated zip files and
# the intern/ area alone when it cleans its destination — confirm against the
# Frozen-Flask documentation.
app.config['FREEZER_DESTINATION_IGNORE']=['*.zip',"intern"]
mail=Mail(app)
app.config["url_prefix"]=""
app.logger.setLevel(logging.DEBUG)
# Initialize FlatPages Index
# Token store backed by tokens.yaml.
tokens = TokenCollection("tokens.yaml")
#msg = Message("Hello",
# sender="andis@fet.at",
# recipients=["andis@fet.at"])
#mail.send(msg)
lvas = []
studien = {}
# Search the data directory (including sub-directories) for YAML files.
for base, _, files in os.walk("data"):
    for fn in files:
        if re.match(".*yaml$", fn):
            # Join with the directory currently being walked; joining with the
            # literal "data" breaks for files found in sub-directories.
            with open(os.path.join(base, fn), "r") as f:
                # NOTE(review): yaml.Loader can construct arbitrary Python
                # objects; only acceptable while the data files are trusted.
                data = yaml.load(f.read(), yaml.Loader)
            studien[fn] = data
# Dump everything that was loaded into one file.
with open(os.path.join("test.yaml"), "w") as f:
    f.write(yaml.dump(studien))
# Create a flat LVA list from the per-file structure.
for studium, lvaliste in studien.items():
    if not isinstance(lvaliste, dict):
        # Empty documents (None) or list-shaped files have no LVA entries to collect.
        continue
    for k, lva in lvaliste.items():
        # Only entries carrying more than three fields are collected.
        if len(lva) > 3:
            lvas += [lva]
folders = {}
with open(os.path.join("testlvaliste.yaml"), "w") as f:
    f.write(yaml.dump(lvas))
# Collect a compact folder-name -> folder-path map from all courses.
for lva in lvas:
    for l in lva["courses"]:
        if "folders" in l:
            for folder in l["folders"]:
                folders[folder["folder"]] = folder["folderpath"]
# NOTE(review): `lvas` is set to a placeholder here and then replaced by the
# contents of data/test.yaml, which discards any value it held before this
# point — confirm that this override is still intended.
lvas= [
{"name": "LVA1"}
]
with open("data/test.yaml","r") as f:
    lvas=yaml.load(f.read(),yaml.Loader)
# Log the loaded list for debugging.
app.logger.info(lvas)
app.logger.info('Initialize FET BSP Sammlung')
# Freezer instance bound to the app.
freezer = Freezer(app)
@@ -41,16 +82,52 @@ def slug(string):
def toyaml(obj):
return yaml.dump(obj)
@page_blueprint.route('/sendmail.html')
def send_a_mail():
    """Send a fixed "Hello" message, log it, and answer with "Done" / 200."""
    recipients = ["andis@fet.at"]
    message = Message("Hello", sender="andis@fet.at", recipients=recipients)
    app.logger.info(message)
    mail.send(message)
    return "Done", 200
#@page_blueprint.route('/<path:name>/',strict_slashes=False)
@page_blueprint.route('/')
#@csp_header()
def index():
    """Render the LVA list template with the `zip` flag switched off."""
    # A second, unreachable return used to follow an older one here; only the
    # variant that passes the `zip` flag explicitly is kept.
    return render_template("lva_liste.html", lvas=lvas, zip=False)
@page_blueprint.route('/intern/zips.html')
def indexzips():
    """Render the LVA list template with the `zip` flag switched on."""
    page = render_template("lva_liste.html", zip=True, lvas=lvas)
    return page
# The rule starts with "/" like the other routes of this blueprint; a rule
# without a leading slash is only accepted when a blueprint prefix supplies one.
@page_blueprint.route('/intern/createtoken.html')
def createtoken():
    """Create a new token and render the LVA list template with it.

    The new token is passed to the template as `token`, with the `zip` flag
    switched on.
    """
    t = tokens.create()
    return render_template("lva_liste.html", lvas=lvas, zip=True, token=t)
# The rule starts with "/" and uses single slashes, like the other routes.
@page_blueprint.route('/intern/zips/<name>.zip')
def files(name):
    """Build a zip archive of one known folder and send it to the client.

    Args:
        name: Key into the module-level `folders` mapping (folder name -> path).

    Query parameters:
        token: Must be accepted by `tokens.is_valid`.

    Returns:
        ("NotFound", 404) for an unknown folder name, ("Restricted", 401) for a
        missing or invalid token, otherwise the freshly written zip file.
    """
    token = request.args.get('token')
    if name not in folders:
        # Flask exposes its logger as `app.logger`; `app.log` does not exist.
        app.logger.error("Not found %s", name)
        return "NotFound", 404
    if not tokens.is_valid(token):
        return "Restricted", 401
    source_dir = folders[name]
    # The output directory may not exist yet on a fresh checkout.
    os.makedirs("zips", exist_ok=True)
    # Create a ZipFile object and add every file below the source directory.
    with ZipFile("zips/%s.zip" % name, 'w') as zipObj:
        for folderName, subfolders, filenames in os.walk(source_dir):
            for filename in filenames:
                # Complete path of the file on disk.
                filePath = os.path.join(folderName, filename)
                # Store the path relative to the source directory so equally
                # named files from different sub-folders do not collide.
                zipObj.write(filePath, os.path.relpath(filePath, source_dir))
    return send_from_directory(os.path.abspath("zips"), "%s.zip" % name)
@api_blueprint.route('/index.json',strict_slashes=False)
def api_index(name='index'):
    """Answer with a JSON body holding an empty `page` object and status 200."""
    headers = {'Content-Type': 'application/json; charset=utf-8'}
    payload = jsonify(page={})
    return payload, 200, headers

41
flaskapp/token.py Normal file
View File

@@ -0,0 +1,41 @@
import uuid
from collections.abc import Mapping
import datetime
import yaml
class TokenCollection(Mapping):
    """Read-only mapping of tokens that is persisted in a YAML file.

    Keys are token strings. Each value is a dict with the keys ``created``
    (a ``datetime.datetime``) and ``createdby``.
    """

    def __init__(self, filename: str):
        """Load the tokens stored in `filename`.

        A missing or empty file yields an empty collection; the file is
        written on the first `save()`.
        """
        self.filename = filename
        try:
            with open(filename, "r") as f:
                # NOTE(review): yaml.Loader can construct arbitrary Python
                # objects; fine only while this file is written by this class
                # alone. Consider yaml.safe_load.
                self.d = yaml.load(f.read(), yaml.Loader)
        except FileNotFoundError:
            self.d = None
        if self.d is None:
            self.d = dict()

    def save(self):
        """Write all tokens back to the YAML file."""
        with open(self.filename, "w") as f:
            f.write(yaml.dump(self.d))

    def __getitem__(self, key):
        """Return the record stored for `key`; raises KeyError if unknown."""
        return self.d[key]

    def is_valid(self, token, days=1):
        """Return True if `token` exists and was created less than `days` days ago."""
        if token not in self.d:
            return False
        expires = self.d[token]["created"] + datetime.timedelta(days=days)
        return expires > datetime.datetime.now()

    def create(self, created_by="Anonym"):
        """Create a new random token, persist the collection and return the token string."""
        t = str(uuid.uuid4())
        self.d[t] = {
            "created": datetime.datetime.now(),
            "createdby": created_by
        }
        self.save()
        return t

    def __iter__(self):
        return iter(self.d)

    def __len__(self):
        return len(self.d)

44
lib.js
View File

@@ -1,44 +0,0 @@
const fs = require("fs");
const YAML = require("yaml");
const child_process = require("child_process");
/**
 * Read a file as UTF-8 text.
 *
 * @param {string} filename - Path of the file to read.
 * @returns {Promise<string>} The file contents (read synchronously, wrapped in a promise).
 */
async function read_html(filename) {
  const contents = fs.readFileSync(filename, { encoding: "utf8" });
  return contents;
}
/**
 * Parse one line of `du` output ("<size>\t<path>") into a folder record.
 *
 * @param {string} text - A single output line.
 * @returns {Object|Array} For a valid line an object with `size`,
 *   `folderpath`, `folder` (last path segment) and, when the path contains a
 *   course number, `lvanr` in the form "123.456". Lines without a tab are
 *   returned unchanged as `[text]`.
 */
function parse_du_line(text) {
  const columns = text.split("\t");
  if (columns.length < 2) return [text]; // a valid line must have a size and a folder column
  const [size, folderpath] = columns;
  const segments = folderpath.split("/");
  const res = {
    size: size,
    folderpath: folderpath,
    folder: segments[segments.length - 1],
  };
  // Normalise "123456", "123_456" and "123.456" to "123.456" before matching.
  const normalized = folderpath.replace(/(\d{3})[\._]?([A\d]{3})/i, "$1.$2");
  const match = normalized.match(/(\d{3})\.([A\d]{3})/i);
  if (match) res["lvanr"] = match[0];
  return res;
}
/**
 * Run `du` one level deep on the document share and index the result.
 *
 * @param {string} [root="/mnt/save/daten/Unizeug/"] - Directory to scan.
 * @returns {{folders: Array, lookup: Object}} `folders` holds one parsed entry
 *   per output line; `lookup` maps a course number to the indexes of all
 *   `folders` entries carrying that number.
 */
function du_unizeug(root = "/mnt/save/daten/Unizeug/") {
  // execFileSync passes the path as an argument, so no shell parses it.
  const output = child_process
    .execFileSync("du", [root, "-d", "1", "-h"], {"maxBuffer": 1024 * 1024 * 48})
    .toString();
  const folders = output.split("\n").map((line) => parse_du_line(line));
  const lookup = {};
  folders.forEach((item, index) => {
    const lvanr = item["lvanr"];
    if (!lvanr) return;
    if (lookup[lvanr]) lookup[lvanr].push(index);
    else lookup[lvanr] = [index];
  });
  return {"folders": folders, "lookup": lookup};
}
module.exports = {read_html,du_unizeug}

81
nodelib/lib.js Normal file
View File

@@ -0,0 +1,81 @@
const fs = require("fs");
const YAML = require("yaml");
const child_process = require("child_process");
const puppeteer = require('puppeteer');
/**
 * Load a file from disk as UTF-8 text.
 *
 * @param {string} filename - Path of the file to load.
 * @returns {Promise<string>} Promise for the file contents; the read itself is synchronous.
 */
async function read_html(filename) {
  const text = fs.readFileSync(filename, { encoding: "utf8" });
  return text;
}
/**
 * Turn one `du` output line ("<size>\t<path>") into a folder record.
 *
 * @param {string} text - A single output line.
 * @returns {Object|Array} For a valid line an object with `size`,
 *   `folderpath`, `folder` (last path segment) and, when the path contains a
 *   course number, `lvanr` in the form "123.456". Lines without a tab are
 *   returned unchanged as `[text]`.
 */
function parse_du_line(text) {
  const columns = text.split("\t");
  if (columns.length < 2) return [text]; // a valid line must have a size and a folder column
  const [size, folderpath] = columns;
  const segments = folderpath.split("/");
  const res = {
    size: size,
    folderpath: folderpath,
    folder: segments[segments.length - 1],
  };
  // Normalise "123456", "123_456" and "123.456" to "123.456" before matching.
  const normalized = folderpath.replace(/(\d{3})[\._]?([A\d]{3})/i, "$1.$2");
  const match = normalized.match(/(\d{3})\.([A\d]{3})/i);
  if (match) res["lvanr"] = match[0];
  return res;
}
/**
 * Run `du` one level deep on the document share and index the result.
 *
 * @param {string} [root="/mnt/save/daten/Unizeug/"] - Directory to scan.
 * @returns {{folders: Array, lookup: Object}} `folders` holds one parsed entry
 *   per output line; `lookup` maps a course number to the indexes of all
 *   `folders` entries carrying that number.
 */
function du_unizeug(root = "/mnt/save/daten/Unizeug/") {
  // execFileSync passes the path as an argument, so no shell parses it.
  const output = child_process
    .execFileSync("du", [root, "-d", "1", "-h"], {"maxBuffer": 1024 * 1024 * 48})
    .toString();
  const folders = output.split("\n").map((line) => parse_du_line(line));
  const lookup = {};
  folders.forEach((item, index) => {
    const lvanr = item["lvanr"];
    if (!lvanr) return;
    if (lookup[lvanr]) lookup[lvanr].push(index);
    else lookup[lvanr] = [index];
  });
  return {"folders": folders, "lookup": lookup};
}
/**
 * Open `url` in a headless browser and return the inner HTML of the first
 * element matching `selector`.
 *
 * @param {string} url - Page to load.
 * @param {string} selector - CSS selector of the element to extract.
 * @returns {Promise<string|undefined>} The element's inner HTML, or
 *   `undefined` when the selector did not appear in time or could not be read
 *   (the error is logged). Navigation and launch errors reject the promise.
 */
async function fetch_page(url, selector) {
  const browser = await puppeteer.launch({args: ['--no-sandbox']});
  try {
    const page = await browser.newPage();
    await page.goto(url, {
      waitUntil: 'networkidle2'
    });
    try {
      await page.waitForSelector(selector, { timeout: 7000 });
    } catch (err) {
      console.log(err);
      // Keep a screenshot of the failed page for debugging, then give up.
      await page.screenshot({path: 'exampleerr.png'});
      return undefined;
    }
    await page.screenshot({path: 'example.png'});
    try {
      // Runs inside the page; querySelector needs no library on the page and
      // accepts the same CSS selectors as waitForSelector above.
      return await page.evaluate((sel) => document.querySelector(sel).innerHTML, selector);
    } catch (err) {
      console.log(err);
      return undefined;
    }
  } finally {
    // Close the browser exactly once, on success and on every failure path.
    await browser.close();
  }
}
/**
 * Attach the matching local folders to every course of a parsed course list.
 *
 * @param {Object} courselist - Object whose `tab2` property maps a name to an
 *   entry with a `courses` array; each course has an `lvanr`.
 * @param {{folders: Array, lookup: Object}} folders - `lookup` maps an `lvanr`
 *   to indexes into `folders`.
 * @returns {Object} The same `courselist` object; courses with a lookup hit
 *   gain a `folders` array (modified in place).
 */
function merge_folders_courselist(courselist, folders) {
  Object.values(courselist["tab2"]).forEach((entry) => {
    entry["courses"].forEach((course) => {
      // Look up the course number and get an array of folder indexes.
      const indexes = folders["lookup"][course["lvanr"]];
      if (indexes) {
        course["folders"] = indexes.map((idx) => folders["folders"][idx]);
      }
    });
  });
  return courselist;
}
module.exports = {read_html,du_unizeug, fetch_page,merge_folders_courselist}

View File

@@ -1,11 +1,13 @@
const cheerio = require('cheerio');
const slugify=require('slugify');
function courselist(html) {
$ = cheerio.load(html);
tab2={};
tab=[];
studium="";
pruefungsfach="";
lvaname="";
$('table').find('tr').each((index,element)=>{
let element_first=$(element).find('td.nodeTable-title > div.ui-widget');
if (element_first.hasClass("nodeTable-level-0")) {
@@ -17,11 +19,29 @@ function courselist(html) {
if (element_first.hasClass("nodeTable-level-2")) {
modul=element_first.text().replace(/^[\s\n]+|[\s\n]+$/g, '')
}
if (element_first.hasClass("nodeTable-level-3")) {
lvaname=element_first.text().replace(/^[\s\n]+|[\s\n]+$/g, '')
tab2[lvaname]={"courses": []};
}
if (element_first.hasClass("nodeTable-level-4")) {
let course_key=element_first.find("div.courseKey").text();
course_key=course_key.replace(/^[\s\n]+|[\s\n]+$/g, '');
let ects=$(($(element).find("td.nodeTable-short")).toArray()[2]).text()
let std=$(($(element).find("td.nodeTable-short")).toArray()[1]).text()
tab2[lvaname]["studium"] = studium
tab2[lvaname]["modul"] = modul
tab2[lvaname]["prufungsfach"] = pruefungsfach
tab2[lvaname]["lvaname"] = lvaname
tab2[lvaname]["ects"]=ects
tab2[lvaname]["std"]=std
tab2[lvaname]["courses"].push({
"href": element_first.find("a").attr("href"),
"lvanr": course_key.split(" ")[0],
"lvatyp": course_key.split(" ")[1],
"lvasem": course_key.split(" ")[2],
"courseKey": course_key,
"courseTitle":element_first.find("div.courseTitle").text().replace(/^[\s\n]+|[\s\n]+$/g, '')
})
tab.push({"href": element_first.find("a").attr("href"),
"studium": studium,
"pruefungsfach": pruefungsfach,
@@ -29,6 +49,7 @@ function courselist(html) {
"ects":ects,
"std":std,
"courseKey": course_key,
"lvaname": lvaname,
"lvanr": course_key.split(" ")[0],
"lvatyp": course_key.split(" ")[1],
"lvasem": course_key.split(" ")[2],
@@ -36,7 +57,7 @@ function courselist(html) {
})
}
})
return tab;
return {tab2};
}
module.exports = {courselist}

3
run
View File

@@ -1,3 +1,4 @@
#!/bin/bash
# Activate the Python virtual environment kept in .env/.
source .env/bin/activate
# NOTE(review): presumably starts uWSGI with uwsgi.ini; if it stays in the
# foreground, the commands below only run after it exits — confirm.
uwsgi uwsgi.ini
# Run the Node script fetch_curricula.js.
node fetch_curricula.js
# Run run.py with the "build" argument.
python run.py build