From 4c8ef46c6a570a52586c6df3d04f4a902b496c5a Mon Sep 17 00:00:00 2001 From: www Date: Sun, 30 Aug 2020 11:25:12 +0000 Subject: [PATCH] multiple added features including tokens --- .gitignore | 3 +- fetch_curricula.js | 44 ++++++++++++ flaskapp/__init__.py | 101 +++++++++++++++++++++++---- flaskapp/token.py | 41 +++++++++++ lib.js | 44 ------------ nodelib/lib.js | 81 +++++++++++++++++++++ tissparse.js => nodelib/tissparse.js | 27 ++++++- run | 3 +- 8 files changed, 283 insertions(+), 61 deletions(-) create mode 100644 fetch_curricula.js create mode 100644 flaskapp/token.py delete mode 100644 lib.js create mode 100644 nodelib/lib.js rename tissparse.js => nodelib/tissparse.js (63%) diff --git a/.gitignore b/.gitignore index d99d6d2..c9cee1c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ node_modules/ *.yaml *.html -*.png \ No newline at end of file +*.png +.env/ \ No newline at end of file diff --git a/fetch_curricula.js b/fetch_curricula.js new file mode 100644 index 0000000..afe268e --- /dev/null +++ b/fetch_curricula.js @@ -0,0 +1,44 @@ +const fs = require("fs"); +const {read_html, du_unizeug, fetch_page,merge_folders_courselist} = require("./nodelib/lib"); +const tissparse = require("./nodelib/tissparse"); +const YAML = require("yaml"); + + +async function load_curriculum(link, targetfile) { + html = await fetch_page(link,'div.ui-datatable-tablewrapper'); + folders=du_unizeug() + console.log(`writing to ${targetfile}.html`) + fs.writeFile(targetfile+".html", html, function (err) { + if (err) return console.log(err); + }); + courselist=merge_folders_courselist(tissparse.courselist(html),folders) + console.log(`writing to ${targetfile}`) + fs.writeFile(targetfile, YAML.stringify(courselist["tab2"]), function (err) { + if (err) return console.log(err); + }); + +} + +const file = fs.readFileSync('./curricula.yaml', 'utf8') +curricula=YAML.parse(file) +for (key in curricula ){ + console.log(`loading link: ${curricula[key]['link']}`); + console.log(`Target file data/${key}.yaml`); + if (curricula[key]["link"]) { + load_curriculum(curricula[key]["link"],`data/${key}.yaml`) + } +} + +fetch_page('https://tiss.tuwien.ac.at/curriculum/public/curriculum.xhtml?dswid=3493&dsrid=603&key=58908','div.ui-datatable-tablewrapper').then((html)=> { + folders=du_unizeug() + courselist=tissparse.courselist(html) + courselist.forEach((value,index)=> { + i=folders["lookup"][value["lvanr"]] // lookup the LVAnr and get an Array with indexes + if (i) value["folders"] = i.reduce((acc,item)=>{ + acc.push(folders["folders"][item]) // get one of the folders that are related to LVAnr + return acc + },[]) + }) + + //console.log(YAML.stringify(courselist)) +}); \ No newline at end of file diff --git a/flaskapp/__init__.py b/flaskapp/__init__.py index 187fb68..4f44d08 100644 --- a/flaskapp/__init__.py +++ b/flaskapp/__init__.py @@ -11,21 +11,62 @@ import re from functools import partial import yaml import slugify - - +from flask_mail import Mail,Message +from zipfile import ZipFile +from .token import TokenCollection +from flask import request # Initialize application app = Flask(__name__) +app.config['MAIL_SERVER']= "buran.htu.tuwien.ac.at" +app.config['MAIL_DEFAULT_SENDER']="andis@fet.at" +app.config['FREEZER_DESTINATION_IGNORE']=['*.zip',"intern"] +mail=Mail(app) app.config["url_prefix"]="" app.logger.setLevel(logging.DEBUG) # Initialize FlatPages Index +tokens = TokenCollection("tokens.yaml") + +#msg = Message("Hello", +# sender="andis@fet.at", +# recipients=["andis@fet.at"]) + +#mail.send(msg) + + + +lvas=[] +studien={} +#search the data directory for yaml files +for base,_, files in os.walk("data"): + for fn in files: + if re.match(".*yaml$", fn): + with open(os.path.join("data",fn),"r") as f: + data=yaml.load(f.read(),yaml.Loader) + studien[fn]=data + + +with open(os.path.join("test.yaml"),"w") as f: + f.write(yaml.dump(studien)) + +# create a lva list from the structure +for studium, lvaliste in studien.items(): + for k,lva in lvaliste.items(): + if len(lva)>3: + lvas+=[lva] + +folders={} + +with open(os.path.join("testlvaliste.yaml"),"w") as f: + f.write(yaml.dump(lvas)) +# make folders compact in lva +for lva in lvas: + for l in lva["courses"]: + if "folders" in l: + for folder in l["folders"]: + folders[folder["folder"]]= folder["folderpath"] + -lvas= [ - {"name": "LVA1"} -] -with open("data/test.yaml","r") as f: - lvas=yaml.load(f.read(),yaml.Loader) -app.logger.info(lvas) app.logger.info('Initialize FET BSP Sammlung') freezer = Freezer(app) @@ -41,16 +82,52 @@ def slug(string): def toyaml(obj): return yaml.dump(obj) +@page_blueprint.route('/sendmail.html') +def send_a_mail(): + msg = Message("Hello", + sender="andis@fet.at", + recipients=["andis@fet.at"]) + app.logger.info(msg) + mail.send(msg) + return "Done", 200 #@page_blueprint.route('//',strict_slashes=False) @page_blueprint.route('/') -#@csp_header() def index(): - return render_template("lva_liste.html", lvas=lvas) + return render_template("lva_liste.html", lvas=lvas,zip=False) + +@page_blueprint.route('/intern/zips.html') +def indexzips(): + return render_template("lva_liste.html", lvas=lvas,zip=True) + +@page_blueprint.route('intern/createtoken.html') +def createtoken(): + t=tokens.create() + return render_template("lva_liste.html", lvas=lvas,zip=True, token=t) + + +@page_blueprint.route('intern/zips//.zip') +def files(name): + + token = request.args.get('token') + # create a ZipFile objec + if not name in folders: + app.log.error("Not found %s" % name) + return "NotFound",404 + if not tokens.is_valid(token): + return "Restricted",401 + + with ZipFile("zips/%s.zip" % name, 'w') as zipObj: + # Iterate over all the files in directory + for folderName, subfolders, filenames in os.walk(folders[name]): + for filename in filenames: + #create complete filepath of file in directory + filePath = os.path.join(folderName, filename) + # Add file to zip + zipObj.write(filePath, os.path.basename(filePath)) + return send_from_directory(os.path.abspath("zips"), "%s.zip" % name) - - @api_blueprint.route('/index.json',strict_slashes=False) def api_index(name='index'): return jsonify(page={}), 200, {'Content-Type': 'application/json; charset=utf-8'} diff --git a/flaskapp/token.py b/flaskapp/token.py new file mode 100644 index 0000000..d595bec --- /dev/null +++ b/flaskapp/token.py @@ -0,0 +1,41 @@ +import uuid +from collections.abc import Mapping +import datetime +import yaml +class TokenCollection(Mapping): + def __init__(self, filename: str): + self.filename=filename + with open(filename,"r") as f: + self.d=yaml.load(f.read(),yaml.Loader) + if self.d is None: + self.d=dict() + + def save(self): + with open(self.filename,"w") as f: + f.write(yaml.dump(self.d)) + def __getitem__(self,key): + return d[key] + + def is_valid(self, token, days=1): + if not token in self.d: + return False + #return self.d[token]["created"] + datetime.timedelta(days=days) + #print(self.d[token]["created"] + datetime.timedelta(days=days)) + if self.d[token]["created"] + datetime.timedelta(days=days) > datetime.datetime.now(): + return True + return False + + def create(self, created_by="Anonym"): + t=uuid.uuid4() + self.d[str(t)]={ + "created": datetime.datetime.now(), + "createdby": created_by + } + self.save() + return str(t) + + def __iter__(self): + return iter(self.d) + + def __len__(self): + return len(self.d) \ No newline at end of file diff --git a/lib.js b/lib.js deleted file mode 100644 index 83d6838..0000000 --- a/lib.js +++ /dev/null @@ -1,44 +0,0 @@ -const fs = require("fs"); -const YAML = require("yaml"); -const child_process = require("child_process"); -async function read_html(filename){ - return fs.readFileSync(filename,"utf8"); -} - - - function parse_du_line(text) { - res={} - - r=text.split("\t") - if (r.length <2) return [text]; // valid line mus have a column size and folder - foldername=r[1] - res["size"]=r[0] - res["folderpath"]=foldername - res["folder"]=foldername.split("/")[foldername.split("/").length-1] - //console.log([foldername.split("/")[foldername.split("/").length-1], foldername.split("/"),foldername.split("/").length]) - r1=foldername.replace(/(\d{3})[\._]?([A\d]{3})/i,"$1.$2") - r1=r1.match(/(\d{3})\.([A\d]{3})/i) - if (!r1) return res; - res["lvanr"] = r1[0] - return res -} - -function du_unizeug(){ -buf = child_process.execSync("du /mnt/save/daten/Unizeug/ -d 1 -h",{"maxBuffer": 1024*1024*48}).toString() - tab=[]; - - buf.split("\n").forEach(value => { - text=parse_du_line(value) - tab.push(text) - }) - lookup=tab.reduce((acc,item,index)=>{ - if (item["lvanr"]) { - if (acc[item["lvanr"]]) acc[item["lvanr"]].push(index); - else acc[item["lvanr"]] =[index]; - } - return acc; - }, {}); - return {"folders": tab, "lookup": lookup} - -} -module.exports = {read_html,du_unizeug} \ No newline at end of file diff --git a/nodelib/lib.js b/nodelib/lib.js new file mode 100644 index 0000000..dc69004 --- /dev/null +++ b/nodelib/lib.js @@ -0,0 +1,81 @@ +const fs = require("fs"); +const YAML = require("yaml"); +const child_process = require("child_process"); +const puppeteer = require('puppeteer'); +async function read_html(filename){ + return fs.readFileSync(filename,"utf8"); +} + + + function parse_du_line(text) { + res={} + + r=text.split("\t") + if (r.length <2) return [text]; // valid line mus have a column size and folder + foldername=r[1] + res["size"]=r[0] + res["folderpath"]=foldername + res["folder"]=foldername.split("/")[foldername.split("/").length-1] + //console.log([foldername.split("/")[foldername.split("/").length-1], foldername.split("/"),foldername.split("/").length]) + r1=foldername.replace(/(\d{3})[\._]?([A\d]{3})/i,"$1.$2") + r1=r1.match(/(\d{3})\.([A\d]{3})/i) + if (!r1) return res; + res["lvanr"] = r1[0] + return res +} + +function du_unizeug(){ +buf = child_process.execSync("du /mnt/save/daten/Unizeug/ -d 1 -h",{"maxBuffer": 1024*1024*48}).toString() + tab=[]; + + buf.split("\n").forEach(value => { + text=parse_du_line(value) + tab.push(text) + }) + lookup=tab.reduce((acc,item,index)=>{ + if (item["lvanr"]) { + if (acc[item["lvanr"]]) acc[item["lvanr"]].push(index); + else acc[item["lvanr"]] =[index]; + } + return acc; + }, {}); + return {"folders": tab, "lookup": lookup} + +} + + +async function fetch_page(url, selector) { + const browser = await puppeteer.launch({args: ['--no-sandbox']}); + const page = await browser.newPage(); + await page.goto(url, { + waitUntil: 'networkidle2' +}); + + //console.log("Waiting for selector: "+selector) + await page.waitForSelector(selector,{ timeout: 7000 }).catch((err)=>{console.log(err); page.screenshot({path: 'exampleerr.png'}); await browser.close();}) + await page.screenshot({path: 'example.png'}); + let body = await page.evaluate((selector)=>{ + let b = $(selector)[0].innerHTML; + return b + },selector).catch((err)=>{console.log(err); await browser.close()}); + //console.log(await page.text()); + await browser.close(); + //console.log(body) + return body +} + + +function merge_folders_courselist(courselist,folders) { + Object.entries(courselist["tab2"]).forEach(([index0,value0])=> { + value0["courses"].forEach((value)=> { + i=folders["lookup"][value["lvanr"]] // lookup the LVAnr and get an Array with indexes + if (i) value["folders"] = i.reduce((acc,item)=>{ + acc.push(folders["folders"][item]) // get one of the folders that are related to LVAnr + return acc + },[]) + },value0) +}) +return courselist; + +} +module.exports = {read_html,du_unizeug, fetch_page,merge_folders_courselist} \ No newline at end of file diff --git a/tissparse.js b/nodelib/tissparse.js similarity index 63% rename from tissparse.js rename to nodelib/tissparse.js index 6a09992..f579f8d 100644 --- a/tissparse.js +++ b/nodelib/tissparse.js @@ -1,11 +1,13 @@ const cheerio = require('cheerio'); +const slugify=require('slugify'); function courselist(html) { $ = cheerio.load(html); - + tab2={}; tab=[]; studium=""; pruefungsfach=""; + lvaname=""; $('table').find('tr').each((index,element)=>{ let element_first=$(element).find('td.nodeTable-title > div.ui-widget'); if (element_first.hasClass("nodeTable-level-0")) { @@ -17,11 +19,29 @@ function courselist(html) { if (element_first.hasClass("nodeTable-level-2")) { modul=element_first.text().replace(/^[\s\n]+|[\s\n]+$/g, '') } + if (element_first.hasClass("nodeTable-level-3")) { + lvaname=element_first.text().replace(/^[\s\n]+|[\s\n]+$/g, '') + tab2[lvaname]={"courses": []}; + } if (element_first.hasClass("nodeTable-level-4")) { let course_key=element_first.find("div.courseKey").text(); course_key=course_key.replace(/^[\s\n]+|[\s\n]+$/g, ''); let ects=$(($(element).find("td.nodeTable-short")).toArray()[2]).text() let std=$(($(element).find("td.nodeTable-short")).toArray()[1]).text() + tab2[lvaname]["studium"] = studium + tab2[lvaname]["modul"] = modul + tab2[lvaname]["prufungsfach"] = pruefungsfach + tab2[lvaname]["lvaname"] = lvaname + tab2[lvaname]["ects"]=ects + tab2[lvaname]["std"]=std + tab2[lvaname]["courses"].push({ + "href": element_first.find("a").attr("href"), + "lvanr": course_key.split(" ")[0], + "lvatyp": course_key.split(" ")[1], + "lvasem": course_key.split(" ")[2], + "courseKey": course_key, + "courseTitle":element_first.find("div.courseTitle").text().replace(/^[\s\n]+|[\s\n]+$/g, '') + }) tab.push({"href": element_first.find("a").attr("href"), "studium": studium, "pruefungsfach": pruefungsfach, @@ -29,6 +49,7 @@ function courselist(html) { "ects":ects, "std":std, "courseKey": course_key, + "lvaname": lvaname, "lvanr": course_key.split(" ")[0], "lvatyp": course_key.split(" ")[1], "lvasem": course_key.split(" ")[2], @@ -36,7 +57,7 @@ function courselist(html) { }) } }) - - return tab; + + return {tab2}; } module.exports = {courselist} \ No newline at end of file diff --git a/run b/run index 99d0f14..9c72dff 100755 --- a/run +++ b/run @@ -1,3 +1,4 @@ #!/bin/bash source .env/bin/activate -uwsgi uwsgi.ini +node fetch_curricula.js +python run.py build \ No newline at end of file