diff --git a/fet2020/documents/__init__.py b/fet2020/documents/__init__.py index c3fab2a5..14f3c82d 100644 --- a/fet2020/documents/__init__.py +++ b/fet2020/documents/__init__.py @@ -1 +1,16 @@ -from .api import getPadHTML, setPadHTML, createPadifNotExists + +from .api import getPadHTML, setPadHTML, createPadifNotExists, get_pad_link +from .parse import to_key, to_path, parse_html_from_ep + + + +def getInternPadHTML(key): + key = to_key(key) + createPadifNotExists(key, groupName="intern") + html = getPadHTML(to_key(key), groupName="intern") + + return parse_html_from_ep(html, key) + + +def get_int_pad_link(path): + return get_pad_link(to_key(path), groupName="intern") \ No newline at end of file diff --git a/fet2020/documents/api.py b/fet2020/documents/api.py index 69971d9b..264e5864 100644 --- a/fet2020/documents/api.py +++ b/fet2020/documents/api.py @@ -2,18 +2,15 @@ from django.conf import settings import os import urllib.parse - from etherpad_lite import EtherpadLiteClient, EtherpadException - from contextlib import contextmanager - - import logging - +from .parse import ToText logger = logging.getLogger(__name__) SERVER_URL = settings.ETHERPAD_CLIENT["exturl"] + @contextmanager def ignore_ep_exception(msg="", *exceptions): try: @@ -21,7 +18,7 @@ def ignore_ep_exception(msg="", *exceptions): except exceptions as e: logger.error(msg) logger.error("%s", e) - + def get_ep_client(groupName="fet"): epc = None @@ -29,8 +26,7 @@ def get_ep_client(groupName="fet"): with ignore_ep_exception("Error connecting to Etherpad", Exception): with open(os.path.abspath(settings.ETHERPAD_CLIENT["apikey"]), "r") as f: - apikey = f.read() - apikey = apikey.rstrip() + apikey = f.read().rstrip() epc = EtherpadLiteClient( base_params={ "apikey": apikey, @@ -42,6 +38,7 @@ def get_ep_client(groupName="fet"): return epc, group + @contextmanager def ep_client(groupName="fet"): epc, group = get_ep_client(groupName) @@ -52,43 +49,63 @@ def ep_client(groupName="fet"): yield epc, group -def __checkPadExists(padID=None): +def __checkPadExists(padID=None, groupName="fet"): if not padID: return False - with ep_client() as (epc, group): + with ep_client(groupName) as (epc, group): if not epc or not group: return None - lists = epc.listPads(groupID=group["groupID"]) + lists = epc.listPads(groupID = group["groupID"]) if str(padID) in lists["padIDs"]: return True return False -def createPadifNotExists(padID): - if not __checkPadExists(padID=padID): - with ep_client() as (epc, group): +def list_pads(groupName="fet"): + with ep_client(groupName) as (epc, group): + return [p.split("$")[1] for p in epc.listPads(groupID = group["groupID"])["padIDs"]] + +def list_main_pads(groupName="intern"): + def is_main(s): + return len(s.split("-")) == 1 + return list(filter(is_main, list_pads(groupName))) + +def clean_up_pads(groupName="intern"): + with ep_client(groupName) as (epc, group): + for pid in epc.listPads(groupID = group["groupID"])["padIDs"]: + print(pid) + html = epc.getHTML(padID=pid)["html"] + if ToText(html) == "": + print(f"Deleting...${pid}") + epc.deletePad(padID = pid) + return + + +def createPadifNotExists(padID, groupName="fet"): + if not __checkPadExists(padID=padID, groupName=groupName): + with ep_client(groupName) as (epc, group): if not epc or not group: return None epc.createGroupPad( - groupID = group["groupID"], padName=padID, text="helloworld" + groupID=group["groupID"], padName=padID, text="helloworld" ) return padID -def getPadHTML(padID): +def getPadHTML(padID, groupName="fet"): text = None - with ep_client() as (epc, group): + with ep_client(groupName) as (epc, group): if not epc or not group: return None text = epc.getHTML(padID = group["groupID"] + "$" + padID)["html"] return text -def setPadHTML(padID, html): - epc, group = get_ep_client() +def setPadHTML(padID, html, groupName="fet"): + epc, group = get_ep_client(groupName) if not epc or not group: return None @@ -96,12 +113,13 @@ def setPadHTML(padID, html): return html -def get_pad_link(padID): +def get_pad_link(padID, groupName="fet"): if padID is None: return "#" - with ep_client() as (epc, group): + with ep_client(groupName) as (epc, group): if not epc or not group: return "#" return urllib.parse.urljoin( - settings.ETHERPAD_CLIENT["exturl"], "p/" + group["groupID"] + "$" + str(padID) + settings.ETHERPAD_CLIENT["exturl"], + "p/" + group["groupID"] + "$" + str(padID), ) diff --git a/fet2020/documents/etherpadlib.py b/fet2020/documents/etherpadlib.py index deaf4b22..83917390 100644 --- a/fet2020/documents/etherpadlib.py +++ b/fet2020/documents/etherpadlib.py @@ -7,8 +7,8 @@ from authentications.decorators import ep_authenticated_user @ep_authenticated_user -def __get_ep_sessionid(request): - epc, group = get_ep_client() +def __get_ep_sessionid(request, groupName="fet"): + epc, group = get_ep_client(groupName) if not epc or not group: return None, None @@ -30,8 +30,8 @@ def __get_ep_sessionid(request): return result["sessionID"], expires -def add_ep_cookie(request, response): - ep_sessid, expires = __get_ep_sessionid(request) +def add_ep_cookie(request, response, groupName="fet"): + ep_sessid, expires = __get_ep_sessionid(request, groupName=groupName) if ep_sessid: response.set_cookie( diff --git a/fet2020/documents/parse.py b/fet2020/documents/parse.py new file mode 100644 index 00000000..d7ed2591 --- /dev/null +++ b/fet2020/documents/parse.py @@ -0,0 +1,50 @@ +from slugify import slugify +import re +from bs4 import BeautifulSoup +from django.urls import reverse +from os.path import join +from functools import partial + +def clean_string(s): + s = re.sub("[^\w\d\_/-]", "_", s).replace("/", "-") + while s[-1]=="-": + s=s[0:-1] + return s + +def to_key(s): + return (clean_string(s).replace("/", "-")) + +def to_path(s): + return clean_string(s).replace("-","/") + + +def create_a_link_for(v, path): + v = v.group(1) + v = clean_string(v).replace("-", "/") + if path[0] != "/": + path = join(path, v) + while path[0]=="/": + path = path[1:] + + u = reverse("int", kwargs={"path": path}) + return "%s" % (u, v) + + +def ToText(html): + s = BeautifulSoup(html, "html.parser").find("body") + + return s.get_text().strip() + + +def parse_html_from_ep(html, key): + path = to_path(key) + soup = BeautifulSoup(html, "html.parser") + soup = soup.find("body") + textNodes = soup.findAll(text=True) + for node in textNodes: + text = str(node) + text = re.sub(r'^##(.+)$', r'