from lxml.html.clean import clean_html, Cleaner

class PostKeyError(KeyError):
    """Raised when a post dict is missing a key it is required to have.

    Subclasses ``KeyError`` so callers that already handle ``KeyError``
    also catch this error.
    """

def post_to_solr(p):
    """Convert a post dict (or a list of post dicts) into Solr documents.

    Args:
        p: A dict describing a single post, or a list of such dicts. A list
            is converted element by element, recursively.

    Returns:
        For a dict: a new dict with the keys ``id`` (taken from
        ``p["slug"]``), ``date_dt`` (taken from ``p["public_date"]``) and
        ``text_txt`` (the cleaned ``body`` followed by the unmodified
        ``agenda_html``; missing or empty values count as ``""``).
        For a list: a list of the converted elements, in the same order.

    Raises:
        AssertionError: If ``p`` is neither a dict nor a list.
        PostKeyError: If the post lacks the ``url`` or ``body`` key.
        KeyError: If the post lacks the ``slug`` or ``public_date`` key.
    """
    # A list is handled by converting every element on its own.
    if isinstance(p, list):
        return [post_to_solr(item) for item in p]

    # Raised explicitly instead of using ``assert`` so the check still runs
    # under ``python -O``; the exception type callers see is unchanged.
    if not isinstance(p, dict):
        raise AssertionError(
            "Argument for post_to_solr needs to be a dict or list of dicts")

    def get_text(html):
        """Return ``html`` flattened to one line and run through the Cleaner."""
        if not html:
            return ""
        # Newlines, carriage returns, tabs and backslashes become spaces.
        for char in ("\n", "\r", "\t", "\\"):
            html = html.replace(char, " ")
        # NOTE(review): 'p' is listed in both allow_tags and remove_tags --
        # confirm which of the two is intended.
        cleaner = Cleaner(allow_tags=['i', 'em', 'p'], remove_tags=['p', 'div'])
        return cleaner.clean_html(html)

    # NOTE(review): "url" is required here but not used in the result below;
    # presumably it marks a publishable post -- confirm with callers.
    for key in ("url", "body"):
        if key not in p:
            raise PostKeyError("Post needs to have key '%s'" % key)

    # Build the Solr structure.
    return {
        "id": p["slug"],
        "date_dt": p["public_date"],
        "text_txt": (get_text(p.get("body", "")) or "") + (p.get("agenda_html", "") or ""),
    }