diff --git a/app.js b/app.js index a2f332c..805e6fd 100644 --- a/app.js +++ b/app.js @@ -163,8 +163,8 @@ class PDFDocument { } get paramRects() { let prects = []; - for (var k = 0; k < this.rects.length; k++) { - prects[k] = []; + for (var k = 1; k < this.rects.length; k++) { + prects[k - 1] = []; //console.log(this.rects[k]); if (this.rects[k] === undefined) { continue; @@ -174,7 +174,7 @@ class PDFDocument { let len = this.rects[k].length; for (var i = 0; i < len; i++) { //console.log(this.rects[k][i]); - prects[k].push([this.rects[k][i].makeTuple()]); + prects[k - 1].push(this.rects[k][i].makeTuple()); //console.log(prects[k][i]); } } @@ -263,7 +263,7 @@ function submitPdf(eve) { var formdata = new FormData(eve.target); console.log(doc.paramRects); formdata.append("rects", JSON.stringify(doc.paramRects)); - formdata.append("pagescales", JSON.stringify(doc.pagescales)); + formdata.append("pagescales", JSON.stringify(doc.pagescales.slice(1))); formdata.append("fname", doc.fname); console.log(formdata); submitForm(formdata); diff --git a/app/__pycache__/main.cpython-313.pyc b/app/__pycache__/main.cpython-313.pyc index 6e9e9fd..4fe65ec 100644 Binary files a/app/__pycache__/main.cpython-313.pyc and b/app/__pycache__/main.cpython-313.pyc differ diff --git a/app/main.py b/app/main.py index 64381dd..4fdcb57 100644 --- a/app/main.py +++ b/app/main.py @@ -3,6 +3,9 @@ from typing import List, Dict, Tuple from datetime import date from fastapi import FastAPI, File, UploadFile, Request, Form from fastapi.staticfiles import StaticFiles +import pymupdf +import pdf2image +import json app = FastAPI() @@ -26,19 +29,65 @@ async def create_upload_file(file: UploadFile): @app.post("/submit/") async def get_submittion( - lva: Annotated[str, Form()], - prof: Annotated[str, Form()], - fname: Annotated[str, Form()], - sem: Annotated[str, Form()], - stype: Annotated[str, Form()], - date: Annotated[str, Form()], - rects: Annotated[str, Form()], # List[List[Tuple[float, float, float, float]]], - pagescales: Annotated[str, Form()], # Annotated[List[Dict[str, float]], Form()], + lva: Annotated[str, Form()], # LVA Name and Number + prof: Annotated[str, Form()], # Vortragender + fname: Annotated[str, Form()], # Path to pdf File + sem: Annotated[str, Form()], # Semester eg. 2024W + stype: Annotated[str, Form()], # Type of File eg. Prüfung + ex_date: Annotated[str, Form()], # Date of Exam only when type is exam + rects: Annotated[ + str, Form() + ], # Rechtangles # List[List[Tuple[float, float, float, float]]], + pagescales: Annotated[ + str, Form() + ], # Scales of Pages # Annotated[List[Dict[str, float]], Form()], ): - print(lva, prof, fname, stype, sem, date, rects, pagescales) + print(lva, prof, fname, stype, sem, ex_date, rects, pagescales) + rects_p = json.loads(rects) + scales_p = json.loads(pagescales) + censor_pdf(fname, "./app/files/censored.pdf", rects_p, scales_p) return {"done": "ok"} +def censor_pdf( + path: str, + destpath: str, + rects: List[List[List[float]]], + scales: List[Dict[str, float]], +): + doc = pymupdf.open(path) + output = pymupdf.open() + page = doc[0] + width = page.rect.width + height = page.rect.height + print(width, height) + for i in range(doc.page_count): + page = doc[i] + if i < len(rects): + wfac = page.rect.width / scales[i]["width"] + hfac = page.rect.height / scales[i]["height"] + for rect in rects[i]: + prect = pymupdf.Rect( + rect[0] * wfac, + rect[1] * hfac, + (rect[0] + rect[2]) * wfac, + (rect[1] + rect[3]) * hfac, + ) + page.draw_rect( + prect, + color=(0, 0, 0), + fill=(0, 0, 0), + ) + bitmap = page.get_pixmap() + pdf_bytes = bitmap.pdfocr_tobytes( + language="deu", + tessdata="/usr/share/tessdata/", # tesseract needs to be installed; this is the path to thetesseract files + ) + output.insert_pdf(pymupdf.Document(stream=pdf_bytes)) + output.save(destpath) + print("CENSORING DONE") + + # async def get_submittion(request: Request): # reqJson = await request.form() # print(reqJson) diff --git a/index.html b/index.html index e4e8eac..85b9f16 100644 --- a/index.html +++ b/index.html @@ -49,7 +49,7 @@
-
+