Added PDF censoring in Python

This commit is contained in:
Marcel Gansfusz
2025-01-30 22:18:49 +01:00
parent 6275e5cfa2
commit f9643612fe
4 changed files with 63 additions and 14 deletions

8
app.js
View File

@@ -163,8 +163,8 @@ class PDFDocument {
} }
get paramRects() { get paramRects() {
let prects = []; let prects = [];
for (var k = 0; k < this.rects.length; k++) { for (var k = 1; k < this.rects.length; k++) {
prects[k] = []; prects[k - 1] = [];
//console.log(this.rects[k]); //console.log(this.rects[k]);
if (this.rects[k] === undefined) { if (this.rects[k] === undefined) {
continue; continue;
@@ -174,7 +174,7 @@ class PDFDocument {
let len = this.rects[k].length; let len = this.rects[k].length;
for (var i = 0; i < len; i++) { for (var i = 0; i < len; i++) {
//console.log(this.rects[k][i]); //console.log(this.rects[k][i]);
prects[k].push([this.rects[k][i].makeTuple()]); prects[k - 1].push(this.rects[k][i].makeTuple());
//console.log(prects[k][i]); //console.log(prects[k][i]);
} }
} }
@@ -263,7 +263,7 @@ function submitPdf(eve) {
var formdata = new FormData(eve.target); var formdata = new FormData(eve.target);
console.log(doc.paramRects); console.log(doc.paramRects);
formdata.append("rects", JSON.stringify(doc.paramRects)); formdata.append("rects", JSON.stringify(doc.paramRects));
formdata.append("pagescales", JSON.stringify(doc.pagescales)); formdata.append("pagescales", JSON.stringify(doc.pagescales.slice(1)));
formdata.append("fname", doc.fname); formdata.append("fname", doc.fname);
console.log(formdata); console.log(formdata);
submitForm(formdata); submitForm(formdata);

Binary file not shown.

View File

@@ -3,6 +3,9 @@ from typing import List, Dict, Tuple
from datetime import date from datetime import date
from fastapi import FastAPI, File, UploadFile, Request, Form from fastapi import FastAPI, File, UploadFile, Request, Form
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
import pymupdf
import pdf2image
import json
app = FastAPI() app = FastAPI()
@@ -26,19 +29,65 @@ async def create_upload_file(file: UploadFile):
@app.post("/submit/") @app.post("/submit/")
async def get_submittion( async def get_submittion(
lva: Annotated[str, Form()], lva: Annotated[str, Form()], # LVA Name and Number
prof: Annotated[str, Form()], prof: Annotated[str, Form()], # Vortragender
fname: Annotated[str, Form()], fname: Annotated[str, Form()], # Path to pdf File
sem: Annotated[str, Form()], sem: Annotated[str, Form()], # Semester eg. 2024W
stype: Annotated[str, Form()], stype: Annotated[str, Form()], # Type of File eg. Prüfung
date: Annotated[str, Form()], ex_date: Annotated[str, Form()], # Date of Exam only when type is exam
rects: Annotated[str, Form()], # List[List[Tuple[float, float, float, float]]], rects: Annotated[
pagescales: Annotated[str, Form()], # Annotated[List[Dict[str, float]], Form()], str, Form()
], # Rectangles # List[List[Tuple[float, float, float, float]]],
pagescales: Annotated[
str, Form()
], # Scales of Pages # Annotated[List[Dict[str, float]], Form()],
): ):
print(lva, prof, fname, stype, sem, date, rects, pagescales) print(lva, prof, fname, stype, sem, ex_date, rects, pagescales)
rects_p = json.loads(rects)
scales_p = json.loads(pagescales)
censor_pdf(fname, "./app/files/censored.pdf", rects_p, scales_p)
return {"done": "ok"} return {"done": "ok"}
def censor_pdf(
    path: str,
    destpath: str,
    rects: List[List[List[float]]],
    scales: List[Dict[str, float]],
    language: str = "deu",
    tessdata: str = "/usr/share/tessdata/",
):
    """Black out rectangles on each page of a PDF and write an OCR'd copy.

    Every page is first painted with filled black rectangles, then rendered
    to a bitmap and converted back into a single-page PDF via OCR, so the
    saved pages are built from the rendered image rather than from the
    original page content.

    Args:
        path: Path of the PDF to censor.
        destpath: Path the censored PDF is saved to.
        rects: One list per page (index 0 is the first page); each entry is
            ``[x, y, width, height]`` expressed in the coordinate system
            described by the matching ``scales`` entry.
        scales: One dict per page holding the ``"width"`` and ``"height"``
            of the coordinate system the rectangles were given in.
        language: Language code handed to the OCR step.
        tessdata: Directory containing the Tesseract language data;
            Tesseract must be installed for the OCR step to work.

    Raises:
        IndexError, KeyError: If a page has rectangles but no usable
            ``scales`` entry. This is deliberate: failing is safer than
            silently writing an uncensored page.
    """
    # Context managers make sure both documents are closed even when
    # rendering or OCR fails half way through.
    with pymupdf.open(path) as doc, pymupdf.open() as output:
        for i, page in enumerate(doc):
            # Pages beyond the end of `rects`, or with no rectangles, are
            # passed through the render/OCR step unchanged.
            if i < len(rects) and rects[i]:
                # Factors converting client coordinates into PDF page units.
                wfac = page.rect.width / scales[i]["width"]
                hfac = page.rect.height / scales[i]["height"]
                for rect in rects[i]:
                    x, y, w, h = rect[0], rect[1], rect[2], rect[3]
                    prect = pymupdf.Rect(
                        x * wfac,
                        y * hfac,
                        (x + w) * wfac,
                        (y + h) * hfac,
                    )
                    page.draw_rect(
                        prect,
                        color=(0, 0, 0),
                        fill=(0, 0, 0),
                    )
            # Rendered with get_pixmap's default settings.
            bitmap = page.get_pixmap()
            pdf_bytes = bitmap.pdfocr_tobytes(
                language=language,
                tessdata=tessdata,
            )
            # The temporary single-page document is closed once its page
            # has been copied into the output.
            with pymupdf.Document(stream=pdf_bytes) as ocr_page:
                output.insert_pdf(ocr_page)
        output.save(destpath)
    print("CENSORING DONE")
# async def get_submittion(request: Request): # async def get_submittion(request: Request):
# reqJson = await request.form() # reqJson = await request.form()
# print(reqJson) # print(reqJson)

View File

@@ -49,7 +49,7 @@
<input type="radio" id="multimedia" name="stype" value="multimedia" /> <input type="radio" id="multimedia" name="stype" value="multimedia" />
<label for="multimedia">Multimedia</label><br /> <label for="multimedia">Multimedia</label><br />
<label for="date">Datum</label> <label for="date">Datum</label>
<input type="date" id="date" name="date" /><br /> <input type="date" id="date" name="ex_date" /><br />
<button type="submit" id="send">Senden</button> <button type="submit" id="send">Senden</button>
</form> </form>
</div> </div>