Added PDF censoring in Python

This commit is contained in:
Marcel Gansfusz
2025-01-30 22:18:49 +01:00
parent 6275e5cfa2
commit f9643612fe
4 changed files with 63 additions and 14 deletions

8
app.js
View File

@@ -163,8 +163,8 @@ class PDFDocument {
}
get paramRects() {
let prects = [];
for (var k = 0; k < this.rects.length; k++) {
prects[k] = [];
for (var k = 1; k < this.rects.length; k++) {
prects[k - 1] = [];
//console.log(this.rects[k]);
if (this.rects[k] === undefined) {
continue;
@@ -174,7 +174,7 @@ class PDFDocument {
let len = this.rects[k].length;
for (var i = 0; i < len; i++) {
//console.log(this.rects[k][i]);
prects[k].push([this.rects[k][i].makeTuple()]);
prects[k - 1].push(this.rects[k][i].makeTuple());
//console.log(prects[k][i]);
}
}
@@ -263,7 +263,7 @@ function submitPdf(eve) {
var formdata = new FormData(eve.target);
console.log(doc.paramRects);
formdata.append("rects", JSON.stringify(doc.paramRects));
formdata.append("pagescales", JSON.stringify(doc.pagescales));
formdata.append("pagescales", JSON.stringify(doc.pagescales.slice(1)));
formdata.append("fname", doc.fname);
console.log(formdata);
submitForm(formdata);

Binary file not shown.

View File

@@ -3,6 +3,9 @@ from typing import List, Dict, Tuple
from datetime import date
from fastapi import FastAPI, File, UploadFile, Request, Form
from fastapi.staticfiles import StaticFiles
import pymupdf
import pdf2image
import json
app = FastAPI()
@@ -26,19 +29,65 @@ async def create_upload_file(file: UploadFile):
# Handler for the submit form: decodes the JSON-encoded rectangles and page
# scales and passes them, together with the uploaded file's path, to
# censor_pdf, which writes the result to ./app/files/censored.pdf.
# NOTE(review): this span is a diff rendering; the first group of parameters
# and the first print call appear to be the removed (old) lines, followed by
# their replacements.
# NOTE(review): fname arrives from the form and is opened as a filesystem path
# by censor_pdf; confirm it is validated before use.
@app.post("/submit/")
async def get_submittion(
lva: Annotated[str, Form()],
prof: Annotated[str, Form()],
fname: Annotated[str, Form()],
sem: Annotated[str, Form()],
stype: Annotated[str, Form()],
date: Annotated[str, Form()],
rects: Annotated[str, Form()], # List[List[Tuple[float, float, float, float]]],
pagescales: Annotated[str, Form()], # Annotated[List[Dict[str, float]], Form()],
lva: Annotated[str, Form()], # Course (LVA) name and number
prof: Annotated[str, Form()], # Lecturer
fname: Annotated[str, Form()], # Path to the PDF file
sem: Annotated[str, Form()], # Semester, e.g. 2024W
stype: Annotated[str, Form()], # Type of file, e.g. "Prüfung" (exam)
ex_date: Annotated[str, Form()], # Exam date; only relevant when the type is an exam
rects: Annotated[
str, Form()
], # Rectangles as JSON # List[List[Tuple[float, float, float, float]]],
pagescales: Annotated[
str, Form()
], # Page scales as JSON # Annotated[List[Dict[str, float]], Form()],
):
print(lva, prof, fname, stype, sem, date, rects, pagescales)
print(lva, prof, fname, stype, sem, ex_date, rects, pagescales)
# Both fields are submitted as JSON strings and decoded here.
rects_p = json.loads(rects)
scales_p = json.loads(pagescales)
# The destination path is fixed, so each call writes to the same file.
censor_pdf(fname, "./app/files/censored.pdf", rects_p, scales_p)
return {"done": "ok"}
def censor_pdf(
    path: str,
    destpath: str,
    rects: List[List[List[float]]],
    scales: List[Dict[str, float]],
) -> None:
    """Black out rectangles on the pages of a PDF and save an OCR'd copy.

    Every page of the source document is rendered to a bitmap after the
    rectangles for that page have been drawn in solid black, converted back
    to a one-page PDF via Tesseract OCR, and appended to a new document that
    is written to ``destpath``.

    Args:
        path: Path of the PDF to censor.
        destpath: Path the censored PDF is written to.
        rects: One list per page (indexed by page number); each entry is
            ``[x, y, width, height]`` in the coordinate system described by
            ``scales``. Pages without an entry are copied without boxes.
        scales: One dict per page with ``"width"`` and ``"height"`` keys
            giving the page size the rectangle coordinates refer to
            (presumably the size the page was displayed at in the browser;
            verify against the caller).

    Raises:
        IndexError: If the document has no pages, or ``scales`` has fewer
            entries than ``rects`` for a page that is processed.
    """
    # Context managers make sure every opened document is closed again,
    # including when OCR or saving fails part-way through.
    with pymupdf.open(path) as doc, pymupdf.open() as output:
        first_page = doc[0]
        print(first_page.rect.width, first_page.rect.height)

        for i, page in enumerate(doc):
            if i < len(rects):
                # Factors converting submitted coordinates to PDF page units.
                wfac = page.rect.width / scales[i]["width"]
                hfac = page.rect.height / scales[i]["height"]
                for rect in rects[i]:
                    # rect is (x, y, width, height); Rect wants two corners.
                    prect = pymupdf.Rect(
                        rect[0] * wfac,
                        rect[1] * hfac,
                        (rect[0] + rect[2]) * wfac,
                        (rect[1] + rect[3]) * hfac,
                    )
                    page.draw_rect(
                        prect,
                        color=(0, 0, 0),
                        fill=(0, 0, 0),
                    )

            # The page is rendered to a bitmap and rebuilt from it, so the
            # output page is produced from the image rather than from the
            # original page content.
            bitmap = page.get_pixmap()
            pdf_bytes = bitmap.pdfocr_tobytes(
                language="deu",
                # Tesseract needs to be installed; this is the path to the
                # Tesseract language data files.
                tessdata="/usr/share/tessdata/",
            )
            with pymupdf.Document(stream=pdf_bytes) as ocr_page:
                output.insert_pdf(ocr_page)

        output.save(destpath)
    print("CENSORING DONE")
# async def get_submittion(request: Request):
# reqJson = await request.form()
# print(reqJson)

View File

@@ -49,7 +49,7 @@
<input type="radio" id="multimedia" name="stype" value="multimedia" />
<label for="multimedia">Multimedia</label><br />
<label for="date">Datum</label>
<input type="date" id="date" name="date" /><br />
<input type="date" id="date" name="ex_date" /><br />
<button type="submit" id="send">Senden</button>
</form>
</div>