Added PDF censoring in python
This commit is contained in:
8
app.js
8
app.js
@@ -163,8 +163,8 @@ class PDFDocument {
|
||||
}
|
||||
get paramRects() {
|
||||
let prects = [];
|
||||
for (var k = 0; k < this.rects.length; k++) {
|
||||
prects[k] = [];
|
||||
for (var k = 1; k < this.rects.length; k++) {
|
||||
prects[k - 1] = [];
|
||||
//console.log(this.rects[k]);
|
||||
if (this.rects[k] === undefined) {
|
||||
continue;
|
||||
@@ -174,7 +174,7 @@ class PDFDocument {
|
||||
let len = this.rects[k].length;
|
||||
for (var i = 0; i < len; i++) {
|
||||
//console.log(this.rects[k][i]);
|
||||
prects[k].push([this.rects[k][i].makeTuple()]);
|
||||
prects[k - 1].push(this.rects[k][i].makeTuple());
|
||||
//console.log(prects[k][i]);
|
||||
}
|
||||
}
|
||||
@@ -263,7 +263,7 @@ function submitPdf(eve) {
|
||||
var formdata = new FormData(eve.target);
|
||||
console.log(doc.paramRects);
|
||||
formdata.append("rects", JSON.stringify(doc.paramRects));
|
||||
formdata.append("pagescales", JSON.stringify(doc.pagescales));
|
||||
formdata.append("pagescales", JSON.stringify(doc.pagescales.slice(1)));
|
||||
formdata.append("fname", doc.fname);
|
||||
console.log(formdata);
|
||||
submitForm(formdata);
|
||||
|
||||
Binary file not shown.
67
app/main.py
67
app/main.py
@@ -3,6 +3,9 @@ from typing import List, Dict, Tuple
|
||||
from datetime import date
|
||||
from fastapi import FastAPI, File, UploadFile, Request, Form
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
import pymupdf
|
||||
import pdf2image
|
||||
import json
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
@@ -26,19 +29,65 @@ async def create_upload_file(file: UploadFile):
|
||||
|
||||
@app.post("/submit/")
|
||||
async def get_submittion(
|
||||
lva: Annotated[str, Form()],
|
||||
prof: Annotated[str, Form()],
|
||||
fname: Annotated[str, Form()],
|
||||
sem: Annotated[str, Form()],
|
||||
stype: Annotated[str, Form()],
|
||||
date: Annotated[str, Form()],
|
||||
rects: Annotated[str, Form()], # List[List[Tuple[float, float, float, float]]],
|
||||
pagescales: Annotated[str, Form()], # Annotated[List[Dict[str, float]], Form()],
|
||||
lva: Annotated[str, Form()], # LVA Name and Number
|
||||
prof: Annotated[str, Form()], # Vortragender
|
||||
fname: Annotated[str, Form()], # Path to pdf File
|
||||
sem: Annotated[str, Form()], # Semester eg. 2024W
|
||||
stype: Annotated[str, Form()], # Type of File eg. Prüfung
|
||||
ex_date: Annotated[str, Form()], # Date of Exam only when type is exam
|
||||
rects: Annotated[
|
||||
str, Form()
|
||||
], # Rectangles # List[List[Tuple[float, float, float, float]]],
|
||||
pagescales: Annotated[
|
||||
str, Form()
|
||||
], # Scales of Pages # Annotated[List[Dict[str, float]], Form()],
|
||||
):
|
||||
print(lva, prof, fname, stype, sem, date, rects, pagescales)
|
||||
print(lva, prof, fname, stype, sem, ex_date, rects, pagescales)
|
||||
rects_p = json.loads(rects)
|
||||
scales_p = json.loads(pagescales)
|
||||
censor_pdf(fname, "./app/files/censored.pdf", rects_p, scales_p)
|
||||
return {"done": "ok"}
|
||||
|
||||
|
||||
def _page_scale_factors(
    page_width: float,
    page_height: float,
    scales: List[Dict[str, float]],
    index: int,
) -> Tuple[float, float]:
    """Return the (x, y) factors that map submitted coordinates onto a page.

    Raises:
        ValueError: If ``scales`` has no entry for ``index``, the entry lacks
            ``"width"``/``"height"``, or one of them is zero or not a number.
    """
    scale = scales[index] if index < len(scales) else None
    try:
        return page_width / scale["width"], page_height / scale["height"]
    except (TypeError, KeyError, ZeroDivisionError) as err:
        raise ValueError(
            f"no usable page scale for page index {index}"
        ) from err


def censor_pdf(
    path: str,
    destpath: str,
    rects: List[List[List[float]]],
    scales: List[Dict[str, float]],
    *,
    language: str = "deu",
    tessdata: str = "/usr/share/tessdata/",
) -> None:
    """Black out rectangles on the pages of a PDF and save an OCR'd copy.

    Each page of ``path`` gets its rectangles drawn in solid black, is
    rendered to a pixmap, converted into a one-page OCR PDF and appended to a
    new document, which is finally written to ``destpath``.  The saved file
    is therefore assembled from the rendered page images (plus the OCR
    result), not from the original page objects.

    Args:
        path: PDF file to read.
        destpath: Path the resulting PDF is written to.
        rects: One list per page (index 0 is the first page); each rectangle
            is ``[x, y, width, height]`` in the coordinate system described
            by the matching ``scales`` entry.  Pages beyond ``len(rects)``
            or with an empty list are copied without any black boxes.
        scales: One mapping per page with the ``"width"`` and ``"height"``
            the rectangle coordinates refer to.  Only consulted for pages
            that actually have rectangles.
        language: OCR language passed to ``Pixmap.pdfocr_tobytes``.
        tessdata: Directory with the Tesseract language data; Tesseract must
            be installed for the OCR step to work.

    Raises:
        ValueError: If a page has rectangles but no usable scale entry.
    """
    # Context managers make sure both documents are closed even when OCR or
    # saving fails part-way through.
    with pymupdf.open(path) as doc, pymupdf.open() as output:
        for i in range(doc.page_count):
            page = doc[i]
            page_rects = rects[i] if i < len(rects) else None
            if page_rects:
                wfac, hfac = _page_scale_factors(
                    page.rect.width, page.rect.height, scales, i
                )
                for rect in page_rects:
                    # Convert (x, y, w, h) into the corner form
                    # (x0, y0, x1, y1) expected by pymupdf.Rect.
                    prect = pymupdf.Rect(
                        rect[0] * wfac,
                        rect[1] * hfac,
                        (rect[0] + rect[2]) * wfac,
                        (rect[1] + rect[3]) * hfac,
                    )
                    page.draw_rect(
                        prect,
                        color=(0, 0, 0),
                        fill=(0, 0, 0),
                    )
            # Render after drawing so the black boxes are part of the image
            # that gets OCR'd and stored.
            bitmap = page.get_pixmap()
            pdf_bytes = bitmap.pdfocr_tobytes(
                language=language,
                tessdata=tessdata,
            )
            with pymupdf.Document(stream=pdf_bytes) as ocr_page:
                output.insert_pdf(ocr_page)
        output.save(destpath)
    print("CENSORING DONE")
|
||||
|
||||
|
||||
# async def get_submittion(request: Request):
|
||||
# reqJson = await request.form()
|
||||
# print(reqJson)
|
||||
|
||||
@@ -49,7 +49,7 @@
|
||||
<input type="radio" id="multimedia" name="stype" value="multimedia" />
|
||||
<label for="multimedia">Multimedia</label><br />
|
||||
<label for="date">Datum</label>
|
||||
<input type="date" id="date" name="date" /><br />
|
||||
<input type="date" id="date" name="ex_date" /><br />
|
||||
<button type="submit" id="send">Senden</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user