Added PDF censoring in python
This commit is contained in:
8
app.js
8
app.js
@@ -163,8 +163,8 @@ class PDFDocument {
|
|||||||
}
|
}
|
||||||
get paramRects() {
|
get paramRects() {
|
||||||
let prects = [];
|
let prects = [];
|
||||||
for (var k = 0; k < this.rects.length; k++) {
|
for (var k = 1; k < this.rects.length; k++) {
|
||||||
prects[k] = [];
|
prects[k - 1] = [];
|
||||||
//console.log(this.rects[k]);
|
//console.log(this.rects[k]);
|
||||||
if (this.rects[k] === undefined) {
|
if (this.rects[k] === undefined) {
|
||||||
continue;
|
continue;
|
||||||
@@ -174,7 +174,7 @@ class PDFDocument {
|
|||||||
let len = this.rects[k].length;
|
let len = this.rects[k].length;
|
||||||
for (var i = 0; i < len; i++) {
|
for (var i = 0; i < len; i++) {
|
||||||
//console.log(this.rects[k][i]);
|
//console.log(this.rects[k][i]);
|
||||||
prects[k].push([this.rects[k][i].makeTuple()]);
|
prects[k - 1].push(this.rects[k][i].makeTuple());
|
||||||
//console.log(prects[k][i]);
|
//console.log(prects[k][i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -263,7 +263,7 @@ function submitPdf(eve) {
|
|||||||
var formdata = new FormData(eve.target);
|
var formdata = new FormData(eve.target);
|
||||||
console.log(doc.paramRects);
|
console.log(doc.paramRects);
|
||||||
formdata.append("rects", JSON.stringify(doc.paramRects));
|
formdata.append("rects", JSON.stringify(doc.paramRects));
|
||||||
formdata.append("pagescales", JSON.stringify(doc.pagescales));
|
formdata.append("pagescales", JSON.stringify(doc.pagescales.slice(1)));
|
||||||
formdata.append("fname", doc.fname);
|
formdata.append("fname", doc.fname);
|
||||||
console.log(formdata);
|
console.log(formdata);
|
||||||
submitForm(formdata);
|
submitForm(formdata);
|
||||||
|
|||||||
Binary file not shown.
67
app/main.py
67
app/main.py
@@ -3,6 +3,9 @@ from typing import List, Dict, Tuple
|
|||||||
from datetime import date
|
from datetime import date
|
||||||
from fastapi import FastAPI, File, UploadFile, Request, Form
|
from fastapi import FastAPI, File, UploadFile, Request, Form
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
import pymupdf
|
||||||
|
import pdf2image
|
||||||
|
import json
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
@@ -26,19 +29,65 @@ async def create_upload_file(file: UploadFile):
|
|||||||
|
|
||||||
@app.post("/submit/")
|
@app.post("/submit/")
|
||||||
async def get_submittion(
|
async def get_submittion(
|
||||||
lva: Annotated[str, Form()],
|
lva: Annotated[str, Form()], # LVA Name and Number
|
||||||
prof: Annotated[str, Form()],
|
prof: Annotated[str, Form()], # Vortragender
|
||||||
fname: Annotated[str, Form()],
|
fname: Annotated[str, Form()], # Path to pdf File
|
||||||
sem: Annotated[str, Form()],
|
sem: Annotated[str, Form()], # Semester eg. 2024W
|
||||||
stype: Annotated[str, Form()],
|
stype: Annotated[str, Form()], # Type of File eg. Prüfung
|
||||||
date: Annotated[str, Form()],
|
ex_date: Annotated[str, Form()], # Date of Exam only when type is exam
|
||||||
rects: Annotated[str, Form()], # List[List[Tuple[float, float, float, float]]],
|
rects: Annotated[
|
||||||
pagescales: Annotated[str, Form()], # Annotated[List[Dict[str, float]], Form()],
|
str, Form()
|
||||||
|
], # Rechtangles # List[List[Tuple[float, float, float, float]]],
|
||||||
|
pagescales: Annotated[
|
||||||
|
str, Form()
|
||||||
|
], # Scales of Pages # Annotated[List[Dict[str, float]], Form()],
|
||||||
):
|
):
|
||||||
print(lva, prof, fname, stype, sem, date, rects, pagescales)
|
print(lva, prof, fname, stype, sem, ex_date, rects, pagescales)
|
||||||
|
rects_p = json.loads(rects)
|
||||||
|
scales_p = json.loads(pagescales)
|
||||||
|
censor_pdf(fname, "./app/files/censored.pdf", rects_p, scales_p)
|
||||||
return {"done": "ok"}
|
return {"done": "ok"}
|
||||||
|
|
||||||
|
|
||||||
|
def censor_pdf(
    path: str,
    destpath: str,
    rects: List[List[List[float]]],
    scales: List[Dict[str, float]],
    language: str = "deu",
    tessdata: str = "/usr/share/tessdata/",
) -> None:
    """Black out rectangles on the pages of a PDF and save an OCR'd raster copy.

    Every page of the input is drawn over with filled black rectangles,
    rendered to a bitmap, run through Tesseract OCR and appended to a new
    document. Because the output is rebuilt from the rendered pixels, the
    original page content underneath the rectangles is not carried over.

    Args:
        path: Path of the PDF to read.
        destpath: Path the censored PDF is written to.
        rects: One list per page (index 0 is the first page). Each rectangle
            is ``[x, y, width, height]`` in the coordinate system described
            by ``scales``. Entries beyond the last page are ignored; pages
            without an entry (or with an empty/``None`` entry) are copied
            without any rectangles.
        scales: One mapping per page with the keys ``"width"`` and
            ``"height"``: the page size in the coordinate system the
            rectangles were given in (presumably the size the page was
            rendered at in the browser -- verify against the caller).
        language: Tesseract language code used for OCR.
        tessdata: Directory with the Tesseract language data. Tesseract
            needs to be installed; this is the path to the tesseract files.

    Raises:
        ValueError: If a page has rectangles but no usable (non-zero)
            scale. Failing loudly is deliberate: silently skipping the
            rectangles would emit an uncensored page.
    """
    # Context managers make sure both documents are closed even when OCR or
    # saving fails half-way through.
    with pymupdf.open(path) as doc, pymupdf.open() as output:
        for i, page in enumerate(doc):
            page_rects = rects[i] if i < len(rects) else None
            if page_rects:
                scale = scales[i] if i < len(scales) else None
                if (
                    not scale
                    or not scale.get("width")
                    or not scale.get("height")
                ):
                    raise ValueError(
                        f"page {i} has rectangles but no usable page scale"
                    )
                # Factors converting client coordinates to PDF page units.
                wfac = page.rect.width / scale["width"]
                hfac = page.rect.height / scale["height"]
                for rect in page_rects:
                    x, y, w, h = rect[0], rect[1], rect[2], rect[3]
                    prect = pymupdf.Rect(
                        x * wfac,
                        y * hfac,
                        (x + w) * wfac,
                        (y + h) * hfac,
                    )
                    page.draw_rect(
                        prect,
                        color=(0, 0, 0),
                        fill=(0, 0, 0),
                    )
            # Rasterize the (possibly censored) page and rebuild it as a
            # one-page PDF with an OCR text layer.
            bitmap = page.get_pixmap()
            pdf_bytes = bitmap.pdfocr_tobytes(
                language=language,
                tessdata=tessdata,
            )
            with pymupdf.Document(stream=pdf_bytes) as ocr_page:
                output.insert_pdf(ocr_page)
        output.save(destpath)
    print("CENSORING DONE")
|
||||||
|
|
||||||
|
|
||||||
# async def get_submittion(request: Request):
|
# async def get_submittion(request: Request):
|
||||||
# reqJson = await request.form()
|
# reqJson = await request.form()
|
||||||
# print(reqJson)
|
# print(reqJson)
|
||||||
|
|||||||
@@ -49,7 +49,7 @@
|
|||||||
<input type="radio" id="multimedia" name="stype" value="multimedia" />
|
<input type="radio" id="multimedia" name="stype" value="multimedia" />
|
||||||
<label for="multimedia">Multimedia</label><br />
|
<label for="multimedia">Multimedia</label><br />
|
||||||
<label for="date">Datum</label>
|
<label for="date">Datum</label>
|
||||||
<input type="date" id="date" name="date" /><br />
|
<input type="date" id="date" name="ex_date" /><br />
|
||||||
<button type="submit" id="send">Senden</button>
|
<button type="submit" id="send">Senden</button>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user