Added PDF censoring in Python
This commit is contained in:
67
app/main.py
67
app/main.py
@@ -3,6 +3,9 @@ from typing import List, Dict, Tuple
|
||||
from datetime import date
|
||||
from fastapi import FastAPI, File, UploadFile, Request, Form
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
import pymupdf
|
||||
import pdf2image
|
||||
import json
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
@@ -26,19 +29,65 @@ async def create_upload_file(file: UploadFile):
|
||||
|
||||
@app.post("/submit/")
async def get_submittion(
    lva: Annotated[str, Form()],  # LVA name and number
    prof: Annotated[str, Form()],  # Lecturer
    fname: Annotated[str, Form()],  # Path to the PDF file
    sem: Annotated[str, Form()],  # Semester, e.g. 2024W
    stype: Annotated[str, Form()],  # Type of file, e.g. exam
    ex_date: Annotated[str, Form()],  # Date of the exam; only when the type is exam
    rects: Annotated[str, Form()],  # JSON: per page, a list of [x, y, w, h] rectangles
    pagescales: Annotated[str, Form()],  # JSON: per page, {"width": ..., "height": ...}
):
    """Handle a submission form and write a censored copy of the PDF.

    ``rects`` and ``pagescales`` arrive as JSON-encoded form strings; they are
    decoded here and handed to ``censor_pdf`` together with ``fname``. The
    censored document is written to ``./app/files/censored.pdf``.

    Returns:
        ``{"done": "ok"}`` once ``censor_pdf`` has returned.

    Raises:
        HTTPException: status 400 when ``rects`` or ``pagescales`` is not
            valid JSON.
    """
    # Imported locally so this handler does not depend on an edit to the
    # module-level import list.
    from fastapi import HTTPException

    print(lva, prof, fname, stype, sem, ex_date, rects, pagescales)

    try:
        rects_p = json.loads(rects)
        scales_p = json.loads(pagescales)
    except json.JSONDecodeError as err:
        # Answer with a client error instead of an unhandled server error.
        raise HTTPException(
            status_code=400,
            detail=f"rects/pagescales must be valid JSON: {err}",
        ) from err

    # NOTE(review): `fname` comes straight from the form and is opened as a
    # filesystem path by censor_pdf -- confirm it is restricted to the upload
    # directory before this endpoint is exposed.
    # NOTE(review): censor_pdf is a plain synchronous call inside an async
    # handler; consider moving it to a worker thread if requests stall.
    censor_pdf(fname, "./app/files/censored.pdf", rects_p, scales_p)
    return {"done": "ok"}
|
||||
|
||||
|
||||
def censor_pdf(
    path: str,
    destpath: str,
    rects: List[List[List[float]]],
    scales: List[Dict[str, float]],
    *,
    language: str = "deu",
    tessdata: str = "/usr/share/tessdata/",
) -> None:
    """Black out rectangles on each page of a PDF and save an OCR'd copy.

    Every page of the document at ``path`` is painted with filled black
    rectangles, rendered to a pixmap, converted back into a one-page PDF via
    ``Pixmap.pdfocr_tobytes`` and appended to a new document, which is finally
    saved to ``destpath``.

    Args:
        path: Location of the source PDF.
        destpath: Where the censored PDF is written.
        rects: One list per page; each rectangle is read as
            ``[x, y, width, height]`` (the corner is ``rect[0], rect[1]``, the
            opposite corner is obtained by adding ``rect[2], rect[3]``).
            Pages beyond ``len(rects)`` get no rectangles.
        scales: One mapping per page with ``"width"`` and ``"height"`` keys.
            Rectangle coordinates are multiplied by
            ``page size / scale`` to convert them into page coordinates.
        language: Language passed to ``pdfocr_tobytes``.
        tessdata: Tesseract data directory passed to ``pdfocr_tobytes``;
            Tesseract must be installed.

    Raises:
        ValueError: If the source has no pages, or a page that has rectangles
            has no usable (present, positive) scale entry.
    """
    # Context managers close both documents even when rendering or OCR fails.
    with pymupdf.open(path) as doc, pymupdf.open() as output:
        if doc.page_count == 0:
            raise ValueError(f"{path!r} contains no pages")

        for i, page in enumerate(doc):
            page_rects = rects[i] if i < len(rects) else []

            # Only look up the scale when there is something to draw, so a
            # page without rectangles does not need a scale entry.
            if page_rects:
                if i >= len(scales):
                    raise ValueError(f"no scale given for page {i}")
                scale_w = scales[i].get("width", 0)
                scale_h = scales[i].get("height", 0)
                if scale_w <= 0 or scale_h <= 0:
                    raise ValueError(f"invalid scale for page {i}: {scales[i]!r}")
                wfac = page.rect.width / scale_w
                hfac = page.rect.height / scale_h

                for x, y, w, h in (r[:4] for r in page_rects):
                    page.draw_rect(
                        pymupdf.Rect(
                            x * wfac,
                            y * hfac,
                            (x + w) * wfac,
                            (y + h) * hfac,
                        ),
                        color=(0, 0, 0),
                        fill=(0, 0, 0),
                    )

            # Rasterising the page bakes the black boxes into the image; the
            # OCR pass then produces a fresh PDF page from that image.
            bitmap = page.get_pixmap()
            pdf_bytes = bitmap.pdfocr_tobytes(
                language=language,
                tessdata=tessdata,
            )
            with pymupdf.Document(stream=pdf_bytes) as ocr_page:
                output.insert_pdf(ocr_page)

        output.save(destpath)
    print("CENSORING DONE")
|
||||
|
||||
|
||||
# async def get_submittion(request: Request):
|
||||
# reqJson = await request.form()
|
||||
# print(reqJson)
|
||||
|
||||
Reference in New Issue
Block a user