使用 Python 与 Flask 对 PDF 格式文件进行删改

我们在网上搜集的电子版资料多数是 PDF 格式。一些无良培训机构或者自媒体为了博取眼球、引流,会在倒手过程中使用程序对文档批量添加水印或联系方式,导致原本干净整洁的资料满屏"牛皮癣",简直是糟糕透了!

from flask import Flask, request, send_file, render_template_string, jsonify
from PyPDF2 import PdfReader, PdfWriter
import os
from pdf2image import convert_from_path
import io
import base64

app = Flask(__name__)


# Root URL route
@app.route('/')
def index():
    """Serve the single-page UI used to pick PDF pages for deletion.

    The page offers two buttons: "Load Pages" fetches ``/get-pages`` and shows
    one thumbnail, checkbox and label per page; "Submit" posts the values of
    the checked boxes to ``/merge-pdf`` as ``{"selected_pages": [...]}``.

    Returns:
        The rendered HTML page.
    """
    # Fixes relative to the earlier template:
    #   * the page container now carries the ``grid-container`` class, so the
    #     5-column grid CSS that was already defined is actually applied;
    #   * each checkbox gets the id its label's ``htmlFor`` points at, so
    #     clicking the label toggles the checkbox;
    #   * an error payload from /get-pages is reported instead of raising a
    #     TypeError on ``data.pages.forEach``.
    return render_template_string('''
        <!DOCTYPE html>
        <html>
        <head>
            <title>PDF Page Manager</title>
            <style>
                body {
                    font-family: Arial, sans-serif;
                }
                .grid-container {
                    display: grid;
                    grid-template-columns: repeat(5, 1fr);
                    grid-gap: 10px;
                    margin-bottom: 20px;
                }
                .grid-item {
                    text-align: center;
                }
                .grid-item img {
                    max-width: 100%;
                    height: auto;
                }
                .grid-item input[type="checkbox"] {
                    margin-top: 5px;
                }
            </style>
        </head>
        <body>
            <h1>Select Pages to Delete</h1>
            <div id="pageContainer" class="grid-container"></div>
            <button onclick="loadPages()">Load Pages</button>
            <button onclick="submitForm()">Submit</button>

            <script>
                function loadPages() {
                    fetch('/get-pages', { method: 'GET' })
                        .then(response => response.json())
                        .then(data => {
                            if (!Array.isArray(data.pages)) {
                                alert(data.error || 'Failed to load pages.');
                                return;
                            }
                            const container = document.getElementById('pageContainer');
                            container.innerHTML = ''; // 清空容器
                            data.pages.forEach((page, index) => {
                                const item = document.createElement('div');
                                item.className = 'grid-item';
                                const img = document.createElement('img');
                                img.src = `data:image/png;base64,${page.image}`;
                                img.alt = `Page ${index + 1}`;
                                const checkbox = document.createElement('input');
                                checkbox.type = 'checkbox';
                                checkbox.name = 'page';
                                checkbox.id = `page${index}`;
                                checkbox.value = index;
                                const label = document.createElement('label');
                                label.htmlFor = `page${index}`;
                                label.appendChild(document.createTextNode(`Page ${index + 1}`));
                                item.appendChild(img);
                                item.appendChild(checkbox);
                                item.appendChild(label);
                                container.appendChild(item);
                            });
                        })
                        .catch(() => {
                            alert('Failed to load pages.');
                        });
                }

                function submitForm() {
                    const checkboxes = document.querySelectorAll('input[type=checkbox]:checked');
                    const selectedPages = Array.from(checkboxes).map(checkbox => checkbox.value);
                    fetch('/merge-pdf', {
                        method: 'POST',
                        headers: {
                            'Content-Type': 'application/json'
                        },
                        body: JSON.stringify({ selected_pages: selectedPages })
                    }).then(response => {
                        if (response.ok) {
                            alert('PDF has been modified and saved.');
                        } else {
                            alert('An error occurred while modifying the PDF.');
                        }
                    });
                }
            </script>
        </body>
        </html>
    ''')


@app.route('/get-pages', methods=['GET'])
def get_pages():
    """Render every page of the source PDF and return the images as JSON.

    Returns:
        On success, a JSON response of the form
        ``{"pages": [{"index": <int>, "image": <base64 PNG string>}, ...]}``.
        If anything raises, ``({"error": <message>}, 500)``.
    """
    file_path = r"D:\daku\python编辑pdf\2024年县域未成年人网络消费调研报告-佟毕铖.pdf"

    def _as_base64_png(page_image):
        # Serialize one rendered page to PNG in memory, then base64-encode it
        # so it can be embedded in a data: URL on the client.
        png_buffer = io.BytesIO()
        page_image.save(png_buffer, format="PNG")
        return base64.b64encode(png_buffer.getvalue()).decode('utf-8')

    try:
        rendered_pages = convert_from_path(file_path)
        payload = [
            {'index': position, 'image': _as_base64_png(rendered)}
            for position, rendered in enumerate(rendered_pages)
        ]
        return jsonify({'pages': payload})
    except Exception as exc:
        # Route boundary: report the failure to the client as JSON.
        return jsonify({'error': str(exc)}), 500


def _parse_page_indices(values):
    """Convert client-supplied page indices into a set of ints.

    Args:
        values: Iterable of page indices, each an ``int`` or a numeric string
            (the browser sends checkbox values as strings).

    Returns:
        A set of zero-based page indices.

    Raises:
        ValueError: If an entry is neither an int nor a numeric string.
    """
    indices = set()
    for value in values:
        # bool is a subclass of int; reject it so True/False are not read as 1/0.
        if isinstance(value, bool) or not isinstance(value, (int, str)):
            raise ValueError(f"invalid page index: {value!r}")
        indices.add(int(value))
    return indices


@app.route('/merge-pdf', methods=['POST'])
def merge_pdf():
    """Write a copy of the source PDF without the selected pages and return it.

    Expects a JSON object body ``{"selected_pages": [...]}`` whose entries are
    zero-based page indices to delete, given as ints or numeric strings.
    Indices that do not exist in the document are ignored.

    Returns:
        The modified PDF as an attachment, or ``({"error": <message>}, 400)``
        when the request body is malformed.
    """
    data = request.json
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    selected_pages = data.get('selected_pages', [])
    # A bare string would otherwise be matched character-by-character.
    if not isinstance(selected_pages, list):
        return jsonify({'error': 'selected_pages must be a list.'}), 400

    try:
        # Normalize to ints so both ["0", "2"] and [0, 2] select the same pages.
        pages_to_delete = _parse_page_indices(selected_pages)
    except ValueError as exc:
        return jsonify({'error': str(exc)}), 400

    file_path = r"D:\daku\python编辑pdf\2024年县域未成年人网络消费调研报告-佟毕铖.pdf"
    reader = PdfReader(file_path)

    writer = PdfWriter()

    # Copy across every page that was not marked for deletion.
    for page_num, page in enumerate(reader.pages):
        if page_num not in pages_to_delete:
            writer.add_page(page)

    output_path = r"D:\daku\python编辑pdf\output\modified_report.pdf"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'wb') as f:
        writer.write(f)

    return send_file(output_path, as_attachment=True)


# Start the Flask server only when this file is executed directly.
if __name__ == '__main__':
    # debug=True turns on Flask's debug mode.
    # NOTE(review): presumably meant for local use only — confirm it is
    # disabled before exposing this app to other machines.
    app.run(debug=True)

网页端代码:

<!DOCTYPE html>
<html>
<head>
    <title>PDF Page Manager</title>
    <style>
        body {
            font-family: Arial, sans-serif;
        }
        .grid-container {
            display: grid;
            grid-template-columns: repeat(5, 1fr);
            grid-gap: 10px;
            margin-bottom: 20px;
        }
        .grid-item {
            text-align: center;
        }
        .grid-item img {
            max-width: 100%;
            height: auto;
        }
        .grid-item input[type="checkbox"] {
            margin-top: 5px;
        }
    </style>
</head>
<body>
    <h1>Select Pages to Delete</h1>
    <!-- The grid-container class applies the 5-column layout defined above. -->
    <div id="pageContainer" class="grid-container"></div>
    <button onclick="loadPages()">Load Pages</button>
    <button onclick="submitForm()">Submit</button>

    <script>
        // Fetch the rendered pages and build one thumbnail + checkbox per page.
        function loadPages() {
            fetch('/get-pages', { method: 'GET' })
                .then(response => response.json())
                .then(data => {
                    // The server answers { error: ... } on failure; report it
                    // instead of failing on data.pages.forEach.
                    if (!Array.isArray(data.pages)) {
                        alert(data.error || 'Failed to load pages.');
                        return;
                    }
                    const container = document.getElementById('pageContainer');
                    container.innerHTML = ''; // clear the container
                    data.pages.forEach((page, index) => {
                        const item = document.createElement('div');
                        item.className = 'grid-item';
                        const img = document.createElement('img');
                        img.src = `data:image/png;base64,${page.image}`;
                        img.alt = `Page ${index + 1}`;
                        const checkbox = document.createElement('input');
                        checkbox.type = 'checkbox';
                        checkbox.name = 'page';
                        // The id must match label.htmlFor so that clicking the
                        // label toggles the checkbox.
                        checkbox.id = `page${index}`;
                        checkbox.value = index;
                        const label = document.createElement('label');
                        label.htmlFor = `page${index}`;
                        label.appendChild(document.createTextNode(`Page ${index + 1}`));
                        item.appendChild(img);
                        item.appendChild(checkbox);
                        item.appendChild(label);
                        container.appendChild(item);
                    });
                })
                .catch(() => {
                    alert('Failed to load pages.');
                });
        }

        // Send the indices of the checked pages to the server for deletion.
        function submitForm() {
            const checkboxes = document.querySelectorAll('input[type=checkbox]:checked');
            const selectedPages = Array.from(checkboxes).map(checkbox => checkbox.value);
            fetch('/merge-pdf', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({ selected_pages: selectedPages })
            }).then(response => {
                if (response.ok) {
                    alert('PDF has been modified and saved.');
                } else {
                    alert('An error occurred while modifying the PDF.');
                }
            });
        }
    </script>
</body>
</html>

通过 Python 读取指定路径下的 PDF 文件,按页拆分后,将每一页以图片形式载入网页。

勾选需要删除的页面下方的复选框,然后在页面最下方点击"Submit"按钮提交,即可保存删除后的文件!

相关推荐
穆友航6 小时前
PDF内容提取,MinerU使用
数据分析·pdf
拾荒的小海螺1 天前
JAVA:探索 PDF 文字提取的技术指南
java·开发语言·pdf
村东头老张1 天前
Java 实现PDF添加水印
java·开发语言·pdf
好美啊啊啊啊!1 天前
Thymeleaf模板引擎生成的html字符串转换成pdf
pdf·html
zhentiya2 天前
曼昆《经济学原理》第八版课后答案及英文版PDF
大数据·pdf
三天不学习2 天前
如何解决pdf.js跨域从url动态加载pdf文档
javascript·pdf
吾店云建站2 天前
9个最佳WordPress PDF插件(查看器、嵌入和下载)
程序人生·pdf·创业创新·流量运营·程序员创富·教育电商
007php0072 天前
基于企业微信客户端设计一个文件下载与预览系统
开发语言·python·docker·golang·pdf·php·企业微信
慧都小妮子2 天前
Spire.PDF for .NET【页面设置】演示:更改 PDF 页面大小
前端·pdf·.net