使用python与Flask对pdf格式文件进行删改

我们在网上搜集的一些电子版资料多数是pdf格式,一些无良培训机构或者自媒体为了博取眼球、引流、会在倒手过程使用一些程式对一些文档进行批量添加水印,或者联系,以此原本干净整洁资料满屏"牛皮藓",简直是糟糕透了!

from flask import Flask, request, send_file, render_template_string, jsonify
from PyPDF2 import PdfReader, PdfWriter
import os
from pdf2image import convert_from_path
import io
import base64

app = Flask(__name__)


# 根 URL 路由
@app.route('/')
def index():
    return render_template_string('''
        <!DOCTYPE html>
        <html>
        <head>
            <title>PDF Page Manager</title>
            <style>
                body {
                    font-family: Arial, sans-serif;
                }
                .grid-container {
                    display: grid;
                    grid-template-columns: repeat(5, 1fr);
                    grid-gap: 10px;
                    margin-bottom: 20px;
                }
                .grid-item {
                    text-align: center;
                }
                .grid-item img {
                    max-width: 100%;
                    height: auto;
                }
                .grid-item input[type="checkbox"] {
                    margin-top: 5px;
                }
            </style>
        </head>
        <body>
            <h1>Select Pages to Delete</h1>
            <div id="pageContainer"></div>
            <button onclick="loadPages()">Load Pages</button>
            <button onclick="submitForm()">Submit</button>

            <script>
                function loadPages() {
                    fetch('/get-pages', { method: 'GET' })
                        .then(response => response.json())
                        .then(data => {
                            const container = document.getElementById('pageContainer');
                            container.innerHTML = ''; // 清空容器
                            data.pages.forEach((page, index) => {
                                const item = document.createElement('div');
                                item.className = 'grid-item';
                                const img = document.createElement('img');
                                img.src = `data:image/png;base64,${page.image}`;
                                img.alt = `Page ${index + 1}`;
                                const checkbox = document.createElement('input');
                                checkbox.type = 'checkbox';
                                checkbox.name = 'page';
                                checkbox.value = index;
                                const label = document.createElement('label');
                                label.htmlFor = `page${index}`;
                                label.appendChild(document.createTextNode(`Page ${index + 1}`));
                                item.appendChild(img);
                                item.appendChild(checkbox);
                                item.appendChild(label);
                                container.appendChild(item);
                            });
                        });
                }

                function submitForm() {
                    const checkboxes = document.querySelectorAll('input[type=checkbox]:checked');
                    const selectedPages = Array.from(checkboxes).map(checkbox => checkbox.value);
                    fetch('/merge-pdf', {
                        method: 'POST',
                        headers: {
                            'Content-Type': 'application/json'
                        },
                        body: JSON.stringify({ selected_pages: selectedPages })
                    }).then(response => {
                        if (response.ok) {
                            alert('PDF has been modified and saved.');
                        } else {
                            alert('An error occurred while modifying the PDF.');
                        }
                    });
                }
            </script>
        </body>
        </html>
    ''')


@app.route('/get-pages', methods=['GET'])
def get_pages():
    file_path = r"D:\daku\python编辑pdf\2024年县域未成年人网络消费调研报告-佟毕铖.pdf"
    try:
        images = convert_from_path(file_path)
        page_data = []

        for i, image in enumerate(images):
            buffered = io.BytesIO()
            image.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
            page_data.append({'index': i, 'image': img_str})

        return jsonify({'pages': page_data})
    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/merge-pdf', methods=['POST'])
def merge_pdf():
    data = request.json
    selected_pages = data.get('selected_pages', [])

    file_path = r"D:\daku\python编辑pdf\2024年县域未成年人网络消费调研报告-佟毕铖.pdf"
    reader = PdfReader(file_path)

    writer = PdfWriter()

    for page_num in range(len(reader.pages)):
        if str(page_num) not in selected_pages:
            writer.add_page(reader.pages[page_num])

    output_path = r"D:\daku\python编辑pdf\output\modified_report.pdf"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'wb') as f:
        writer.write(f)

    return send_file(output_path, as_attachment=True)


if __name__ == '__main__':
    app.run(debug=True)

网页端代码:

<!DOCTYPE html>
<html>
<head>
    <title>PDF Page Manager</title>
    <style>
        body {
            font-family: Arial, sans-serif;
        }
        .grid-container {
            display: grid;
            grid-template-columns: repeat(5, 1fr);
            grid-gap: 10px;
            margin-bottom: 20px;
        }
        .grid-item {
            text-align: center;
        }
        .grid-item img {
            max-width: 100%;
            height: auto;
        }
        .grid-item input[type="checkbox"] {
            margin-top: 5px;
        }
    </style>
</head>
<body>
    <h1>Select Pages to Delete</h1>
    <div id="pageContainer"></div>
    <button onclick="loadPages()">Load Pages</button>
    <button onclick="submitForm()">Submit</button>

    <script>
        function loadPages() {
            fetch('/get-pages', { method: 'GET' })
                .then(response => response.json())
                .then(data => {
                    const container = document.getElementById('pageContainer');
                    container.innerHTML = ''; // 清空容器
                    data.pages.forEach((page, index) => {
                        const item = document.createElement('div');
                        item.className = 'grid-item';
                        const img = document.createElement('img');
                        img.src = `data:image/png;base64,${page.image}`;
                        img.alt = `Page ${index + 1}`;
                        const checkbox = document.createElement('input');
                        checkbox.type = 'checkbox';
                        checkbox.name = 'page';
                        checkbox.value = index;
                        const label = document.createElement('label');
                        label.htmlFor = `page${index}`;
                        label.appendChild(document.createTextNode(`Page ${index + 1}`));
                        item.appendChild(img);
                        item.appendChild(checkbox);
                        item.appendChild(label);
                        container.appendChild(item);
                    });
                });
        }

        function submitForm() {
            const checkboxes = document.querySelectorAll('input[type=checkbox]:checked');
            const selectedPages = Array.from(checkboxes).map(checkbox => checkbox.value);
            fetch('/merge-pdf', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({ selected_pages: selectedPages })
            }).then(response => {
                if (response.ok) {
                    alert('PDF has been modified and saved.');
                } else {
                    alert('An error occurred while modifying the PDF.');
                }
            });
        }
    </script>
</body>
</html>

通过python抽取指定路径pdf格式文件,进行页面分割,将分割页面载入网页

勾选页脚下方小框框,在最下方点击提交保存就好啦!

相关推荐
宝子向前冲1 小时前
纯前端生成PDF(jsPDF)并下载保存或上传到OSS
前端·pdf·html2canvas·oss·jspdf
bw8767206871 小时前
人工智能--JupyterNoteBook 转换成 PDF
人工智能·机器学习·pdf
棱角~~3 小时前
10款PDF转Word软件工具的使用感受及其亮点!!!
经验分享·pdf·word·学习方法
shujuwa663 小时前
什么是开源软件(OSS)?
pdf·编辑器·电脑·word·开源软件
晨欣5 小时前
Mac如何将多个pdf文件归并到一个
macos·pdf
慧都小妮子6 小时前
Spire.PDF for .NET【页面设置】演示:获取 PDF 文件中的页数
java·pdf·.net
新加坡内哥谈技术13 小时前
Claude 3.5 Sonnet模型新增了PDF支持功能
人工智能·pdf
weixin_4046793116 小时前
pdf转图片
linux·运维·开发语言·python·pdf
多吃轻食18 小时前
关于 PDF 抽取的吐槽
pdf
大飞攻城狮1 天前
2024年中国生成式人工智能应用与实践展望白皮书(中英文版)|附147页PDF文件下载
大数据·人工智能·pdf·产品经理·大模型学习·大模型入门·大模型教程