批量读取 PDF 发票中的二维码信息

完整的 Java 类代码如下:

java 复制代码
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.write.builder.ExcelWriterBuilder;
import com.alibaba.excel.write.metadata.WriteSheet;
import com.alibaba.excel.write.metadata.style.WriteCellStyle;
import com.alibaba.excel.write.style.HorizontalCellStyleStrategy;
import com.alibaba.excel.write.style.column.LongestMatchColumnWidthStyleStrategy;
import com.google.zxing.BinaryBitmap;
import com.google.zxing.MultiFormatReader;
import com.google.zxing.NotFoundException;
import com.google.zxing.Result;
import com.google.zxing.client.j2se.BufferedImageLuminanceSource;
import com.google.zxing.common.HybridBinarizer;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.ss.usermodel.BorderStyle;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

/**
 * pdf电子发票二维码解析
 *
 * @author 单红宇
 * @since 2025-01-02 09:00:16
 */
@Slf4j
@Data
public class FapiaoPDFQRcodeParser {

    /**
     * folderPath
     */
    private String folderPath;
    /**
     * outputPath
     */
    private String outputPath;

    /**
     * 入口
     *
     * @param args args
     */
    public static void main(String[] args) {
        String folderPath = "D:\\Downloads\\fapiao";
        FapiaoPDFQRcodeParser fapiaoPDFQrcodeParser = new FapiaoPDFQRcodeParser();
        fapiaoPDFQrcodeParser.setFolderPath(folderPath);
        fapiaoPDFQrcodeParser.processFolder(folderPath);
    }

    /**
     * 递归处理所有pdf发票
     *
     * @param folder             folder
     * @param resultCodeDataList qrCodes
     * @param errorDataList      errorDataList
     */
    private void processFolderRecursively(File folder, List<QrCodeData> resultCodeDataList, List<ErrorData> errorDataList) {
        if (folder.isDirectory()) {
            for (File file : Objects.requireNonNull(folder.listFiles())) {
                if (file.isDirectory()) {
                    processFolderRecursively(file, resultCodeDataList, errorDataList);
                } else if (file.isFile() && file.getName().toLowerCase().endsWith(".pdf")) {
                    log.info("Processing: {}", file.getAbsolutePath());
                    String fileName = file.getAbsolutePath().substring(this.getFolderPath().length() + 1);
                    try {
                        resultCodeDataList.addAll(extractQRCodesFromPdf(file, fileName, errorDataList));
                    } catch (IOException e) {
                        errorDataList.add(new ErrorData(fileName, e.getMessage()));
                    }
                }
            }
        }
    }

    /**
     * 开始处理发票文件夹
     *
     * @param folderPath folderPath
     */
    private void processFolder(String folderPath) {
        File folder = new File(folderPath);
        List<QrCodeData> resultList = new ArrayList<>();
        List<ErrorData> errorDataList = new ArrayList<>();
        processFolderRecursively(folder, resultList, errorDataList);
        // 使用EasyExcel写入Excel文件
        // 创建ExcelWriter对象,指定文件名和文件类型(这里假设是xlsx)
        ExcelWriterBuilder writerBuilder =
                EasyExcel.write(this.getOutputPath())
                        .registerWriteHandler(this.getHorizontalCellStyleStrategy())
                        .registerWriteHandler(new LongestMatchColumnWidthStyleStrategy());
        // 如果需要自定义一些全局配置,可以继续链式调用writerBuilder的其他方法
        try (ExcelWriter excelWriter = writerBuilder.build()) {// 构建ExcelWriter
            // 写入第一个sheet
            WriteSheet writeSheet1 = EasyExcel.writerSheet("Codes").head(QrCodeData.class).build();
            excelWriter.write(resultList, writeSheet1);

            // 写入第二个sheet
            WriteSheet writeSheet2 = EasyExcel.writerSheet("Errors").head(ErrorData.class).build();
            excelWriter.write(errorDataList, writeSheet2);
        } catch (Exception e) {
            log.error("输出Excel异常", e);
        }
    }

    /**
     * 设置单元格样式
     *
     * @return HorizontalCellStyleStrategy
     */
    private HorizontalCellStyleStrategy getHorizontalCellStyleStrategy() {
        // 设置边框样式
        WriteCellStyle headWriteCellStyle = new WriteCellStyle();
        WriteCellStyle contentWriteCellStyle = new WriteCellStyle();

        // 设置边框
        BorderStyle borderStyle = BorderStyle.THIN;
        headWriteCellStyle.setBorderTop(borderStyle);
        headWriteCellStyle.setBorderBottom(borderStyle);
        headWriteCellStyle.setBorderLeft(borderStyle);
        headWriteCellStyle.setBorderRight(borderStyle);

        contentWriteCellStyle.setBorderTop(borderStyle);
        contentWriteCellStyle.setBorderBottom(borderStyle);
        contentWriteCellStyle.setBorderLeft(borderStyle);
        contentWriteCellStyle.setBorderRight(borderStyle);

        // 创建水平样式策略
        return new HorizontalCellStyleStrategy(headWriteCellStyle, contentWriteCellStyle);
    }

    /**
     * 提取pdf中的发票二维码
     *
     * @param pdfFile       pdfFile
     * @param fileName      fileName
     * @param errorDataList errorDataList
     * @return List
     * @throws IOException IOException
     */
    private List<QrCodeData> extractQRCodesFromPdf(File pdfFile, String fileName, List<ErrorData> errorDataList) throws IOException {
        List<QrCodeData> qrCodes = new ArrayList<>();
        try (PDDocument document = Loader.loadPDF(pdfFile)) {
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            for (int page = 0; page < document.getNumberOfPages(); ++page) {
                BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
                try {
                    qrCodes.addAll(decodeQRCode(bim, fileName));
                } catch (NotFoundException e) {
                    log.error("解析二维码发生异常", e);
                    errorDataList.add(new ErrorData(fileName, e.getMessage()));
                }
            }
        }
        return qrCodes;
    }

    /**
     * 解析二维码为数据对象
     *
     * @param image    image
     * @param fileName fileName
     * @return List
     */
    private List<QrCodeData> decodeQRCode(BufferedImage image, String fileName) throws NotFoundException {
        List<QrCodeData> result = new ArrayList<>();
        MultiFormatReader multiFormatReader = new MultiFormatReader();

        BinaryBitmap binaryBitmap = new BinaryBitmap(new HybridBinarizer(
                new BufferedImageLuminanceSource(image)));
        Result zxingResult = multiFormatReader.decode(binaryBitmap);

        // 假设二维码内容直接包含三个值,用逗号分隔
        String[] data = zxingResult.getText().split(",");
        if (data.length > 5) {
            result.add(new FapiaoPDFQRcodeParser.QrCodeData(data[3], data[4], data[5], fileName));
        }

        return result;
    }

    /**
     * 所有发票解析后输出的excel路径
     *
     * @return String
     */
    public String getOutputPath() {
        if (outputPath == null) {
            outputPath = folderPath + File.separator + "output.xlsx";
        }
        return outputPath;
    }

    /**
     * 二维码数据对象
     *
     * @author 单红宇
     * @since 2025-01-02 09:00:16
     */
    @Data
    static class QrCodeData {
        /**
         * invoiceNumber
         */
        @ExcelProperty("发票号码")
        private String invoiceNumber;
        /**
         * amount
         */
        @ExcelProperty("金额")
        private String amount;
        /**
         * date
         */
        @ExcelProperty("日期")
        private String date;
        /**
         * fileName
         */
        @ExcelProperty("文件名")
        private String fileName;

        /**
         * QrCodeData
         *
         * @param invoiceNumber invoiceNumber
         * @param amount        amount
         * @param date          date
         * @param fileName      fileName
         */
        public QrCodeData(String invoiceNumber, String amount, String date, String fileName) {
            this.invoiceNumber = invoiceNumber;
            this.amount = amount;
            this.date = date.replace("-","").replace("/", "");
            this.fileName = fileName;
        }

    }

    /**
     * 错误数据内容
     *
     * @author 单红宇
     * @since 2025-01-02 09:00:16
     */
    @Data
    static class ErrorData {
        /**
         * fileName
         */
        @ExcelProperty("文件名")
        private String fileName;

        /**
         * errorMessage
         */
        @ExcelProperty("错误信息")
        private String errorMessage;

        public ErrorData(String fileName, String errorMessage) {
            this.errorMessage = errorMessage;
            this.fileName = fileName;
        }

    }
}