作者:fyupeng
技术专栏:☞ https://github.com/fyupeng
项目地址:☞ https://github.com/fyupeng/distributed-blog-system-api
留给读者
咱们又见面了,本期带给大家什么,请往下看,绝对是干货!
一、介绍
传入 PDF 文件的二进制数据(byte[])作为参数,返回删除空白页之后的 PDF 文件二进制数据。
二、代码
引入依赖:
xml
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.21</version>
</dependency>
代码:
java
/**
 * Demo entry point: reads {@code d:/hztzs.pdf}, strips its blank pages and
 * writes the result to {@code d:/out.pdf}.
 *
 * @param args unused
 * @throws IOException if the input cannot be read, the PDF cannot be processed,
 *                     or the output cannot be written
 */
public static void main(String[] args) throws IOException {
    File file = new File("d:/hztzs.pdf");
    // A single InputStream.read(byte[]) call may return before the buffer is
    // full; readAllBytes reads the whole file and closes the stream itself.
    byte[] bytes = java.nio.file.Files.readAllBytes(file.toPath());
    bytes = new ArchivElecFileService().removeEmptyPages(bytes);
    File newfile = new File("d:/out.pdf");
    // try-with-resources flushes and closes the output stream even if write fails.
    try (FileOutputStream fos = new FileOutputStream(newfile)) {
        fos.write(bytes);
    }
}
/**
 * Removes blank pages from a PDF.
 *
 * <p>A page is removed when text extraction yields nothing but whitespace
 * <em>and</em> {@code isPageImageOnly} reports that its rendered image is blank.
 * Pages are visited from last to first so that removing one does not shift
 * the indexes of the pages still to be checked.
 *
 * @param fileBytes the complete PDF file content
 * @return the PDF content after blank pages have been removed
 * @throws IOException if the document cannot be parsed, rendered or saved
 */
public byte[] removeEmptyPages(byte[] fileBytes) throws IOException {
    // PDDocument holds parser buffers/scratch files; close it on every path.
    try (PDDocument document = PDDocument.load(fileBytes)) {
        PDPageTree pages = document.getPages();
        // The stripper is loop-invariant; only its page window changes per iteration.
        PDFTextStripper stripper = new PDFTextStripper();
        for (int i = document.getNumberOfPages() - 1; i >= 0; i--) {
            // Page indexes are 1-based in PDFTextStripper
            stripper.setStartPage(i + 1);
            stripper.setEndPage(i + 1);
            String text = stripper.getText(document);
            if (!text.trim().isEmpty()) {
                continue; // page has extractable text, keep it
            }
            // No text: fall back to the (more expensive) rendered-image check
            // so that pages containing only graphics are not dropped.
            PDPage page = pages.get(i);
            if (isPageImageOnly(page, document)) {
                document.removePage(i);
            }
        }
        // Serialize the resulting document into memory
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        document.save(outputStream);
        return outputStream.toByteArray();
    }
}
/**
 * Renders the given page at 300 DPI and reports the result of
 * {@code isImageOnly} on the rendered image.
 *
 * <p>NOTE(review): despite its name, this returns {@code true} when the
 * rendered page is considered blank (mostly white), not when it contains
 * only images.
 *
 * @param page     the page to inspect; its index is looked up in {@code document}
 * @param document the document that owns {@code page}
 * @return the verdict of {@code isImageOnly} for the rendered page
 * @throws IOException if rendering fails
 */
private static boolean isPageImageOnly(PDPage page, PDDocument document) throws IOException {
    PDFRenderer pageRenderer = new PDFRenderer(document);
    int pageIndex = document.getPages().indexOf(page);
    // Adjust DPI as needed
    BufferedImage rendered = pageRenderer.renderImageWithDPI(pageIndex, 300);
    boolean blank = isImageOnly(rendered);
    return blank;
}
/**
 * Decides whether an image is effectively blank by measuring the share of
 * white pixels, as counted by {@code ImageUtils.countWhitePixels}.
 *
 * @param image the rendered page image
 * @return {@code true} if more than 95% of the pixels are white
 */
private static boolean isImageOnly(BufferedImage image) {
    int totalPixels = image.getWidth() * image.getHeight();
    long whitePixels = ImageUtils.countWhitePixels(image);
    // Adjust the 0.95 threshold as needed for your documents.
    double whiteShare = (double) whitePixels / totalPixels;
    return whiteShare > 0.95;
}
/** Helper for counting the white pixels of an image. */
static class ImageUtils {
    /**
     * Counts the pixels of {@code image} whose red, green and blue
     * components are all greater than 250.
     *
     * @param image the image to scan
     * @return the number of white pixels
     */
    public static long countWhitePixels(BufferedImage image) {
        final int columns = image.getWidth();
        final int rows = image.getHeight();
        long whiteTotal = 0;
        for (int row = 0; row < rows; row++) {
            for (int col = 0; col < columns; col++) {
                whiteTotal += isWhite(image.getRGB(col, row)) ? 1 : 0;
            }
        }
        return whiteTotal;
    }

    /**
     * Tests a packed RGB value (alpha bits are ignored) against the white
     * threshold: every channel must be strictly above 250.
     * Adjust the threshold to suit your image characteristics.
     */
    private static boolean isWhite(int pixel) {
        int r = (pixel >> 16) & 0xff;
        int g = (pixel >> 8) & 0xff;
        int b = pixel & 0xff;
        // All channels exceed the threshold exactly when the darkest one does.
        int darkest = Math.min(r, Math.min(g, b));
        return darkest > 250;
    }
}
三、总结
易用、高效、轻便!