iText实战--在现有PDF上工作

6.1 使用PdfReader读取PDF

检索文档和页面信息

D:/data/iText/inAction/chapter03/image_direct.pdf
Number of pages: 1
Size of page 1: [0.0,0.0,283.0,416.0]
Rotation of page 1: 0
Page size with rotation of page 1: Rectangle: 283.0x416.0 (rot: 0 degrees)
Is rebuilt? false
Is encrypted? false

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;


public class PageInformation {

    /** The resulting text file with info about a PDF. */
    public static final String RESULT
        = "D:/data/iText/inAction/chapter06/page_info.txt";

    /**
     * Inspects one hard-coded PDF and writes the report to {@link #RESULT}.
     * @param args no arguments needed
     * @throws DocumentException declared in the signature; nothing visible here throws it
     * @throws IOException if the report file cannot be opened or the PDF cannot be read
     */
    public static void main(String[] args)
        throws DocumentException, IOException {
        PrintWriter writer = new PrintWriter(new FileOutputStream(RESULT));
        try {
            inspect(writer, "D:/data/iText/inAction/chapter03/image_direct.pdf");
        } finally {
            // Close the report even when the inspection fails.
            writer.close();
        }
    }

    /**
     * Inspects a PDF file and writes information about the document and its
     * first page to a text report. The writer is flushed but not closed.
     * @param writer Writer to a text file
     * @param filename Path to the PDF file
     * @throws IOException if the PDF cannot be read
     */
    public static void inspect(PrintWriter writer, String filename)
        throws IOException {
        PdfReader reader = new PdfReader(filename);
        try {
            writer.println(filename);
            writer.print("Number of pages: ");
            writer.println(reader.getNumberOfPages());
            // Page size of page 1, printed as [left,bottom,right,top].
            Rectangle mediabox = reader.getPageSize(1);
            writer.print("Size of page 1: [");
            writer.print(mediabox.getLeft());
            writer.print(',');
            writer.print(mediabox.getBottom());
            writer.print(',');
            writer.print(mediabox.getRight());
            writer.print(',');
            writer.print(mediabox.getTop());
            writer.println("]");
            writer.print("Rotation of page 1: ");
            writer.println(reader.getPageRotation(1));
            writer.print("Page size with rotation of page 1: ");
            writer.println(reader.getPageSizeWithRotation(1));
            writer.print("Is rebuilt? ");
            writer.println(reader.isRebuilt());
            writer.print("Is encrypted? ");
            writer.println(reader.isEncrypted());
            writer.println();
            writer.flush();
        } finally {
            // Release whatever the reader still holds on the source file.
            reader.close();
        }
    }
}

Page Size 页面大小

损坏的PDF

加密的PDF

使用PdfReader降低内存

部分读取

    /**
     * Opens a PDF with the filename-based {@code PdfReader} constructor and
     * reports how much the value returned by {@code getMemoryUse()} changed.
     * @param writer a writer to a report file; flushed before returning
     * @param filename the file to read
     * @throws IOException if the PDF cannot be read
     */
    public static void fullRead(PrintWriter writer, String filename)
        throws IOException {
        final long baseline = getMemoryUse();
        final PdfReader pdf = new PdfReader(filename);
        pdf.getNumberOfPages();
        // Sample the memory use only after the reader has been exercised.
        final long consumed = getMemoryUse() - baseline;
        final String report = String.format("Memory used by full read: %d", consumed);
        writer.println(report);
        writer.flush();
    }
    
    /**
     * Opens a PDF through a {@code RandomAccessFileOrArray} (partial read) and
     * reports how much the value returned by {@code getMemoryUse()} changed.
     * The reader is closed once the measurement has been written.
     * @param writer a writer to a report file; flushed before returning
     * @param filename the file to read
     * @throws IOException if the PDF cannot be read
     */
    public static void partialRead(PrintWriter writer, String filename)
        throws IOException {
        long before = getMemoryUse();
        PdfReader reader = new PdfReader(
                new RandomAccessFileOrArray(filename), null);
        try {
            reader.getNumberOfPages();
            writer.println(String.format("Memory used by partial read: %d",
                        getMemoryUse() - before));
            writer.flush();
        } finally {
            // The reader was built on an explicitly opened random-access
            // source; close it only after the measurement has been taken.
            reader.close();
        }
    }

选择页面

reader.selectPages("3");

reader.selectPages("4-8");

执行selectPages()后,页数就变成选中的实际页数,要注意越界。

import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;

public class SelectPages {

    /** Output path of the PDF written through PdfStamper. */
    public static final String RESULT1 = "results/part2/chapter06/timetable_stamper.pdf";
    /** Output path of the PDF written through PdfCopy. */
    public static final String RESULT2 = "results/part2/chapter06/timetable_copy.pdf";

    /**
     * Reads a PDF, restricts the reader to the page range "4-8" and writes the
     * selection out twice: once with a stamper, once with a copy.
     * @param args no arguments needed
     * @throws DocumentException
     * @throws IOException
     */
    public static void main(String[] args) throws IOException, DocumentException {
        PdfReader selection = new PdfReader("D:/data/iText/inAction/chapter03/movie_posters.pdf");
        selection.selectPages("4-8");
        manipulateWithStamper(selection);
        manipulateWithCopy(selection);
    }

    /**
     * Writes the content of the reader to {@link #RESULT1} by opening a
     * stamper on it and closing the stamper right away.
     * @param reader a reader with a PDF file
     * @throws IOException
     * @throws DocumentException
     */
    private static void manipulateWithStamper(PdfReader reader)
        throws IOException, DocumentException {
        FileOutputStream target = new FileOutputStream(RESULT1);
        PdfStamper passThrough = new PdfStamper(reader, target);
        passThrough.close();
    }

    /**
     * Writes the content of the reader to {@link #RESULT2} by importing every
     * page the reader reports into a PdfCopy.
     * @param reader a reader with a PDF file
     * @throws IOException
     * @throws DocumentException
     */
    private static void manipulateWithCopy(PdfReader reader)
        throws IOException, DocumentException {
        int pageCount = reader.getNumberOfPages();
        Document target = new Document();
        PdfCopy copier = new PdfCopy(target, new FileOutputStream(RESULT2));
        target.open();
        // Page numbers are 1-based.
        for (int pageNo = 1; pageNo <= pageCount; pageNo++) {
            copier.addPage(copier.getImportedPage(reader, pageNo));
        }
        target.close();
    }
}

6.2 从PDF拷贝页面

导入页面

import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;

public class ImportingPages1 {

    /** Path of the PDF this example writes. */
    public static final String RESULT
        = "D:/data/iText/inAction/chapter06/time_table_imported1.pdf";

    /**
     * Builds a two-column table in which every cell holds one page of an
     * existing PDF, imported through the writer and wrapped in an Image.
     * @param args no arguments needed
     * @throws DocumentException
     * @throws IOException
     */
    public static void main(String[] args)
        throws IOException, DocumentException {
        // step 1: the document
        Document document = new Document();
        // step 2: the writer bound to the output file
        FileOutputStream out = new FileOutputStream(RESULT);
        PdfWriter writer = PdfWriter.getInstance(document, out);
        // step 3: open
        document.open();
        // step 4: one table cell per imported page
        PdfPTable grid = new PdfPTable(2);
        PdfReader source = new PdfReader("D:/data/iText/inAction/chapter03/movie_posters.pdf");
        int pageCount = source.getNumberOfPages();
        int pageNo = 1;
        while (pageNo <= pageCount) {
            PdfImportedPage imported = writer.getImportedPage(source, pageNo);
            Image thumbnail = Image.getInstance(imported);
            grid.addCell(thumbnail);
            pageNo++;
        }
        document.add(grid);
        // step 5: close
        document.close();
    }
}

缩放和叠加页面

叠加PDF页面

import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.Image;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Font.FontFamily;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfGState;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;

public class Layers {

    /** The intermediate PDF written by {@link #createPdf(String)} and read back by main. */
    public static final String SOURCE
        = "D:/data/iText/inAction/chapter06/layers_orig.pdf";
    /** The resulting PDF. */
    public static final String RESULT
        = "D:/data/iText/inAction/chapter06/layers.pdf";
    /** The movie poster. */
    public static final String RESOURCE
        = "E:/study/PDF/SourceCodeiText/itext-book/book/resources/img/loa.jpg";

    /**
     * Creates the source PDF, then imports each of its pages into a new
     * document, drawing every page with the same transformation but shifted
     * upwards by 50 units per page, and labels each one with a glyph.
     * @param args no arguments needed
     * @throws DocumentException
     * @throws IOException
     */
    public static void main(String[] args)
        throws IOException, DocumentException {
        new Layers().createPdf(SOURCE);
        // Create a reader
        PdfReader reader = new PdfReader(SOURCE);
        try {
            // step 1
            Document document = new Document(PageSize.A5.rotate());
            // step 2
            PdfWriter writer
                = PdfWriter.getInstance(document, new FileOutputStream(RESULT));
            // step 3
            document.open();
            // step 4
            PdfContentByte canvas = writer.getDirectContent();
            BaseFont bf
                = BaseFont.createFont(BaseFont.ZAPFDINGBATS, "", BaseFont.EMBEDDED);
            // The page count does not change inside the loop; read it once.
            int n = reader.getNumberOfPages();
            for (int i = 1; i <= n; i++) {
                PdfImportedPage page = writer.getImportedPage(reader, i);
                // Matrix a=1, b=0, c=0.4, d=0.4 with translation (72, 50 * i).
                canvas.addTemplate(page, 1f, 0, 0.4f, 0.4f, 72, 50 * i);
                canvas.beginText();
                canvas.setFontAndSize(bf, 20);
                // Character 181 + i in ZapfDingbats; presumably a numbered
                // glyph matching the page number (verify against the font).
                canvas.showTextAligned(Element.ALIGN_CENTER,
                    String.valueOf((char)(181 + i)), 496, 150 + 50 * i, 0);
                canvas.endText();
            }
            // step 5
            document.close();
        } finally {
            // The reader is only closed after the document has been written.
            reader.close();
        }
    }

    /**
     * Creates a four-page PDF document in postcard format: a filled
     * rectangle, an image, a line of text and the words "SOLD OUT".
     * @param filename the path to the new PDF document
     * @throws    DocumentException
     * @throws    IOException
     */
    public void createPdf(String filename)
        throws IOException, DocumentException {
        float width = PageSize.POSTCARD.getWidth();
        float height = PageSize.POSTCARD.getHeight();
        // step 1
        Document document = new Document(PageSize.POSTCARD, 30, 30, 30, 30);
        // step 2
        PdfWriter writer = PdfWriter.getInstance(document,
                new FileOutputStream(filename));
        // step 3
        document.open();
        // step 4
        PdfContentByte under = writer.getDirectContentUnder();
        // Page 1: a rectangle
        drawRectangle(under, width, height);
        under.setRGBColorFill(0xFF, 0xD7, 0x00);
        under.rectangle(5, 5, width - 10, height - 10);
        under.fill();
        document.newPage();
        // Page 2: an image, centered on the page
        drawRectangle(under, width, height);
        Image img = Image.getInstance(RESOURCE);
        img.setAbsolutePosition((width - img.getScaledWidth()) / 2,
                (height - img.getScaledHeight()) / 2);
        document.add(img);
        document.newPage();
        // Page 3: the words "Foobar Film Festival"
        drawRectangle(under, width, height);
        Paragraph p = new Paragraph("Foobar Film Festival", new Font(FontFamily.HELVETICA, 22));
        p.setAlignment(Element.ALIGN_CENTER);
        document.add(p);
        document.newPage();
        // Page 4: the words "SOLD OUT"
        drawRectangle(under, width, height);
        PdfContentByte over = writer.getDirectContent();
        over.saveState();
        // Rotation by PI / 60 radians (3 degrees), used in the text matrix below.
        float sinus = (float)Math.sin(Math.PI / 60);
        float cosinus = (float)Math.cos(Math.PI / 60);
        BaseFont bf = BaseFont.createFont();
        over.beginText();
        over.setTextRenderingMode(PdfContentByte.TEXT_RENDER_MODE_FILL_STROKE);
        over.setLineWidth(1.5f);
        over.setRGBColorStroke(0xFF, 0x00, 0x00);
        over.setRGBColorFill(0xFF, 0xFF, 0xFF);
        over.setFontAndSize(bf, 36);
        over.setTextMatrix(cosinus, sinus, -sinus, cosinus, 50, 324);
        over.showText("SOLD OUT");
        over.setTextMatrix(0, 0);
        over.endText();
        over.restoreState();
        // step 5
        document.close();
    }

    /**
     * Draws a rectangle from (0, 0) with the given size, filled in white at
     * 60% fill opacity and stroked with a line width of 3. The graphics state
     * of the content layer is saved before and restored after drawing.
     * @param content the direct content layer
     * @param width the width of the rectangle
     * @param height the height of the rectangle
     */
    public static void drawRectangle(PdfContentByte content, float width, float height) {
        content.saveState();
        PdfGState state = new PdfGState();
        state.setFillOpacity(0.6f);
        content.setGState(state);
        content.setRGBColorFill(0xFF, 0xFF, 0xFF);
        content.setLineWidth(3);
        content.rectangle(0, 0, width, height);
        content.fillStroke();
        content.restoreState();
    }
}

导入公司信封

从第N页复制页面

6.3 使用PdfStamper添加内容

在绝对位置添加内容

PdfStamper.getOverContent() 类似 getDirectContent()

import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.pdf.ColumnText;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.ygsoft.abc.component.cdes.itext.chapter1.HelloWorldLandscape1;
import com.ygsoft.abc.component.cdes.itext.chapter1.HelloWorldLandscape2;


public class StampText {
    /** A resulting PDF file. */
    public static final String RESULT1
        = "D:/data/iText/inAction/chapter06/hello1.pdf";
    /** A resulting PDF file. */
    public static final String RESULT2
        = "D:/data/iText/inAction/chapter06/hello2.pdf";
    /** A resulting PDF file. */
    public static final String RESULT3
        = "D:/data/iText/inAction/chapter06/hello3.pdf";

    /**
     * Runs the two landscape examples to create the input files, then stamps
     * them: the first one twice (default stamper settings, and with content
     * rotation switched off), the second one once with default settings.
     * @param args no arguments needed
     * @throws DocumentException
     * @throws IOException
     */
    public static void main(String[] args)
        throws DocumentException, IOException {
        HelloWorldLandscape1.main(args);
        HelloWorldLandscape2.main(args);
        stamp(HelloWorldLandscape1.RESULT, RESULT1);
        stampIgnoreRotation(HelloWorldLandscape1.RESULT, RESULT2);
        stamp(HelloWorldLandscape2.RESULT, RESULT3);
    }

    /**
     * Adds the text "Hello people!" on top of page 1 of src and writes the
     * result to dest, using the stamper's default settings.
     * @param src the original PDF
     * @param dest the resulting PDF
     * @throws IOException
     * @throws DocumentException
     */
    public static void stamp(String src, String dest)
        throws IOException, DocumentException {
        stampHello(src, dest, false);
    }

    /**
     * Adds the text "Hello people!" on top of page 1 of src and writes the
     * result to dest, after calling {@code setRotateContents(false)} on the
     * stamper.
     * @param src the original PDF
     * @param dest the resulting PDF
     * @throws IOException
     * @throws DocumentException
     */
    public static void stampIgnoreRotation(String src, String dest)
        throws IOException, DocumentException {
        stampHello(src, dest, true);
    }

    /**
     * Shared implementation of the two public stamping methods.
     * @param src the original PDF
     * @param dest the resulting PDF
     * @param ignoreRotation when true, content rotation is switched off on the
     *        stamper; when false the stamper is left at its defaults
     * @throws IOException
     * @throws DocumentException
     */
    private static void stampHello(String src, String dest, boolean ignoreRotation)
        throws IOException, DocumentException {
        PdfReader reader = new PdfReader(src);
        try {
            PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(dest));
            if (ignoreRotation) {
                stamper.setRotateContents(false);
            }
            PdfContentByte canvas = stamper.getOverContent(1);
            ColumnText.showTextAligned(canvas,
                    Element.ALIGN_LEFT, new Phrase("Hello people!"), 36, 540, 0);
            stamper.close();
        } finally {
            // Close the reader whether or not stamping succeeded.
            reader.close();
        }
    }
}

2步创建PDF

第一步,创建文档内容,第二步,添加页码

import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.SQLException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.PdfWriter;

public class TwoPasses {

    /** The resulting PDF file. */
    public static final String RESULT
        = "results/part2/chapter06/page_x_of_y.pdf";

    /**
     * Creates a PDF in two passes: the first pass writes the document into
     * memory, the second pass reads it back and stamps a "page X of Y" header
     * on every page.
     * @param    args    no arguments needed
     * @throws DocumentException
     * @throws IOException
     * @throws SQLException declared in the signature; nothing visible here throws it
     */
    public static void main(String[] args)
        throws SQLException, DocumentException, IOException {

        // FIRST PASS, CREATE THE PDF WITHOUT HEADER

        // step 1
        Document document = new Document(PageSize.A4, 36, 36, 54, 36);
        // step 2
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PdfWriter.getInstance(document, baos);
        // step 3
        document.open();
        // step 4
        // The document content is created here (omitted in this listing).
        // step 5
        document.close();


        // SECOND PASS, ADD THE HEADER

        // Create a reader on the bytes produced by the first pass
        PdfReader reader = new PdfReader(baos.toByteArray());
        try {
            // Create a stamper
            PdfStamper stamper
                = new PdfStamper(reader, new FileOutputStream(RESULT));
            // Loop over the pages and add a header to each page
            int n = reader.getNumberOfPages();
            for (int i = 1; i <= n; i++) {
                getHeaderTable(i, n).writeSelectedRows(
                        0, -1, 34, 803, stamper.getOverContent(i));
            }
            // Close the stamper
            stamper.close();
        } finally {
            // Close the reader whether or not stamping succeeded.
            reader.close();
        }
    }

    /**
     * Create a header table with page X of Y: two columns, a locked total
     * width of 527, cells with a fixed height of 20 and a bottom border.
     * @param x the page number
     * @param y the total number of pages
     * @return a table that can be used as header
     */
    public static PdfPTable getHeaderTable(int x, int y) {
        PdfPTable table = new PdfPTable(2);
        table.setTotalWidth(527);
        table.setLockedWidth(true);
        table.getDefaultCell().setFixedHeight(20);
        table.getDefaultCell().setBorder(Rectangle.BOTTOM);
        table.addCell("FOOBAR FILMFESTIVAL");
        // Only the second cell (the page counter) is right-aligned.
        table.getDefaultCell().setHorizontalAlignment(Element.ALIGN_RIGHT);
        table.addCell(String.format("Page %d of %d", x, y));
        return table;
    }
}

添加公司信封到一个存在的文档

    /**
     * Manipulates a PDF file src with the file dest as result: page 1 of the
     * stationery PDF is added underneath the content of every page of src.
     * @param src the original PDF
     * @param stationery a PDF that will be added as background
     * @param dest the resulting PDF
     * @throws IOException
     * @throws DocumentException
     */
    public void manipulatePdf(String src, String stationery, String dest)
        throws IOException, DocumentException {
        // Create readers
        PdfReader reader = new PdfReader(src);
        try {
            PdfReader stationeryReader = new PdfReader(stationery);
            try {
                // Create the stamper
                PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(dest));
                // Add the stationery to each page
                PdfImportedPage page = stamper.getImportedPage(stationeryReader, 1);
                int n = reader.getNumberOfPages();
                PdfContentByte background;
                for (int i = 1; i <= n; i++) {
                    background = stamper.getUnderContent(i);
                    background.addTemplate(page, 0, 0);
                }
                // Close the stamper
                stamper.close();
            } finally {
                // Closed only after the stamper, which imported a page from it.
                stationeryReader.close();
            }
        } finally {
            reader.close();
        }
    }

插入页面到一个存在的文档

填充PDF表单

6.4 使用PdfCopy 拷贝页面

拼接和拆分PDF文档

拼接文档

import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.SQLException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.pdf.ColumnText;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfReader;

public class ConcatenateStamp {

    /** The resulting PDF file. */
    public static final String RESULT
        = "results/part2/chapter06/concatenated_stamped.pdf";

    /**
     * Runs two earlier examples to create the input PDFs, then concatenates
     * them into {@link #RESULT}, stamping "page X of Y" on every page with a
     * numbering that runs across both documents.
     * @param args no arguments needed
     * @throws DocumentException
     * @throws IOException
     * @throws SQLException
     */
    public static void main(String[] args)
        throws IOException, DocumentException, SQLException {
        // use old examples to create PDFs
        MovieLinks1.main(args);
        MovieHistory.main(args);
        // step 1
        Document document = new Document();
        // step 2
        PdfCopy copy = new PdfCopy(document, new FileOutputStream(RESULT));
        // step 3
        document.open();
        // step 4
        PdfReader reader1 = null;
        PdfReader reader2 = null;
        try {
            // reader for document 1
            reader1 = new PdfReader(MovieLinks1.RESULT);
            int n1 = reader1.getNumberOfPages();
            // reader for document 2
            reader2 = new PdfReader(MovieHistory.RESULT);
            int n2 = reader2.getNumberOfPages();
            int total = n1 + n2;
            // Document 1 is numbered from 1, document 2 continues after it.
            addNumberedPages(copy, reader1, 0, total);
            addNumberedPages(copy, reader2, n1, total);
            // step 5
            document.close();
        } finally {
            if (reader2 != null) {
                reader2.close();
            }
            if (reader1 != null) {
                reader1.close();
            }
        }
    }

    /**
     * Copies every page of a reader into the copy, stamping a centered
     * "page X of Y" text underneath the content of each page.
     * @param copy the copy the pages are added to
     * @param reader the document whose pages are copied
     * @param offset number of pages already added; added to the page number that is printed
     * @param total the total page count printed as Y
     * @throws IOException
     * @throws DocumentException
     */
    private static void addNumberedPages(PdfCopy copy, PdfReader reader,
            int offset, int total) throws IOException, DocumentException {
        int n = reader.getNumberOfPages();
        for (int i = 1; i <= n; i++) {
            PdfImportedPage page = copy.getImportedPage(reader, i);
            PdfCopy.PageStamp stamp = copy.createPageStamp(page);
            // add page numbers
            ColumnText.showTextAligned(
                    stamp.getUnderContent(), Element.ALIGN_CENTER,
                    new Phrase(String.format("page %d of %d", offset + i, total)),
                    297.5f, 28, 0);
            stamp.alterContents();
            copy.addPage(page);
        }
    }
}

拆分文档

import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfReader;

public class Burst {

    /** Format string for the output files; %d is replaced by the page number. */
    public static final String RESULT
        = "D:/data/iText/inAction/chapter06/timetable_p%d.pdf";

    /**
     * Splits a PDF into single-page PDFs, one output file per page.
     * @param    args    no arguments needed
     * @throws DocumentException
     * @throws IOException
     */
    public static void main(String[] args)
        throws IOException, DocumentException {
        // The document that gets split
        PdfReader source = new PdfReader("D:/data/iText/inAction/chapter03/movie_posters.pdf");
        int pageCount = source.getNumberOfPages();
        // One new document per page; page numbers are 1-based
        for (int pageNo = 1; pageNo <= pageCount; pageNo++) {
            // step 1
            Document single = new Document();
            // step 2
            String target = String.format(RESULT, pageNo);
            PdfCopy copier = new PdfCopy(single, new FileOutputStream(target));
            // step 3
            single.open();
            // step 4
            copier.addPage(copier.getImportedPage(source, pageNo));
            // step 5
            single.close();
        }
    }
}

PdfCopy VS PdfSmartCopy

PdfSmartCopy 继承自 PdfCopy,它会检查每个新增页面中的冗余对象,因此可以节省大量磁盘空间或带宽。这种额外的"智慧"是要付出代价的:PdfSmartCopy 需要更多的内存和时间去拼接文档。

文件大小、带宽优先,选PdfSmartCopy

内存、时间优先,选PdfCopy

拼接表单

相关推荐
小奥超人7 小时前
PDF无法打印!怎么办?
windows·经验分享·pdf·办公技巧·pdf加密解密
m0_748241231 天前
ElasticPDF-新国产 PDF 编辑器开发框架(基于 pdf.js Web PDF批注开发,实现高亮多边形橡皮擦历史记录保存注释文字)
前端·pdf·编辑器
ComPDFKit1 天前
开源 JS PDF 库比较
pdf
杨浦老苏1 天前
开源PDF翻译工具PDFMathTranslate
人工智能·docker·ai·pdf·群晖·翻译
LostSpeed1 天前
在福昕(pdf)阅读器中导航到上次阅读页面的方法
pdf
旭久1 天前
SpringBoot的Thymeleaf做一个可自定义合并td的pdf表格
pdf·html·springboot
神色自若2 天前
Net9为PDF文字替换,使用Spire.PDF版本10.12.4.1360
pdf
机器懒得学习2 天前
解析交通事故报告:利用 PDF、AI 与数据标准化技术构建智能分析系统
pdf
合合技术团队3 天前
高效准确的PDF解析工具,赋能企业非结构化数据治理
人工智能·科技·pdf·aigc·文档
jingling5553 天前
如何使用免费资源--知网篇
开发语言·经验分享·搜索引擎·pdf·开源