iText实战--在现有PDF上工作

6.1 使用PdfReader读取PDF

检索文档和页面信息

D:/data/iText/inAction/chapter03/image_direct.pdf
Number of pages: 1
Size of page 1: [0.0,0.0,283.0,416.0]
Rotation of page 1: 0
Page size with rotation of page 1: Rectangle: 283.0x416.0 (rot: 0 degrees)
Is rebuilt? false
Is encrypted? false

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;


public class PageInformation {

	/** The resulting text file with info about a PDF. */
    public static final String RESULT
        = "D:/data/iText/inAction/chapter06/page_info.txt";
    
    /**
     * Main method.
     * @param args no arguments needed
     * @throws DocumentException 
     * @throws IOException
     */
    public static void main(String[] args)
        throws DocumentException, IOException {
        // Inspecting PDFs
        PrintWriter writer = new PrintWriter(new FileOutputStream(RESULT));
        inspect(writer, "D:/data/iText/inAction/chapter03/image_direct.pdf");
        writer.close();
    }
    
    /**
     * Inspect a PDF file and write the info to a txt file
     * @param writer Writer to a text file
     * @param filename Path to the PDF file
     * @throws IOException
     */
    public static void inspect(PrintWriter writer, String filename)
        throws IOException {
        PdfReader reader = new PdfReader(filename);
        writer.println(filename);
        writer.print("Number of pages: ");
        writer.println(reader.getNumberOfPages());
        Rectangle mediabox = reader.getPageSize(1);
        writer.print("Size of page 1: [");
        writer.print(mediabox.getLeft());
        writer.print(',');
        writer.print(mediabox.getBottom());
        writer.print(',');
        writer.print(mediabox.getRight());
        writer.print(',');
        writer.print(mediabox.getTop());
        writer.println("]");
        writer.print("Rotation of page 1: ");
        writer.println(reader.getPageRotation(1));
        writer.print("Page size with rotation of page 1: ");
        writer.println(reader.getPageSizeWithRotation(1));
        writer.print("Is rebuilt? ");
        writer.println(reader.isRebuilt());
        writer.print("Is encrypted? ");
        writer.println(reader.isEncrypted());
        writer.println();
        writer.flush();
    }
}

Page Size 页面大小

损坏的PDF

加密的PDF

使用PdfReader降低内存

部分读取

    /**
     * Do a full read of a PDF file
     * @param writer a writer to a report file
     * @param filename the file to read
     * @throws IOException
     */
    public static void fullRead(PrintWriter writer, String filename)
        throws IOException {
        long before = getMemoryUse();
        PdfReader reader = new PdfReader(filename);
        reader.getNumberOfPages();
        writer.println(String.format("Memory used by full read: %d",
                    getMemoryUse() - before));
        writer.flush();
    }
    
    /**
     * Do a partial read of a PDF file
     * @param writer a writer to a report file
     * @param filename the file to read
     * @throws IOException
     */
    public static void partialRead(PrintWriter writer, String filename)
        throws IOException {
        long before = getMemoryUse();
        PdfReader reader = new PdfReader(
                new RandomAccessFileOrArray(filename), null);
        reader.getNumberOfPages();
        writer.println(String.format("Memory used by partial read: %d",
                    getMemoryUse() - before));
        writer.flush();
    }

选择页面

PdfReader.selectPages("3");

PdfReader.selectPages("4-8");

执行selectPages()后,页数就变成选中的实际页数,要注意越界。

import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;

public class SelectPages {

	/** A resulting PDF file. */
    public static final String RESULT1 = "results/part2/chapter06/timetable_stamper.pdf";
	/** A resulting PDF file. */
    public static final String RESULT2 = "results/part2/chapter06/timetable_copy.pdf"; 
    
    /**
     * Main method.
     * @param args no arguments needed
     * @throws DocumentException 
     * @throws IOException
     */
    public static void main(String[] args) throws IOException, DocumentException {
        PdfReader reader = new PdfReader("D:/data/iText/inAction/chapter03/movie_posters.pdf");
        reader.selectPages("4-8");
        manipulateWithStamper(reader);
        manipulateWithCopy(reader);
    }

    /**
     * Creates a new PDF based on the one in the reader
     * @param reader a reader with a PDF file
     * @throws IOException
     * @throws DocumentException
     */
    private static void manipulateWithStamper(PdfReader reader)
        throws IOException, DocumentException {
        PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(RESULT1));
        stamper.close();
    }

    /**
     * Creates a new PDF based on the one in the reader
     * @param reader a reader with a PDF file
     * @throws IOException
     * @throws DocumentException
     */
    private static void manipulateWithCopy(PdfReader reader)
        throws IOException, DocumentException {
        int n = reader.getNumberOfPages();
        Document document = new Document();
        PdfCopy copy = new PdfCopy(document, new FileOutputStream(RESULT2));
        document.open();
        for (int i = 0; i < n;) {
            copy.addPage(copy.getImportedPage(reader, ++i));
        }
        document.close();
    }
    
    
}

6.2 从PDF拷贝页面

导入页面

import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;

public class ImportingPages1 {

    /** The resulting PDF file. */
    public static final String RESULT
        = "D:/data/iText/inAction/chapter06/time_table_imported1.pdf";
    
    /**
     * Main method.
     * @param args no arguments needed
     * @throws DocumentException 
     * @throws IOException
     */
    public static void main(String[] args)
        throws IOException, DocumentException {
        // step 1
        Document document = new Document();
        // step 2
        PdfWriter writer
            = PdfWriter.getInstance(document, new FileOutputStream(RESULT));
        // step 3
        document.open();
        // step 4
        PdfPTable table = new PdfPTable(2);
        PdfReader reader = new PdfReader("D:/data/iText/inAction/chapter03/movie_posters.pdf");
        int n = reader.getNumberOfPages();
        PdfImportedPage page;
        for (int i = 1; i <= n; i++) {
            page = writer.getImportedPage(reader, i);
            table.addCell(Image.getInstance(page));
        }
        document.add(table);
        // step 5
        document.close();
    }
}

缩放和叠加页面

叠加PDF页面

import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.Image;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Font.FontFamily;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfGState;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;

public class Layers {

    /** The resulting PDF. */
    public static final String SOURCE
        = "D:/data/iText/inAction/chapter06/layers_orig.pdf";
    /** The resulting PDF. */
    public static final String RESULT
        = "D:/data/iText/inAction/chapter06/layers.pdf";
    /** The movie poster. */
    public static final String RESOURCE
        = "E:/study/PDF/SourceCodeiText/itext-book/book/resources/img/loa.jpg";
    
    /**
     * Main method.
     * @param args no arguments needed
     * @throws DocumentException 
     * @throws IOException
     */
    public static void main(String[] args)
        throws IOException, DocumentException {
        new Layers().createPdf(SOURCE);
        // Create a reader
        PdfReader reader = new PdfReader(SOURCE);
        // step 1
        Document document = new Document(PageSize.A5.rotate());
        // step 2
        PdfWriter writer
            = PdfWriter.getInstance(document, new FileOutputStream(RESULT));
        // step 3
        document.open();
        // step 4
        PdfContentByte canvas = writer.getDirectContent();
        PdfImportedPage page;
        BaseFont bf
            = BaseFont.createFont(BaseFont.ZAPFDINGBATS, "", BaseFont.EMBEDDED);
        for (int i = 0; i < reader.getNumberOfPages(); ) {
            page = writer.getImportedPage(reader, ++i);
            canvas.addTemplate(page, 1f, 0, 0.4f, 0.4f, 72, 50 * i);
            canvas.beginText();
            canvas.setFontAndSize(bf, 20);
            canvas.showTextAligned(Element.ALIGN_CENTER,
                String.valueOf((char)(181 + i)), 496, 150 + 50 * i, 0);
            canvas.endText();
        }
        // step 5
        document.close();
    }

    /**
     * Creates a PDF document.
     * @param filename the path to the new PDF document
     * @throws    DocumentException 
     * @throws    IOException
     */
    public void createPdf(String filename)
        throws IOException, DocumentException {
        // step 1
        Document document = new Document(PageSize.POSTCARD, 30, 30, 30, 30);
        // step 2
        PdfWriter writer = PdfWriter.getInstance(document,
                new FileOutputStream(filename));
        // step 3
        document.open();
        // step 4
        PdfContentByte under = writer.getDirectContentUnder();
        // Page 1: a rectangle
        drawRectangle(under, PageSize.POSTCARD.getWidth(), PageSize.POSTCARD.getHeight());
        under.setRGBColorFill(0xFF, 0xD7, 0x00);
        under.rectangle(5, 5, PageSize.POSTCARD.getWidth() - 10, PageSize.POSTCARD.getHeight() - 10);
        under.fill();
        document.newPage();
        // Page 2: an image
        drawRectangle(under, PageSize.POSTCARD.getWidth(), PageSize.POSTCARD.getHeight());
        Image img = Image.getInstance(RESOURCE);
        img.setAbsolutePosition((PageSize.POSTCARD.getWidth() - img.getScaledWidth()) / 2,
                (PageSize.POSTCARD.getHeight() - img.getScaledHeight()) / 2);
        document.add(img);
        document.newPage();
        // Page 3: the words "Foobar Film Festival"
        drawRectangle(under, PageSize.POSTCARD.getWidth(), PageSize.POSTCARD.getHeight());;
        Paragraph p = new Paragraph("Foobar Film Festival", new Font(FontFamily.HELVETICA, 22));
        p.setAlignment(Element.ALIGN_CENTER);
        document.add(p);
        document.newPage();
        // Page 4: the words "SOLD OUT"
        drawRectangle(under, PageSize.POSTCARD.getWidth(), PageSize.POSTCARD.getHeight());
        PdfContentByte over = writer.getDirectContent();
        over.saveState();
        float sinus = (float)Math.sin(Math.PI / 60);
        float cosinus = (float)Math.cos(Math.PI / 60);
        BaseFont bf = BaseFont.createFont();
        over.beginText();
        over.setTextRenderingMode(PdfContentByte.TEXT_RENDER_MODE_FILL_STROKE);
        over.setLineWidth(1.5f);
        over.setRGBColorStroke(0xFF, 0x00, 0x00);
        over.setRGBColorFill(0xFF, 0xFF, 0xFF);
        over.setFontAndSize(bf, 36);
        over.setTextMatrix(cosinus, sinus, -sinus, cosinus, 50, 324);
        over.showText("SOLD OUT");
        over.setTextMatrix(0, 0);
        over.endText();
        over.restoreState();
        // step 5
        document.close();
    }
    
    /**
     * Draws a rectangle
     * @param content the direct content layer
     * @param width the width of the rectangle
     * @param height the height of the rectangle
     */
    public static void drawRectangle(PdfContentByte content, float width, float height) {
        content.saveState();
        PdfGState state = new PdfGState();
        state.setFillOpacity(0.6f);
        content.setGState(state);
        content.setRGBColorFill(0xFF, 0xFF, 0xFF);
        content.setLineWidth(3);
        content.rectangle(0, 0, width, height);
        content.fillStroke();
        content.restoreState();
    }
}

导入公司信封

从第N页复制页面

6.3 使用PdfStamper添加内容

在绝对位置添加内容

PdfStamper.getOverContent() 类似 getDirectContent()

import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.pdf.ColumnText;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.ygsoft.abc.component.cdes.itext.chapter1.HelloWorldLandscape1;
import com.ygsoft.abc.component.cdes.itext.chapter1.HelloWorldLandscape2;


public class StampText {
    /** A resulting PDF file. */
    public static final String RESULT1
        = "D:/data/iText/inAction/chapter06/hello1.pdf";
    /** A resulting PDF file. */
    public static final String RESULT2
        = "D:/data/iText/inAction/chapter06/hello2.pdf";
    /** A resulting PDF file. */
    public static final String RESULT3
        = "D:/data/iText/inAction/chapter06/hello3.pdf";
    
    /**
     * Main method.
     * @param args no arguments needed
     * @throws DocumentException 
     * @throws IOException
     */
    public static void main(String[] args)
        throws DocumentException, IOException {
        HelloWorldLandscape1.main(args);
        HelloWorldLandscape2.main(args);
        stamp(HelloWorldLandscape1.RESULT, RESULT1);
        stampIgnoreRotation(HelloWorldLandscape1.RESULT, RESULT2);
        stamp(HelloWorldLandscape2.RESULT, RESULT3);
        
    }

    /**
     * Manipulates a PDF file src with the file dest as result
     * @param src the original PDF
     * @param dest the resulting PDF
     * @throws IOException
     * @throws DocumentException
     */
    public static void stamp(String src, String dest)
        throws IOException, DocumentException {
        PdfReader reader = new PdfReader(src);
        PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(dest));
        PdfContentByte canvas = stamper.getOverContent(1);
        ColumnText.showTextAligned(canvas,
                Element.ALIGN_LEFT, new Phrase("Hello people!"), 36, 540, 0);
        stamper.close();
    }

    /**
     * Manipulates a PDF file src with the file dest as result
     * @param src the original PDF
     * @param dest the resulting PDF
     * @throws IOException
     * @throws DocumentException
     */
    public static void stampIgnoreRotation(String src, String dest)
        throws IOException, DocumentException {
        PdfReader reader = new PdfReader(src);
        PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(dest));
        stamper.setRotateContents(false);
        PdfContentByte canvas = stamper.getOverContent(1);
        ColumnText.showTextAligned(canvas,
                Element.ALIGN_LEFT, new Phrase("Hello people!"), 36, 540, 0);
        stamper.close();
    }
}

2步创建PDF

第一步,创建文档内容,第二步,添加页码

import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.SQLException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.PdfWriter;

public class TwoPasses {

    /** The resulting PDF file. */
    public static final String RESULT
        = "results/part2/chapter06/page_x_of_y.pdf";

    /**
     * Main method.
     * @param    args    no arguments needed
     * @throws DocumentException 
     * @throws IOException 
     * @throws SQLException
     * @throws SQLException
     */
    public static void main(String[] args)
        throws SQLException, DocumentException, IOException {
    	
    	// FIRST PASS, CREATE THE PDF WITHOUT HEADER
    	
        // step 1
        Document document = new Document(PageSize.A4, 36, 36, 54, 36);
        // step 2
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PdfWriter.getInstance(document, baos);
        // step 3
        document.open();
        // step 4
        // PDF文档创建...
        // step 5
        document.close();

        
        // SECOND PASS, ADD THE HEADER
        
        // Create a reader
        PdfReader reader = new PdfReader(baos.toByteArray());
        // Create a stamper
        PdfStamper stamper
            = new PdfStamper(reader, new FileOutputStream(RESULT));
        // Loop over the pages and add a header to each page
        int n = reader.getNumberOfPages();
        for (int i = 1; i <= n; i++) {
            getHeaderTable(i, n).writeSelectedRows(
                    0, -1, 34, 803, stamper.getOverContent(i));
        }
        // Close the stamper
        stamper.close();
    }
    
    /**
     * Create a header table with page X of Y
     * @param x the page number
     * @param y the total number of pages
     * @return a table that can be used as header
     */
    public static PdfPTable getHeaderTable(int x, int y) {
        PdfPTable table = new PdfPTable(2);
        table.setTotalWidth(527);
        table.setLockedWidth(true);
        table.getDefaultCell().setFixedHeight(20);
        table.getDefaultCell().setBorder(Rectangle.BOTTOM);
        table.addCell("FOOBAR FILMFESTIVAL");
        table.getDefaultCell().setHorizontalAlignment(Element.ALIGN_RIGHT);
        table.addCell(String.format("Page %d of %d", x, y));
        return table;
    }
}

添加公司信封到一个存在的文档

    /**
     * Manipulates a PDF file src with the file dest as result
     * @param src the original PDF
     * @param stationery a PDF that will be added as background
     * @param dest the resulting PDF
     * @throws IOException
     * @throws DocumentException
     */
    public void manipulatePdf(String src, String stationery, String dest)
        throws IOException, DocumentException {
    	// Create readers
        PdfReader reader = new PdfReader(src);
        PdfReader s_reader = new PdfReader(stationery);
        // Create the stamper
        PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(dest));
        // Add the stationery to each page
        PdfImportedPage page = stamper.getImportedPage(s_reader, 1);
        int n = reader.getNumberOfPages();
        PdfContentByte background;
        for (int i = 1; i <= n; i++) {
            background = stamper.getUnderContent(i);
            background.addTemplate(page, 0, 0);
        }
        // CLose the stamper
        stamper.close();
    }

插入页面到一个存在的文档

填充PDF表单

6.4 使用PdfCopy 拷贝页面

拼接和拆分PDF文档

拼接文档

import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.SQLException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.pdf.ColumnText;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfReader;

public class ConcatenateStamp {

    /** The resulting PDF file. */
    public static final String RESULT
        = "results/part2/chapter06/concatenated_stamped.pdf";
    
    /**
     * Main method.
     * @param args no arguments needed
     * @throws DocumentException 
     * @throws IOException
     * @throws SQLException
     */
    public static void main(String[] args)
        throws IOException, DocumentException, SQLException {
    	// use old examples to create PDFs
        MovieLinks1.main(args);
        MovieHistory.main(args);
        // step 1
        Document document = new Document();
        // step 2
        PdfCopy copy = new PdfCopy(document, new FileOutputStream(RESULT));
        // step 3
        document.open();
        // step 4
        // reader for document 1
        PdfReader reader1 = new PdfReader(MovieLinks1.RESULT);
        int n1 = reader1.getNumberOfPages();
        // reader for document 2
        PdfReader reader2 = new PdfReader(MovieHistory.RESULT);
        int n2 = reader2.getNumberOfPages();
        // initializations
        PdfImportedPage page;
        PdfCopy.PageStamp stamp;
        // Loop over the pages of document 1
        for (int i = 0; i < n1; ) {
            page = copy.getImportedPage(reader1, ++i);
            stamp = copy.createPageStamp(page);
            // add page numbers
            ColumnText.showTextAligned(
                    stamp.getUnderContent(), Element.ALIGN_CENTER,
                    new Phrase(String.format("page %d of %d", i, n1 + n2)),
                    297.5f, 28, 0);
            stamp.alterContents();
            copy.addPage(page);
        }
        // Loop over the pages of document 2
        for (int i = 0; i < n2; ) {
            page = copy.getImportedPage(reader2, ++i);
            stamp = copy.createPageStamp(page);
            // add page numbers
            ColumnText.showTextAligned(
                    stamp.getUnderContent(), Element.ALIGN_CENTER,
                    new Phrase(String.format("page %d of %d", n1 + i, n1 + n2)),
                    297.5f, 28, 0);
            stamp.alterContents();
            copy.addPage(page);
        }
        // step 5
        document.close();
    }
}

拆分文档

import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfReader;

public class Burst {

    /** Format of the resulting PDF files. */
    public static final String RESULT
        = "D:/data/iText/inAction/chapter06/timetable_p%d.pdf";

    /**
     * Main method.
     * @param    args    no arguments needed
     * @throws DocumentException 
     * @throws IOException
     */
    public static void main(String[] args)
        throws IOException, DocumentException {
        // Create a reader
        PdfReader reader = new PdfReader("D:/data/iText/inAction/chapter03/movie_posters.pdf");
        // We'll create as many new PDFs as there are pages
        Document document;
        PdfCopy copy;
        // loop over all the pages in the original PDF
        int n = reader.getNumberOfPages();
        for (int i = 0; i < n; ) {
        	// step 1
            document = new Document();
            // step 2
            copy = new PdfCopy(document,
                new FileOutputStream(String.format(RESULT, ++i)));
            // step 3
            document.open();
            // step 4
            copy.addPage(copy.getImportedPage(reader, i));
            // step 5
            document.close();
        }
    }
    
    
}

PdfCopy VS PdfSmartCopy

PdfSmartCopy 继承自PdfCopy,其会检查每页添加的冗余对象, 因此可以节省大量磁盘空间或

带宽。这种额外的"智慧"是要付出代价的。PdfSmartCopy 需要更多的内存和时间去拼接文档。

文件大小、带宽优先,选PdfSmartCopy

内存、时间优先,选PdfCopy

拼接表单

相关推荐
一个处女座的程序猿4 小时前
LLMs之PDF:zeroX(一款PDF到Markdown 的视觉模型转换工具)的简介、安装和使用方法、案例应用之详细攻略
pdf·markdown·zerox
Dxy12393102164 小时前
python下载pdf
数据库·python·pdf
周亚鑫4 小时前
vue3 pdf base64转成文件流打开
前端·javascript·pdf
一名技术极客6 小时前
Vue2 doc、excel、pdf、ppt、txt、图片以及视频等在线预览
pdf·powerpoint·excel·文件在线预览
S. Dylan20 小时前
Edge浏览器打开PDF无法显示电子签章
edge·pdf
一马平川的大草原20 小时前
如何基于pdf2image实现pdf批量转换为图片
计算机视觉·pdf·文件拆分
m0_5945263020 小时前
Python批量合并多个PDF
java·python·pdf
hairenjing112320 小时前
将图片添加到 PDF 的 5 种方法
pdf
✿゚卡笨卡20 小时前
pdf 添加页眉页脚,获取前五页
java·pdf
blegn20 小时前
PDF编辑工具Adobe Acrobat DC 2023安装教程(附安装包)
pdf·办公软件·office