带标签的 PDF(也称为 PDF/UA)是一种包含底层标签树(类似于 HTML)的 PDF,用于定义文档的结构。这些标签可以帮助屏幕阅读器浏览整个文档而不会丢失任何信息。本文介绍如何使用Spire.PDF for .NET在 C# 和 VB.NET 中从头开始创建带标签的 PDF 。
Spire.PDF for .NET 是一款独立 PDF 控件,用于 .NET 程序中创建、编辑和操作 PDF 文档。使用 Spire.PDF 类库,开发人员可以新建一个 PDF 文档或者对现有的 PDF 文档进行处理,且无需安装 Adobe Acrobat。
E-iceblue功能类库Spire 系列文档处理组件均由中国本土团队研发,不依赖第三方软件,不受其他国家的技术或法律法规限制,同时适配国产操作系统如中科方德、中标麒麟等,兼容国产文档处理软件 WPS(如 .wps/.et/.dps 等格式
安装 Spire.PDF for .NET
首先,您需要将 Spire.PDF for.NET 包中包含的 DLL 文件作为引用添加到您的 .NET 项目中。
PM> Install-Package Spire.PDF
创建具有丰富元素的标签 PDF
要在带标签的 PDF 文档中添加结构元素,我们必须首先创建PdfTaggedContent 类的对象。然后,使用**PdfTaggedContent.StructureTreeRoot.AppendChildElement()**方法将元素添加到根。以下是使用 Spire.PDF for .NET 向带标签的 PDF 添加"标题"元素的详细步骤。
- 创建一个PdfDocument对象并使用 **PdfDocument.Pages.Add()**方法向其中添加一个页面。
- 创建PdfTaggedContent类的对象。
- **使用PdfTaggedContent.SetPdfUA1Identification()**方法使文档符合 PDF/UA 识别。
- **使用PdfTaggedContent.StructureTreeRoot.AppendChildElement()**方法将"文档"元素添加到文档的根目录。
- **使用PdfStructureElement.AppendChildElement()**方法在"document"元素下添加"heading"元素。
- **使用PdfStructureElement.BeginMarkedContent()**方法添加开始标签,指示标题元素的开始。
- **使用PdfPageBase.Canvas.DrawString()**方法在页面上绘制标题文本。
- **使用PdfStructureElement.BeginMarkedContent()**方法添加结束标签,这意味着标题元素在此结束。
- **使用PdfDocument.SaveToFile()**方法将文档保存为 PDF 文件。
以下代码片段提供了一个示例,说明如何在 C# 和 VB.NET 中在标记的 PDF 文档中创建各种元素,包括文档、标题、段落、图形和表格。
【C#】
using Spire.Pdf;
using Spire.Pdf.Graphics;
using Spire.Pdf.Interchange.TaggedPdf;
using Spire.Pdf.Tables;
using System.Data;
using System.Drawing;
namespace CreatePDFUA
{
class Program
{
static void Main(string[] args)
{
//Create a PdfDocument object
PdfDocument doc = new PdfDocument();
//Add a page
PdfPageBase page = doc.Pages.Add(PdfPageSize.A4, new PdfMargins(20));
//Set tab order
page.SetTabOrder(TabOrder.Structure);
//Create an object of PdfTaggedContent class
PdfTaggedContent taggedContent = new PdfTaggedContent(doc);
//Set language and title for the document
taggedContent.SetLanguage("en-US");
taggedContent.SetTitle("test");
//Set PDF/UA1 identification
taggedContent.SetPdfUA1Identification();
//Create font and brush
PdfTrueTypeFont font = new PdfTrueTypeFont(new Font("Times New Roman", 14), true);
PdfSolidBrush brush = new PdfSolidBrush(Color.Black);
//Add a "document" element
PdfStructureElement document = taggedContent.StructureTreeRoot.AppendChildElement(PdfStandardStructTypes.Document);
//Add a "heading" element
PdfStructureElement heading1 = document.AppendChildElement(PdfStandardStructTypes.HeadingLevel1);
heading1.BeginMarkedContent(page);
string headingText = "What Is a Tagged PDF?";
page.Canvas.DrawString(headingText, font, brush, new PointF(0, 0));
heading1.EndMarkedContent(page);
//Add a "paragraph" element
PdfStructureElement paragraph = document.AppendChildElement(PdfStandardStructTypes.Paragraph);
paragraph.BeginMarkedContent(page);
string paragraphText = "“Tagged PDF” doesn’t seem like a life-changing term. But for some, it is. For people who are " +
"blind or have low vision and use assistive technology (such as screen readers and connected Braille displays) to " +
"access information, an untagged PDF means they are missing out on information contained in the document because assistive " +
"technology cannot “read” untagged PDFs. Digital accessibility has opened up so many avenues to information that were once " +
"closed to people with visual disabilities, but PDFs often get left out of the equation.";
RectangleF rect = new RectangleF(0, 30, page.Canvas.ClientSize.Width, page.Canvas.ClientSize.Height);
page.Canvas.DrawString(paragraphText, font, brush, rect);
paragraph.EndMarkedContent(page);
//Add a "figure" element to
PdfStructureElement figure = document.AppendChildElement(PdfStandardStructTypes.Figure);
figure.BeginMarkedContent(page);
PdfImage image = PdfImage.FromFile(@"C:\Users\Administrator\Desktop\pdfua.png");
page.Canvas.DrawImage(image, new PointF(0, 150));
figure.EndMarkedContent(page);
//Add a "table" element
PdfStructureElement table = document.AppendChildElement(PdfStandardStructTypes.Table);
table.BeginMarkedContent(page);
PdfTable pdfTable = new PdfTable();
pdfTable.Style.DefaultStyle.Font = font;
DataTable dataTable = new DataTable();
dataTable.Columns.Add("Name");
dataTable.Columns.Add("Age");
dataTable.Columns.Add("Sex");
dataTable.Rows.Add(new string[] { "John", "22", "Male" });
dataTable.Rows.Add(new string[] { "Katty", "25", "Female" });
pdfTable.DataSource = dataTable;
pdfTable.Style.ShowHeader = true;
pdfTable.Draw(page.Canvas, new PointF(0, 280), 300f);
table.EndMarkedContent(page);
//Save the document to file
doc.SaveToFile("CreatePDFUA.pdf");
}
}
}
【VB.NET】
Imports Spire.Pdf
Imports Spire.Pdf.Graphics
Imports Spire.Pdf.Interchange.TaggedPdf
Imports Spire.Pdf.Tables
Imports System.Data
Imports System.Drawing
Namespace CreatePDFUA
Class Program
Shared Sub Main(ByVal args() As String)
'Create a PdfDocument object
Dim doc As PdfDocument = New PdfDocument()
'Add a page
Dim page As PdfPageBase = doc.Pages.Add(PdfPageSize.A4,New PdfMargins(20))
'Set tab order
page.SetTabOrder(TabOrder.Structure)
'Create an object of PdfTaggedContent class
Dim taggedContent As PdfTaggedContent = New PdfTaggedContent(doc)
'Set language and title for the document
taggedContent.SetLanguage("en-US")
taggedContent.SetTitle("test")
'Set PDF/UA1 identification
taggedContent.SetPdfUA1Identification()
'Create font and brush
Dim font As PdfTrueTypeFont = New PdfTrueTypeFont(New Font("Times New Roman",14),True)
Dim brush As PdfSolidBrush = New PdfSolidBrush(Color.Black)
'Add a "document" element
Dim document As PdfStructureElement = taggedContent.StructureTreeRoot.AppendChildElement(PdfStandardStructTypes.Document)
'Add a "heading" element
Dim heading1 As PdfStructureElement = document.AppendChildElement(PdfStandardStructTypes.HeadingLevel1)
heading1.BeginMarkedContent(page)
Dim headingText As String = "What Is a Tagged PDF?"
page.Canvas.DrawString(headingText,font,brush,New PointF(0,0))
heading1.EndMarkedContent(page)
'Add a "paragraph" element
Dim paragraph As PdfStructureElement = document.AppendChildElement(PdfStandardStructTypes.Paragraph)
paragraph.BeginMarkedContent(page)
String paragraphText = "“Tagged PDF” doesn’t seem like a life-changing term. But for some, it is. For people who are " +
"blind or have low vision and use assistive technology (such as screen readers and connected Braille displays) to " +
"access information, an untagged PDF means they are missing out on information contained in the document because assistive " +
"technology cannot “read” untagged PDFs. Digital accessibility has opened up so many avenues to information that were once " +
"closed to people with visual disabilities, but PDFs often get left out of the equation."
Dim rect As RectangleF = New RectangleF(0,30,page.Canvas.ClientSize.Width,page.Canvas.ClientSize.Height)
page.Canvas.DrawString(paragraphText, font, brush, rect)
paragraph.EndMarkedContent(page)
'Add a "figure" element to
Dim figure As PdfStructureElement = document.AppendChildElement(PdfStandardStructTypes.Figure)
figure.BeginMarkedContent(page)
Dim image As PdfImage = PdfImage.FromFile("C:\Users\Administrator\Desktop\pdfua.png")
page.Canvas.DrawImage(image,New PointF(0,150))
figure.EndMarkedContent(page)
'Add a "table" element
Dim table As PdfStructureElement = document.AppendChildElement(PdfStandardStructTypes.Table)
table.BeginMarkedContent(page)
Dim pdfTable As PdfTable = New PdfTable()
pdfTable.Style.DefaultStyle.Font = font
Dim dataTable As DataTable = New DataTable()
dataTable.Columns.Add("Name")
dataTable.Columns.Add("Age")
dataTable.Columns.Add("Sex")
Dim String() As dataTable.Rows.Add(New
{
"John", "22", "Male"
}
)
Dim String() As dataTable.Rows.Add(New
{
"Katty", "25", "Female"
}
)
pdfTable.DataSource = dataTable
pdfTable.Style.ShowHeader = True
pdfTable.Draw(page.Canvas,New PointF(0,280),300f)
table.EndMarkedContent(page)
'Save the document to file
doc.SaveToFile("CreatePDFUA.pdf")
End Sub
End Class
End Namespace
