安装这两个库。第二个库一定要安装,否则读取某些 PDF 文件时会出现异常。
读取
csharp
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas.Parser;
using iText.Kernel.Pdf.Canvas.Parser.Listener;
namespace TestReadPdf
{
    /// <summary>
    /// Helpers for reading text out of PDF files with iText.
    /// </summary>
    public static class PdfHelper
    {
        /// <summary>
        /// Extracts the text of every page of a PDF file, one string per page,
        /// in page order.
        /// </summary>
        /// <param name="filename">Path of the PDF file to read.</param>
        /// <returns>
        /// A lazily evaluated sequence containing the extracted text of each page.
        /// Because this is an iterator, the file is only opened when enumeration
        /// starts and is closed when enumeration finishes or the enumerator is
        /// disposed; any error opening or parsing the file surfaces at that point,
        /// not when this method is called.
        /// </returns>
        public static IEnumerable<string> ExtractText(string filename)
        {
            using (var reader = new PdfReader(filename))
            using (var document = new PdfDocument(reader))
            {
                // The page count does not change while reading, so fetch it once.
                int pageCount = document.GetNumberOfPages();

                // iText page numbers are 1-based, so the last valid page number is
                // pageCount itself: the bound must be inclusive or the final page
                // (and the only page of a single-page file) is silently skipped.
                for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    // A fresh strategy is created for each page so that each yielded
                    // string is built from that page's content only.
                    ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();
                    yield return PdfTextExtractor.GetTextFromPage(document.GetPage(pageNumber), strategy);
                }
            }
        }
    }
}