VBA无法直接读取PDF文档,但可以借助上期给大家介绍的PDF转换工具xpdf-tools-4.05:先利用它将PDF文档转换为TXT文档,再将TXT的内容写入Excel,这样就间接实现了将PDF文档的内容导入Excel的操作。下面的代码将向大家演示如何实现这一操作:
Sub Import_PDF_File()
'
' Import a text-based PDF into column A of the active worksheet.
'
' Steps:
'   1. Ask the user to pick a single *.pdf file.
'   2. Run xpdf's pdftotext (-layout, UTF-8 output) and wait for it to finish.
'   3. Convert the UTF-8 text file to ANSI through UTF8TOANSI (defined elsewhere;
'      it is expected to write "<base>_ANSI.txt" next to the input file).
'   4. Clear the sheet and write one text line per row into column A.
'   5. Delete both temporary text files and restore screen updating,
'      whether or not the import succeeded.
'
' NOTE(review): "<base>.txt" and "<base>_ANSI.txt" beside the PDF are overwritten
' and then deleted; an unrelated file with one of those names would be lost.
'
    Dim pdftotext As String
    pdftotext = "C:\Program Files (x86)\xpdf-tools\bin32\pdftotext.exe"

    Dim fd As FileDialog
    Dim filePath As String
    Dim basePath As String
    Dim txtPath As String
    Dim txtPathANSI As String
    Dim dotPos As Long
    Dim shellCommand As String
    Dim exitCode As Long
    Dim fileNum As Integer
    Dim fileIsOpen As Boolean
    Dim textLine As String
    Dim lineNum As Long
    Dim imported As Boolean

    ' Fail early, before prompting the user, if the converter is not installed.
    If Len(Dir(pdftotext)) = 0 Then
        MsgBox "未找到pdftotext:" & pdftotext, vbCritical, "错误"
        Exit Sub
    End If

    ' File picker dialog
    Set fd = Application.FileDialog(msoFileDialogFilePicker)
    With fd
        .Title = "选择PDF文件"
        .InitialFileName = UserDirectory ' default folder (defined outside this procedure)
        .Filters.Clear
        .Filters.Add "PDF文件", "*.pdf"
        .AllowMultiSelect = False
        If .Show <> -1 Then Exit Sub
        filePath = .SelectedItems(1)
    End With

    ' Strip the extension by locating the last dot in the file name part,
    ' instead of assuming the extension is exactly four characters long.
    dotPos = InStrRev(filePath, ".")
    If dotPos > InStrRev(filePath, "\") Then
        basePath = Left$(filePath, dotPos - 1)
    Else
        basePath = filePath
    End If
    txtPath = basePath & ".txt"
    txtPathANSI = basePath & "_ANSI.txt"

    On Error GoTo CleanFail
    Application.ScreenUpdating = False ' disable screen updating

    ' Convert the PDF to a text file. The executable path is quoted because it
    ' contains spaces. WshShell.Run with bWaitOnReturn:=True blocks until
    ' pdftotext exits and returns its exit code, so no fixed delay is needed.
    shellCommand = """" & pdftotext & """ -layout -enc UTF-8 """ & filePath & """ """ & txtPath & """"
    exitCode = CreateObject("WScript.Shell").Run(shellCommand, 0, True)
    If exitCode <> 0 Then
        MsgBox "PDF转换失败(退出代码 " & exitCode & "):" & filePath, vbCritical, "错误"
        GoTo CleanExit
    End If

    ' Re-encode to ANSI; stop here on failure rather than trying to open a
    ' file that was never produced.
    If UTF8TOANSI(txtPath) = False Then
        MsgBox "转换ANSI编码失败" & txtPath, vbCritical, "错误"
        GoTo CleanExit
    End If

    ' Only wipe the existing sheet contents once there is something to import.
    Cells.ClearContents

    ' Read the text file line by line into column A.
    fileNum = FreeFile
    Open txtPathANSI For Input As #fileNum
    fileIsOpen = True
    Do While Not EOF(fileNum)
        Line Input #fileNum, textLine
        lineNum = lineNum + 1
        Cells(lineNum, 1).Value = textLine
    Loop
    Close #fileNum
    fileIsOpen = False

    Columns(1).HorizontalAlignment = xlLeft ' left-align column A
    Range("A1").Select
    imported = True

CleanExit:
    ' Best-effort cleanup: a temp file may not exist if an earlier step failed.
    On Error Resume Next
    If fileIsOpen Then Close #fileNum
    Kill txtPath
    Kill txtPathANSI
    On Error GoTo 0
    Application.ScreenUpdating = True ' re-enable screen updating
    If imported Then MsgBox "成功导入 " & lineNum & " 行数据。", vbInformation, "提示"
    Exit Sub

CleanFail:
    MsgBox "导入PDF失败:" & Err.Description, vbCritical, "错误"
    Resume CleanExit
End Sub
该操作只适合文字版的PDF,不适合图片版的PDF,也就是说,如果你的PDF是使用扫描仪生成的,那么该方法并不适用。另外,pdftotext输出的TXT文档是UTF-8编码,导入前需要使用UTF8TOANSI函数将其转换为ANSI编码,否则导入的内容可能是乱码,关于该函数的使用方法详见《VBA转换TXT文档编码(UTF-8转换为ANSI)》。