有两种方法:
第一种方法:调用现成的第三方 API(例如 Spire.PDF for Java)读取 PDF 或 doc、xlsx、pptx 文件的属性;商业使用可能需要付费。
https://www.e-iceblue.cn/pdf_java_document_operation/set-pdf-document-properties-in-java.html
Spire.PDF for Java
java
import com.spire.pdf.*;
import java.io.*;
/**
 * Sample that reads the document-information fields of a PDF with Spire.PDF
 * and appends them to a text file.
 */
public class getPDFProperties {
    /**
     * Loads {@code setPDFProperties.pdf}, collects its title, author, subject,
     * keywords, creator, creation date and producer, and appends them to
     * {@code getPDFProperties.txt}.
     *
     * @param args unused
     * @throws IOException if the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        // Create the PdfDocument object
        PdfDocument pdf = new PdfDocument();
        try {
            // Load the PDF document from disk
            pdf.loadFromFile("setPDFProperties.pdf");

            // Collect the document properties, one "label:value" pair per line
            StringBuilder stringBuilder = new StringBuilder();
            stringBuilder.append("标题:" + pdf.getDocumentInformation().getTitle() + "\r\n");
            // The author label now carries the same ":" separator as every other label
            stringBuilder.append("作者:" + pdf.getDocumentInformation().getAuthor() + "\r\n");
            stringBuilder.append("主题:" + pdf.getDocumentInformation().getSubject() + "\r\n");
            stringBuilder.append("关键词:" + pdf.getDocumentInformation().getKeywords() + "\r\n");
            stringBuilder.append("创建者:" + pdf.getDocumentInformation().getCreator() + "\r\n");
            stringBuilder.append("创建时间:" + pdf.getDocumentInformation().getCreationDate() + "\r\n");
            stringBuilder.append("制作工具:" + pdf.getDocumentInformation().getProducer() + "\r\n");

            // Opening the writer in append mode creates the file when it is missing,
            // so no separate createNewFile() call is needed.
            File file = new File("getPDFProperties.txt");
            // try-with-resources flushes and closes the writer even if write() fails
            try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(file, true))) {
                bufferedWriter.write(stringBuilder.toString());
            }
        } finally {
            // Release the resources held by the loaded document
            pdf.close();
        }
    }
}
第二种方法:
通过 Apache POI(读取 Office 文档)和 Apache PDFBox(读取 PDF 文档)实现;另外,不同版本的方法实现也会有所不同。
Apache POI 的不同版本
引入依赖
xml
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.24</version> <!-- 请检查最新版本 -->
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version> <!-- 检查是否有更新的版本 -->
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.17.1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.2</version> <!-- 请检查最新版本 -->
<exclusions>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
最后直接提供方法实现:
java
package com.ruoyi.project.backstage.pdf;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlOptions;
import org.apache.xmlbeans.impl.values.XmlComplexContentImpl;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
/**
 * Sample utility that prints the core properties (title, subject, creator) of a
 * spreadsheet loaded through Apache POI. The commented-out snippets below show
 * the equivalent calls for PDF, DOCX and PPTX files.
 *
 * @className: FileUtils
 * @author: 3.0
 * @date: 2024/10/16
 * @Version: 1.0
 * @description:
 */
public class FileUtils {

    // --- Alternative: PDF properties via PDFBox ---
    // public static void main(String[] args) {
    //     try (PDDocument document = PDDocument.load(new File("E:\\project\\" + "1.pdf"))) {
    //         PDDocumentInformation info = document.getDocumentInformation();
    //         System.out.println("Title: " + info.getTitle());
    //         System.out.println("Author: " + info.getAuthor());
    //         System.out.println("Subject: " + info.getSubject());
    //         // other properties...
    //     } catch (IOException e) {
    //         e.printStackTrace();
    //     }
    // }

    // --- Alternative: DOCX properties via XWPFDocument ---
    // public static void main(String[] args) throws Exception {
    //     XWPFDocument doc = new XWPFDocument(new FileInputStream(new File("E:\\project\\log.sh用法(1).docx")));
    //     POIXMLProperties.CoreProperties coreProps = doc.getProperties().getCoreProperties();
    //     System.out.println("Title: " + coreProps.getTitle());
    //     System.out.println("Author: " + coreProps.getCreator());
    //     System.out.println("主题: " + coreProps.getSubject());
    //     // more properties...
    //     doc.close();
    // }

    /**
     * Opens the hard-coded workbook and prints its core properties when the
     * workbook is an OOXML document; otherwise prints a notice.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): the path ends in .xls; a legacy workbook is presumably not a
        // POIXMLDocument and would take the else branch - confirm the intended input.
        // try-with-resources closes both the stream and the workbook, even on failure.
        try (FileInputStream input = new FileInputStream(new File("E:\\project\\工作簿1 - 副本.xls"));
             Workbook workbook = WorkbookFactory.create(input)) {
            if (workbook instanceof POIXMLDocument) {
                POIXMLDocument poixmlDocument = (POIXMLDocument) workbook;
                POIXMLProperties properties = poixmlDocument.getProperties();
                POIXMLProperties.CoreProperties coreProperties = properties.getCoreProperties();

                // Read the core properties
                String title = coreProperties.getTitle();
                String subject = coreProperties.getSubject();
                String creator = coreProperties.getCreator();

                // Print the properties to the console
                System.out.println("Title: " + title);
                System.out.println("Subject: " + subject);
                System.out.println("Creator: " + creator);
            } else {
                System.out.println("The workbook is not a POIXMLDocument (not an .xlsx file?).");
            }
        }
    }

    // --- Alternative: PPTX properties via XMLSlideShow ---
    // public static void main(String[] args) throws Exception {
    //     XMLSlideShow ppt = new XMLSlideShow(OPCPackage.open(new FileInputStream(new File("E:\\project\\演示文稿1.pptx"))));
    //     System.out.println("Title: " + ppt.getProperties().getCoreProperties().getTitle());
    //     System.out.println("Author: " + ppt.getProperties().getCoreProperties().getCreator());
    //     System.out.println("主题: " + ppt.getProperties().getCoreProperties().getSubject());
    //     // more properties...
    //     ppt.close();
    // }
}
如果需要读取 HTTPS 地址上的文件,可以参考下面的实现:
要从远程 HTTPS URL 读取文件并将其转换为 FileInputStream 对象,可以先将远程文件下载到本地磁盘,然后再使用 FileInputStream 打开它。以下是实现这一过程的一种方法:
下载文件到本地:
1、使用 Java 的 HttpURLConnection 或者 HttpClient 等工具来下载文件。
2、创建 FileInputStream:使用下载后的本地文件路径创建 FileInputStream。
java
/**
 * Downloads a remote workbook to a local temporary file, prints its core
 * properties (title, subject, creator) when it is an OOXML document, and
 * removes the temporary file afterwards.
 *
 * @param args unused
 * @throws Exception if the download fails or the workbook cannot be opened
 */
public static void main(String[] args) throws Exception {
    String remoteUrl = "http://s3.api.com/diaoyun//survey/answer/.xlsx";
    String localPath = "E:\\project\\file11.xlsx"; // local temporary file path
    File file = new File(localPath);
    try {
        downloadFileFromURL(remoteUrl, localPath);
        // Both the stream and the workbook are closed before the file is deleted;
        // an open stream can prevent deletion of the file.
        try (FileInputStream fileInputStream = new FileInputStream(file);
             Workbook workbook = WorkbookFactory.create(fileInputStream)) {
            if (workbook instanceof POIXMLDocument) {
                POIXMLDocument poixmlDocument = (POIXMLDocument) workbook;
                POIXMLProperties properties = poixmlDocument.getProperties();
                POIXMLProperties.CoreProperties coreProperties = properties.getCoreProperties();

                // Read the core properties
                String title = coreProperties.getTitle();
                String subject = coreProperties.getSubject();
                String creator = coreProperties.getCreator();

                // Print the properties to the console
                System.out.println("Title: " + title);
                System.out.println("Subject: " + subject);
                System.out.println("Creator: " + creator);
            } else {
                System.out.println("The workbook is not a POIXMLDocument (not an .xlsx file?).");
            }
        }
    } finally {
        // Clean up the temporary file even when the download or parsing failed
        if (file.exists() && !file.delete()) {
            System.err.println("Could not delete temporary file: " + localPath);
        }
    }
}
/**
 * Downloads the resource at {@code urlStr} and writes its bytes to {@code localPath},
 * overwriting any existing file at that path.
 *
 * @param urlStr    address of the remote resource; must use a scheme whose
 *                  connection is an {@link HttpURLConnection}
 * @param localPath path of the local file to write
 * @throws IOException if the connection fails, times out, or the file cannot be written
 */
private static void downloadFileFromURL(String urlStr, String localPath) throws IOException {
    URL url = new URL(urlStr);
    HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
    // Bound the wait so an unresponsive server cannot block the caller forever
    urlConnection.setConnectTimeout(10_000);
    urlConnection.setReadTimeout(30_000);
    try (InputStream in = urlConnection.getInputStream();
         FileOutputStream out = new FileOutputStream(localPath)) {
        byte[] buffer = new byte[8192];
        int bytesRead;
        // read() returns -1 at end of stream
        while ((bytesRead = in.read(buffer)) != -1) {
            out.write(buffer, 0, bytesRead);
        }
    } finally {
        // Release the underlying connection once the transfer is over
        urlConnection.disconnect();
    }
}