笔者这里整理的 jar 包(其中也包含 PDF 读写所需的 jar 包)如图1所示。

图1
读取 Word 文件的代码如下:
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.xwpf.usermodel.*;
import java.io.FileInputStream;
import java.util.List;
/**
 * Demo that prints every table found in a Word document to standard output.
 *
 * <p>Files whose name ends in {@code .docx} are read through the XWPF API; every
 * other file is handed to the HWPF reader for the legacy binary {@code .doc}
 * format. Each table row is printed on its own line with a tab after every cell.
 */
public class Testread {

    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "D://111.doc";

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the path of the document to read.
     *             When absent, {@link #DEFAULT_PATH} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        if (isDocx(path)) {
            readdocxFile(path);
        } else {
            // Pass the selected path on instead of repeating a hard-coded literal.
            readDocTables(path);
        }
    }

    /**
     * Tells whether the path names a {@code .docx} file.
     *
     * <p>Only the end of the path is inspected, ignoring case, so a directory or
     * file name that merely contains ".docx" somewhere in the middle is not
     * mistaken for an OOXML document.
     */
    private static boolean isDocx(String path) {
        return path.toLowerCase(java.util.Locale.ROOT).endsWith(".docx");
    }

    /**
     * Prints all tables of a legacy {@code .doc} file.
     *
     * <p>Any exception raised while opening or reading the file is caught and its
     * stack trace printed; nothing is rethrown.
     *
     * @param filePath path of the {@code .doc} file
     */
    public static void readDocTables(String filePath) {
        // The document is declared as a resource as well so that it is closed,
        // not just the underlying stream.
        try (FileInputStream fis = new FileInputStream(filePath);
             HWPFDocument document = new HWPFDocument(fis)) {
            Range range = document.getRange();
            TableIterator iterator = new TableIterator(range);
            int tableCount = 0;
            while (iterator.hasNext()) {
                tableCount++;
                Table table = iterator.next();
                System.out.println("=== 表格 " + tableCount + " ===");
                readOldTable(table);
            }
            if (tableCount == 0) {
                System.out.println("文档中未找到表格");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints one HWPF table, one row per line, with a tab after every cell.
     *
     * @param table the table to print
     */
    public static void readOldTable(Table table) {
        for (int i = 0; i < table.numRows(); i++) {
            org.apache.poi.hwpf.usermodel.TableRow row = table.getRow(i);
            for (int j = 0; j < row.numCells(); j++) {
                System.out.print(oldCellText(row.getCell(j)) + "\t");
            }
            System.out.println();
        }
    }

    /**
     * Collects the text of every paragraph in a legacy table cell.
     *
     * <p>Each paragraph is trimmed ({@link String#trim()} drops leading and
     * trailing characters up to U+0020, which covers paragraph and cell marks
     * as well as spaces). Non-empty paragraphs are joined with a single space so
     * that a multi-paragraph cell still fits on the row's output line. A cell
     * with a single paragraph yields exactly that paragraph's trimmed text.
     */
    private static String oldCellText(org.apache.poi.hwpf.usermodel.TableCell cell) {
        StringBuilder text = new StringBuilder();
        for (int k = 0; k < cell.numParagraphs(); k++) {
            String paragraph = cell.getParagraph(k).text().trim();
            if (paragraph.isEmpty()) {
                continue;
            }
            if (text.length() > 0) {
                text.append(' ');
            }
            text.append(paragraph);
        }
        return text.toString();
    }

    /**
     * Prints all tables of a {@code .docx} file, preceded by the table count.
     *
     * <p>Any exception raised while opening or reading the file is caught and its
     * stack trace printed; nothing is rethrown.
     *
     * @param path path of the {@code .docx} file
     */
    public static void readdocxFile(String path) {
        // The document is declared as a resource as well so that it is closed,
        // not just the underlying stream.
        try (FileInputStream fis = new FileInputStream(path);
             XWPFDocument document = new XWPFDocument(fis)) {
            // Check whether the document contains any table at all.
            List<XWPFTable> tables = document.getTables();
            if (tables.isEmpty()) {
                System.out.println("文档中未找到表格");
            } else {
                System.out.println("找到 " + tables.size() + " 个表格");
                // Dump the content of every table.
                readAllTables(document);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints every table returned by {@code document.getTables()}, each preceded
     * by a numbered header line (numbering starts at 1).
     *
     * @param document an already opened XWPF document
     */
    public static void readAllTables(XWPFDocument document) {
        List<XWPFTable> tables = document.getTables();
        for (int i = 0; i < tables.size(); i++) {
            System.out.println("=== 表格 " + (i + 1) + " ===");
            readTable(tables.get(i));
        }
    }

    /**
     * Prints one XWPF table, one row per line, with a tab after every cell.
     *
     * @param table the table to print
     */
    public static void readTable(XWPFTable table) {
        for (XWPFTableRow row : table.getRows()) {
            for (XWPFTableCell cell : row.getTableCells()) {
                String cellText = cell.getText();
                System.out.print(cellText + "\t");
            }
            System.out.println(); // end of row
        }
    }
}