pip install pywin32
import os
import win32com.client
import pythoncom # 新增:用于处理COM线程
import sys
def docx_to_pdf(docx_path, pdf_path=None):
    """Convert a Word document to PDF through Word's COM automation interface.

    Args:
        docx_path: Path of the Word document to convert.
        pdf_path: Destination PDF path. When omitted, the PDF is written next
            to the source document with the extension replaced by ``.pdf``.

    Returns:
        True when the PDF was saved, False when the conversion failed. A
        failed conversion is reported on stdout instead of being raised.

    Raises:
        FileNotFoundError: If ``docx_path`` does not exist.
    """
    if not os.path.exists(docx_path):
        raise FileNotFoundError(f"文件不存在: {docx_path}")
    if pdf_path is None:
        pdf_path = os.path.splitext(docx_path)[0] + ".pdf"
    # The Word process does not share this script's working directory, so a
    # relative path would be resolved somewhere else; pass absolute paths.
    docx_path = os.path.abspath(docx_path)
    pdf_path = os.path.abspath(pdf_path)

    converted = False
    # COM must be initialised on the calling thread before Word is created.
    pythoncom.CoInitialize()
    word = None
    doc = None
    try:
        word = win32com.client.Dispatch("Word.Application")
        word.Visible = False
        # Open read-only so the source document is not locked for editing.
        doc = word.Documents.Open(docx_path, ReadOnly=True)
        # 17 is Word's wdFormatPDF save format.
        doc.SaveAs2(pdf_path, FileFormat=17)
        converted = True
        print(f"转换成功: {pdf_path}")
    except Exception as e:
        print(f"转换失败: {str(e)}")
    finally:
        # Every cleanup step is guarded on its own: a failure while closing
        # the document must not skip quitting Word or releasing COM, which
        # would leave a hidden Word process behind.
        try:
            if doc is not None:
                doc.Close(SaveChanges=0)  # 0 = do not save changes
        except Exception as e:
            print(f"关闭文档时警告: {str(e)}")
        try:
            if word is not None:
                word.Quit()
        except Exception as e:
            # The conversion result is already known, so only warn here.
            print(f"关闭Word时警告: {str(e)}")
        # Drop the COM references before COM itself is torn down.
        doc = None
        word = None
        pythoncom.CoUninitialize()
    return converted


if __name__ == "__main__":
    # Show usage when no input document was given.
    if len(sys.argv) < 2:
        print("单文件Word转PDF转换器")
        print("用法: python docx_to_pdf.py <Word文件路径> [输出PDF路径]")
        print("示例1: python docx_to_pdf.py D:\\d\\a.docx")
        print("示例2: python docx_to_pdf.py D:\\d\\a.docx D:\\d\\output.pdf")
        sys.exit(1)
    input_file = sys.argv[1]
    # The output path is optional; None lets docx_to_pdf derive it.
    output_file = sys.argv[2] if len(sys.argv) > 2 else None
    if not os.path.exists(input_file):
        print(f"错误: 输入文件不存在: {input_file}")
        sys.exit(1)
    if not input_file.lower().endswith(('.doc', '.docx')):
        print(f"错误: 输入文件不是Word文档: {input_file}")
        sys.exit(1)
    try:
        succeeded = docx_to_pdf(input_file, output_file)
    except Exception as e:
        print(f"转换过程中发生错误: {str(e)}")
        sys.exit(1)
    # A failed conversion is reported through the return value, not an
    # exception; exit non-zero so calling processes can detect it.
    if not succeeded:
        sys.exit(1)
    print("转换完成!")
然后在 Java 里面调用这个脚本:
java
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
 * Launches the single-file Word-to-PDF Python script as a child process and
 * collects its console output.
 */
public class PythonScriptCaller {
    /**
     * Runs the conversion script without an explicit output path.
     *
     * @param scriptPath absolute path of the Python script
     * @param docxPath   path of the Word document to convert
     * @return the script's console output followed by a line stating success
     *         or failure and the exit code, or an error description when the
     *         process could not be run
     */
    public static String callDocxToPdfScript(String scriptPath, String docxPath) {
        return callDocxToPdfScript(scriptPath, docxPath, null);
    }

    /**
     * Runs the conversion script, optionally passing an output PDF path.
     *
     * @param scriptPath absolute path of the Python script
     * @param docxPath   path of the Word document to convert
     * @param pdfPath    output PDF path passed as the script's second
     *                   argument, or {@code null} to omit that argument
     * @return the script's console output followed by a line stating success
     *         or failure and the exit code, or an error description when the
     *         process could not be run
     */
    public static String callDocxToPdfScript(String scriptPath, String docxPath, String pdfPath) {
        // Arguments are passed as separate array elements, so paths with
        // spaces need no manual quoting.
        String[] command = (pdfPath == null)
                ? new String[] {"python", scriptPath, docxPath}
                : new String[] {"python", scriptPath, docxPath, pdfPath};
        ProcessBuilder processBuilder = new ProcessBuilder(command);
        // Merge stderr into stdout so a single reader sees everything.
        processBuilder.redirectErrorStream(true);
        Process process = null;
        StringBuilder result = new StringBuilder();
        try {
            process = processBuilder.start();
            // try-with-resources closes the reader even if reading fails.
            // NOTE(review): "GBK" assumes the script's console output uses the
            // GBK code page — confirm for the target machine.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream(), "GBK"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    result.append(line).append("\n");
                }
            }
            // The output is fully drained above, so waitFor cannot block on a
            // full pipe buffer.
            int exitCode = process.waitFor();
            if (exitCode == 0) {
                result.append("脚本执行成功,退出码:").append(exitCode);
            } else {
                result.append("脚本执行失败,退出码:").append(exitCode);
            }
        } catch (IOException e) {
            result.append("执行脚本时发生IO错误:").append(e.getMessage());
        } catch (InterruptedException e) {
            result.append("脚本执行被中断:").append(e.getMessage());
            Thread.currentThread().interrupt(); // restore the interrupt flag
        } finally {
            if (process != null) {
                process.destroy(); // make sure the child process is gone
            }
        }
        return result.toString();
    }

    /** Manual test entry point; replace the paths with real ones. */
    public static void main(String[] args) {
        String scriptPath = "D:\\git\\docx_to_pdf\\docx_to_pdf.py";
        String docxPath = "D:\\d\\a.docx";
        String result = callDocxToPdfScript(scriptPath, docxPath);
        System.out.println("转换结果:\n" + result);
    }
}
批量递归转换(Python 脚本 batch_doc_to_pdf.py):
python
import os
import win32com.client
import pythoncom
import shutil
import sys
def convert_all_docs_to_pdf(source_dir, target_dir):
    """Recursively convert every Word document under a directory to PDF.

    The directory layout below ``source_dir`` is recreated below
    ``target_dir`` and each ``.doc``/``.docx`` file is converted to a PDF of
    the same base name in the matching target directory.

    Args:
        source_dir: Root directory to scan for Word documents.
        target_dir: Root directory that receives the PDFs.

    Returns:
        The number of documents that could not be converted.
    """
    os.makedirs(target_dir, exist_ok=True)
    target_root = os.path.normcase(os.path.abspath(target_dir))
    failed = 0
    for root, dirs, files in os.walk(source_dir):
        # When the target lives inside the source tree, do not walk into it;
        # otherwise the output directory would be mirrored into itself.
        dirs[:] = [
            name for name in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, name))) != target_root
        ]
        relative_path = os.path.relpath(root, source_dir)
        if relative_path == '.':
            relative_path = ''
        target_subdir = os.path.join(target_dir, relative_path)
        os.makedirs(target_subdir, exist_ok=True)
        for file in files:
            # "~$..." files are the lock files Word keeps next to an open
            # document; they are not real documents.
            if file.startswith('~$'):
                continue
            # Compare case-insensitively so ".DOCX" is picked up as well.
            if not file.lower().endswith(('.doc', '.docx')):
                continue
            source_file_path = os.path.join(root, file)
            pdf_filename = os.path.splitext(file)[0] + '.pdf'
            target_file_path = os.path.join(target_subdir, pdf_filename)
            print(f"正在转换: {source_file_path}")
            try:
                converted = docx_to_pdf(source_file_path, target_file_path)
            except OSError as e:
                # E.g. the file vanished after the directory was listed; keep
                # going with the remaining documents.
                print(f"转换失败: {str(e)}")
                converted = False
            if not converted:
                failed += 1
    print("所有文件转换完成!")
    return failed


def docx_to_pdf(docx_path, pdf_path=None):
    """Convert a Word document to PDF through Word's COM automation interface.

    Args:
        docx_path: Path of the Word document to convert.
        pdf_path: Destination PDF path. When omitted, the PDF is written next
            to the source document with the extension replaced by ``.pdf``.

    Returns:
        True when the PDF was saved, False when the conversion failed. A
        failed conversion is reported on stdout instead of being raised.

    Raises:
        FileNotFoundError: If ``docx_path`` does not exist.
    """
    if not os.path.exists(docx_path):
        raise FileNotFoundError(f"文件不存在: {docx_path}")
    if pdf_path is None:
        pdf_path = os.path.splitext(docx_path)[0] + ".pdf"
    # The Word process does not share this script's working directory, so a
    # relative path would be resolved somewhere else; pass absolute paths.
    docx_path = os.path.abspath(docx_path)
    pdf_path = os.path.abspath(pdf_path)

    converted = False
    # COM must be initialised on the calling thread before Word is created.
    pythoncom.CoInitialize()
    word = None
    doc = None
    try:
        word = win32com.client.Dispatch("Word.Application")
        word.Visible = False
        # Open read-only so the source document is not locked for editing.
        doc = word.Documents.Open(docx_path, ReadOnly=True)
        # 17 is Word's wdFormatPDF save format.
        doc.SaveAs2(pdf_path, FileFormat=17)
        converted = True
        print(f"转换成功: {pdf_path}")
    except Exception as e:
        print(f"转换失败: {str(e)}")
    finally:
        # Every cleanup step is guarded on its own: a failure while closing
        # the document must not skip quitting Word or releasing COM, which
        # would leave a hidden Word process behind.
        try:
            if doc is not None:
                doc.Close(SaveChanges=0)  # 0 = do not save changes
        except Exception as e:
            print(f"关闭文档时警告: {str(e)}")
        try:
            if word is not None:
                word.Quit()
        except Exception as e:
            # The conversion result is already known, so only warn here.
            print(f"关闭Word时警告: {str(e)}")
        # Drop the COM references before COM itself is torn down.
        doc = None
        word = None
        pythoncom.CoUninitialize()
    return converted


if __name__ == "__main__":
    # Both the source and the target directory are required.
    if len(sys.argv) < 3:
        print("批量转换Word文档到PDF")
        print("用法: python batch_doc_to_pdf.py <源目录> <目标目录>")
        print("示例: python batch_doc_to_pdf.py D:\\d1 D:\\d2")
        sys.exit(1)
    source_directory = sys.argv[1]
    target_directory = sys.argv[2]
    if not os.path.exists(source_directory):
        print(f"错误: 源目录不存在: {source_directory}")
        sys.exit(1)
    # A plain file would make the directory walk silently do nothing.
    if not os.path.isdir(source_directory):
        print(f"错误: 源路径不是目录: {source_directory}")
        sys.exit(1)
    # Refuse identical source and target to guard against accidents.
    if os.path.abspath(source_directory) == os.path.abspath(target_directory):
        print("错误: 源目录和目标目录不能相同")
        sys.exit(1)
    print(f"开始转换: {source_directory} -> {target_directory}")
    failed_count = convert_all_docs_to_pdf(source_directory, target_directory)
    # Exit non-zero when any document failed so calling processes can tell.
    if failed_count:
        print(f"转换失败的文件数: {failed_count}")
        sys.exit(1)
Java 调用 Python 批量脚本:
java
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
 * Launches the Word-to-PDF Python scripts (batch and single-file) as child
 * processes and collects their console output.
 */
public class PythonBatchConverter {
    /**
     * Runs the batch conversion script on a directory tree.
     *
     * @param scriptPath absolute path of the Python script
     * @param sourceDir  source directory path
     * @param targetDir  target directory path
     * @return the script's console output followed by a line stating success
     *         or failure and the exit code, or an error description when the
     *         process could not be run
     */
    public static String callBatchConversionScript(String scriptPath, String sourceDir, String targetDir) {
        String[] command = {"python", scriptPath, sourceDir, targetDir};
        return runScript(command, "批量转换执行成功,退出码:", "批量转换执行失败,退出码:");
    }

    /**
     * Runs the single-file conversion script on one document.
     *
     * @param scriptPath absolute path of the Python script
     * @param docxPath   path of the Word document to convert
     * @return the script's console output followed by a line stating success
     *         or failure and the exit code, or an error description when the
     *         process could not be run
     */
    public static String callSingleFileScript(String scriptPath, String docxPath) {
        String[] command = {"python", scriptPath, docxPath};
        return runScript(command, "单文件转换执行成功,退出码:", "单文件转换执行失败,退出码:");
    }

    /**
     * Runs a command, captures its merged stdout/stderr and appends a status
     * line built from the given prefix and the exit code.
     *
     * @param command       program and arguments, one array element each
     * @param successPrefix text placed before the exit code when it is 0
     * @param failurePrefix text placed before the exit code when it is not 0
     * @return captured output plus status line, or an error description
     */
    private static String runScript(String[] command, String successPrefix, String failurePrefix) {
        ProcessBuilder processBuilder = new ProcessBuilder(command);
        // Merge stderr into stdout so a single reader sees everything.
        processBuilder.redirectErrorStream(true);
        Process process = null;
        StringBuilder result = new StringBuilder();
        try {
            process = processBuilder.start();
            // try-with-resources closes the reader even if reading fails.
            // NOTE(review): "GBK" assumes the script's console output uses the
            // GBK code page — confirm for the target machine.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream(), "GBK"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    result.append(line).append("\n");
                }
            }
            // The output is fully drained above, so waitFor cannot block on a
            // full pipe buffer.
            int exitCode = process.waitFor();
            if (exitCode == 0) {
                result.append(successPrefix).append(exitCode);
            } else {
                result.append(failurePrefix).append(exitCode);
            }
        } catch (IOException e) {
            result.append("执行脚本时发生IO错误:").append(e.getMessage());
        } catch (InterruptedException e) {
            result.append("脚本执行被中断:").append(e.getMessage());
            Thread.currentThread().interrupt(); // restore the interrupt flag
        } finally {
            if (process != null) {
                process.destroy(); // make sure the child process is gone
            }
        }
        return result.toString();
    }

    /** Manual test entry point; replace the paths with real ones. */
    public static void main(String[] args) {
        // Batch conversion.
        String scriptPath = "D:\\git\\docx_to_pdf\\batch_doc_to_pdf.py";
        String sourceDir = "D:\\d1";
        String targetDir = "D:\\d2";
        String result = callBatchConversionScript(scriptPath, sourceDir, targetDir);
        System.out.println("批量转换结果:\n" + result);
        // Single-file conversion.
        String singleFileScriptPath = "D:\\git\\docx_to_pdf\\docx_to_pdf.py";
        String docxPath = "D:\\d1\\a.docx";
        String singleResult = callSingleFileScript(singleFileScriptPath, docxPath);
        System.out.println("单文件转换结果:\n" + singleResult);
    }
}