bash
# Install LibreOffice (if it is not already installed)
sudo apt update
sudo apt install libreoffice
# Convert a DOC file to PDF
libreoffice --headless --convert-to pdf 你的文档.doc
# Or write the PDF into a specific output directory
libreoffice --headless --convert-to pdf --outdir /输出目录 你的文档.doc
python
python
import subprocess
import os
import sys
def convert_doc_to_pdf(doc_path, output_dir=None):
    """
    Convert a DOC/DOCX document to PDF by running LibreOffice headlessly.

    :param doc_path: path of the source document; its name must end in
        .doc or .docx (case-insensitive)
    :param output_dir: directory for the generated PDF (optional); it is
        created when missing. Defaults to the directory part of doc_path.
    :return: path of the generated PDF, or None on failure
    """
    import shutil  # local import: only the launcher lookup below needs it

    # Only a regular file can be converted; a directory that happens to be
    # named "*.doc" is rejected here too.
    if not os.path.isfile(doc_path):
        print(f"错误:文件不存在 {doc_path}")
        return None

    # Check the file extension.
    if not doc_path.lower().endswith(('.doc', '.docx')):
        print(f"错误:不支持的文件格式 {doc_path}")
        return None

    # Resolve the output directory.
    if output_dir is None:
        output_dir = os.path.dirname(doc_path)
    else:
        os.makedirs(output_dir, exist_ok=True)

    # Build the path the PDF is expected to be written to.
    base_name = os.path.splitext(os.path.basename(doc_path))[0]
    pdf_path = os.path.join(output_dir, f"{base_name}.pdf")

    # A bare file name has an empty dirname; pass "." rather than an empty
    # --outdir argument. pdf_path keeps its original (relative) form.
    outdir_arg = output_dir or os.curdir

    # Prefer the `libreoffice` launcher, fall back to `soffice`. If neither is
    # on PATH, keep the original name so the FileNotFoundError branch below
    # still reports the missing installation.
    executable = (
        shutil.which('libreoffice')
        or shutil.which('soffice')
        or 'libreoffice'
    )

    try:
        cmd = [
            executable,
            '--headless',
            '--convert-to', 'pdf',
            '--outdir', outdir_arg,
            doc_path,
        ]
        print(f"正在转换: {doc_path} -> {pdf_path}")
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
        # The exit status alone is not trusted: success also requires that the
        # expected PDF actually exists.
        if result.returncode == 0 and os.path.isfile(pdf_path):
            print(f"转换成功: {pdf_path}")
            return pdf_path
        print(f"转换失败: {result.stderr}")
        return None
    except subprocess.TimeoutExpired:
        print("转换超时")
        return None
    except FileNotFoundError:
        print("未找到LibreOffice,请先安装: sudo apt install libreoffice")
        return None
    except Exception as e:
        # Best-effort boundary: any other failure is reported, not raised.
        print(f"转换过程中出错: {e}")
        return None
# 批量转换函数
def batch_convert_docs_to_pdf(directory, output_dir=None):
    """
    Convert every .doc/.docx entry found directly inside a directory to PDF.

    :param directory: directory whose entries are scanned (not recursive)
    :param output_dir: directory passed on to convert_doc_to_pdf (optional);
        defaults to directory itself
    :return: list of the truthy PDF paths returned by convert_doc_to_pdf
    """
    target_dir = directory if output_dir is None else output_dir
    doc_suffixes = ('.doc', '.docx')

    # Lazy pipeline: documents are still converted one at a time, in the
    # order os.listdir reports them.
    sources = (
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.lower().endswith(doc_suffixes)
    )
    outcomes = (convert_doc_to_pdf(source, target_dir) for source in sources)
    return [pdf for pdf in outcomes if pdf]
# 使用示例
if __name__ == "__main__":
    # Single-file conversion. Paths given on the command line take precedence;
    # with no arguments the sample path below is converted, as before.
    doc_files = sys.argv[1:] or ["/home/yklele/anli.doc"]
    for doc_file in doc_files:
        pdf_file = convert_doc_to_pdf(doc_file)
        if pdf_file:
            print(f"转换完成: {pdf_file}")
        else:
            print("转换失败")
    # Batch conversion example:
    # docs_directory = "/path/to/your/documents"
    # converted_files = batch_convert_docs_to_pdf(docs_directory)
    # print(f"批量转换完成: {len(converted_files)} 个文件")
comtypes
python
import os
import comtypes.client
import pythoncom
def convert_doc_to_pdf_wps(doc_path, pdf_path=None):
    """
    Convert a DOC document to PDF by automating WPS over COM (WPS must be
    installed).

    :param doc_path: path of the source document
    :param pdf_path: destination PDF path (optional); defaults to doc_path
        with its extension replaced by .pdf
    :return: True when the document was saved and WPS shut down cleanly,
        False otherwise (including when doc_path does not exist)
    """
    if not os.path.exists(doc_path):
        return False
    if pdf_path is None:
        pdf_path = os.path.splitext(doc_path)[0] + '.pdf'

    # The WPS process may not share this process's working directory, so the
    # COM calls receive absolute paths. Messages keep the caller's spelling.
    abs_doc_path = os.path.abspath(doc_path)
    abs_pdf_path = os.path.abspath(pdf_path)

    # Initialise COM outside the main try so CoUninitialize only runs after a
    # successful CoInitialize.
    try:
        pythoncom.CoInitialize()
    except Exception as e:
        print(f"转换失败: {e}")
        return False

    wps = None
    doc = None
    try:
        # Create the WPS application object.
        wps = comtypes.client.CreateObject("KWPS.Application")
        wps.Visible = False
        # Open the document.
        doc = wps.Documents.Open(abs_doc_path)
        # Save as PDF (17 is the PDF file format code).
        doc.SaveAs(abs_pdf_path, FileFormat=17)
        # Close the document and the application; clearing each reference
        # tells the cleanup below that nothing is left to release.
        doc.Close()
        doc = None
        wps.Quit()
        wps = None
        print(f"转换成功: {pdf_path}")
        return True
    except Exception as e:
        print(f"转换失败: {e}")
        return False
    finally:
        # Failure path only: release whatever is still open so no WPS process
        # is left behind. Errors are ignored on purpose, because the failure
        # has already been reported above.
        if doc is not None:
            try:
                doc.Close()
            except Exception:
                pass
        if wps is not None:
            try:
                wps.Quit()
            except Exception:
                pass
        pythoncom.CoUninitialize()