目录
使用mammoth库
mammoth 库支持将 Word 文件转换为 HTML 和 Markdown 格式的文件。
python
import mammoth
def word_html(word_file):
    """Convert a Word document to HTML with mammoth and save it to disk.

    The HTML is written to the same path as the input, with the final
    file extension replaced by ``.html``.

    Args:
        word_file: Path of the Word document to convert.
    """
    import os  # local import so the snippet needs no extra top-level import

    # splitext removes only the last extension, so dots elsewhere in the
    # path ("./docs/v1.2/report.docx") survive; splitting on the first "."
    # would truncate such paths.
    html_save_name = f"{os.path.splitext(word_file)[0]}.html"

    with open(word_file, 'rb') as f:
        data = mammoth.convert_to_html(f)

    # Write as UTF-8 explicitly so non-ASCII document text does not depend
    # on the platform's default encoding.
    with open(html_save_name, 'w', encoding='utf-8') as f:
        f.write(data.value)
使用spire.doc库
spire.doc 是功能强大的 Word 文件处理库;美中不足的是,它是商业库,转换出的文件带有水印。
python
from spire.doc import Document, FileFormat
def word_html(word_file):
    """Convert a Word document to HTML with spire.doc and strip its watermark text.

    The HTML is written to the same path as the input, with the final
    file extension replaced by ``.html``. After conversion the file is
    re-read and the evaluation-warning sentence inserted by spire.doc is
    removed before the file is written back.

    Args:
        word_file: Path of the Word document to convert.
    """
    import os  # local import so the snippet needs no extra top-level import

    # splitext removes only the last extension, so dots elsewhere in the
    # path ("./docs/v1.2/report.docx") survive; splitting on the first "."
    # would truncate such paths.
    html_save_name = f"{os.path.splitext(word_file)[0]}.html"

    doc = Document()
    try:
        doc.LoadFromFile(word_file)
        doc.SaveToFile(html_save_name, FileFormat.Html)
    finally:
        # Close the document even when loading or saving raises.
        doc.Close()

    with open(html_save_name, 'r', encoding='utf-8') as f:
        data = f.read()

    # Remove the watermark sentence that the commercial spire.doc library
    # embeds in its generated output.
    data = data.replace('Evaluation Warning: The document was created with Spire.Doc for Python.', '')

    with open(html_save_name, 'w', encoding='utf-8') as f:
        f.write(data)