python-docx:操作word
读取word
- 安装:
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple python-docx
- 使用:
import docx
3.使用Document获取对象
注意:只支持 .docx 格式,不支持 .doc
from docx import Document
#返回word对象
document_obj=Document(word地址)
- 段落的读取
#循环每个段落对象,并读取其text属性
for p in document_obj.paragraphs:
    print(p.text)
5.表格内容的读取
#通过循环按行获取单元格内容,cell.text为单元格内容的字符串
for t in document_obj.tables:
    for row in t.rows:
        for cell in row.cells:
            print(cell.text)
#根据列读取
for t in document_obj.tables:
    for col in t.columns:
        for cell in col.cells:
            print(cell.text)
创建word
- 生成标题
document_obj=Document()
#标题样式等级0-9
titleobj=document_obj.add_heading(标题内容,标题样式等级)
#内容追加
titleobj.add_run(字符串)
#保存文档
document_obj.save("xx.docx")
2.保存文档
document_obj=Document()
document_obj.save("xx.docx")
3.添加段落
paraobj=document_obj.add_paragraph("段落内容")
#内容追加
paraobj.add_run("字符串内容")
#可以使用\n来进行换行
4.添加图片
#宽高的定义
from docx.shared import Inches
image_obj=document_obj.add_picture("图片地址",宽,高)
image_obj=document_obj.add_picture("图片地址",width=Inches(5),height=Inches(5))
5.添加表格:add_table
data:image/s3,"s3://crabby-images/456b7/456b796557747d43f251180d9b65fc117b713f5e" alt=""
6.分页 add_page_break()
data:image/s3,"s3://crabby-images/86701/86701f359aa8a8b43056158747b92545a0480c48" alt=""
- 设置全局样式 document_obj.styles['Normal']
- 查看所有样式
data:image/s3,"s3://crabby-images/1956a/1956a31fbd6407d8d40f2a197fdb0ee5051aa4aa" alt=""
- 设置全局样式
data:image/s3,"s3://crabby-images/53501/535016f9d4c80cca5a33059315be188528222dad" alt=""
- 文本样式
data:image/s3,"s3://crabby-images/bb700/bb70059178eac321c9bb0b30d884a801f8e39f2a" alt=""
data:image/s3,"s3://crabby-images/4f7db/4f7db0c1a430082f454ffa95b5aacd53efbfe189" alt=""
- 图片样式
data:image/s3,"s3://crabby-images/d59ae/d59ae9069dea363714d5867806d55c1748dacd34" alt=""
- 表格样式
data:image/s3,"s3://crabby-images/3640d/3640d634051bae50cef3cb8ded0a1793f9e6211b" alt=""
生成pdf
- pdf工具包:pdfkit
data:image/s3,"s3://crabby-images/04054/04054e6c1f8665557c3f3b253d0d39ec3a6ffc4b" alt=""
pdfkit 依赖 wkhtmltopdf,可以在 https://wkhtmltopdf.org/downloads.html 下载适用于不同系统的安装包
2.html转pdf
data:image/s3,"s3://crabby-images/6dd4e/6dd4e9e305dd4d82b86d335e5ccb83c14604046f" alt=""
3.网址转pdf
data:image/s3,"s3://crabby-images/80a23/80a23a2c19b51b420aaad27f5013cf23662c82ea" alt=""
4.字符串生成pdf
data:image/s3,"s3://crabby-images/ca1f5/ca1f5377c28c4a51c8f3623584a1fa9051f13e6e" alt=""
5.word转html
data:image/s3,"s3://crabby-images/ed634/ed6348a4e9dea424766f51f88f168e3b28866d52" alt=""
- demo
# coding:utf-8
import pdfkit
from pydocx import PyDocX

#html转pdf
#注意:需要先安装依赖 wkhtmltopdf,可在 https://wkhtmltopdf.org/downloads.html 下载
path_to_wkhtmltopdf = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
config = pdfkit.configuration(wkhtmltopdf=path_to_wkhtmltopdf)

#html文件转pdf
#pdfkit.from_file("你好.html","你好1.pdf",configuration=config)

#网址转pdf
#pdfkit.from_url("https://www.imooc.com","慕课网.pdf",configuration=config)

html="""
<html>
<head>
<meta charset="utf-8"/>
</head>
<body>
<h1>你好</h1>
</body>
</html>
"""

#html字符串转pdf
#pdfkit.from_string(html,"你好.pdf",configuration=config)

#word转pdf 先将word转为html
html_word=PyDocX.to_html("测试.docx")
pdfkit.from_string(html_word,"测试.pdf",configuration=config)