dom解析
python
from xml.dom.minidom import parse
import xml.dom.minidom
# Parse the XML document with the minidom parser.
DOMTree = xml.dom.minidom.parse("xxx.xml")
collection = DOMTree.documentElement

# Look elements up by tag name: take the first "fistTag" element, then every
# "secondTag" element beneath it.
# NOTE(review): "fistTag" looks like a typo for "firstTag" -- confirm against
# the real XML before changing it.
first_tag = collection.getElementsByTagName("fistTag")[0]
movies = first_tag.getElementsByTagName('secondTag')

# Collect the comma-separated entries of every "secondTag" element, with the
# surrounding whitespace stripped from each entry.
rpmList = []
for movie in movies:
    if not movie.childNodes:
        # An empty element has no child node to read; skip it rather than
        # failing with IndexError.
        continue
    # The content is read via childNodes[0].data (assumes the first child is
    # a text node -- TODO confirm for the real document).
    entries = movie.childNodes[0].data.split(',')
    rpmList.extend(entry.strip() for entry in entries)
另外还可以使用 SAX 解析,这种方式更便于生成 bean 类(数据对象)。
python
from openpyxl import load_workbook
class RpmData:
    """Simple record holding a ``name`` together with an ``incFileNum`` value.

    Args:
        name: Stored unchanged as ``self.name``.
        incFileNum: Stored unchanged as ``self.incFileNum``.
    """

    def __init__(self, name, incFileNum):
        self.name = name
        self.incFileNum = incFileNum

    def __repr__(self):
        # Readable representation to make debugging and logging easier.
        return "{}(name={!r}, incFileNum={!r})".format(
            type(self).__name__, self.name, self.incFileNum
        )
def get_index(char):
    """Return the zero-based index of a spreadsheet column label.

    Single letters map ``'A'`` -> 0 ... ``'Z'`` -> 25. Multi-letter labels
    continue the sequence (``'AA'`` -> 26, ``'AB'`` -> 27, ...). Labels are
    case-insensitive.

    Args:
        char: Column label made of ASCII letters, e.g. ``'Q'`` or ``'AA'``.

    Returns:
        The zero-based column index as an ``int``.

    Raises:
        TypeError: If ``char`` is not a string.
        ValueError: If ``char`` is empty or contains a non-letter character.
    """
    if not isinstance(char, str):
        raise TypeError(
            "column label must be a string, not {}".format(type(char).__name__)
        )
    if not char:
        raise ValueError("column label must not be empty")

    index = 0
    for letter in char:
        if not ('A' <= letter <= 'Z' or 'a' <= letter <= 'z'):
            raise ValueError("invalid column label: {!r}".format(char))
        # Column labels are a bijective base-26 number with digits 1..26.
        index = index * 26 + (ord(letter.upper()) - ord('A') + 1)
    # Shift from the 1-based label value to a 0-based index.
    return index - 1
import re
# Matches "<name>-<version>-<release>.rpm". The name may itself contain
# hyphens, so it is matched greedily while version and release are the last
# two hyphen-free fields before the ".rpm" suffix.
_RPM_FILENAME_RE = re.compile(
    r"^(?P<name>.+)-(?P<version>[^-]+)-(?P<release>[^-]+)\.rpm$"
)


def get_rpm_package_name(rpm_name):
    """Return the package name part of an RPM file name.

    Args:
        rpm_name: File name of the form ``<name>-<version>-<release>.rpm``,
            e.g. ``"python3-libs-3.6.8-1.el7.x86_64.rpm"``.

    Returns:
        The ``<name>`` part, including any hyphens it contains.

    Raises:
        ValueError: If ``rpm_name`` does not match the expected format.
    """
    match = _RPM_FILENAME_RE.match(rpm_name)
    if match is None:
        raise ValueError("Invalid RPM package name format")
    return match.group('name')
def extract_name(rpm_package_name):
    """Strip the last two hyphen-separated fields from a package string.

    For ``"python3-libs-3.6.8-1.el7"`` this returns ``"python3-libs"``.

    Args:
        rpm_package_name: Hyphen-separated string whose last two fields are
            to be dropped.

    Returns:
        The remaining leading fields re-joined with ``'-'``. The result is an
        empty string when the input has fewer than three fields.
    """
    parts = rpm_package_name.split('-')
    return '-'.join(parts[:-2])
wb = load_workbook(r'xxx.xlsx')
sheet = wb['rpm视图']

from openpyxl import Workbook

# Create a new workbook and write into its active sheet.
dest_wb = Workbook()
ws = dest_wb.active

# Build the lookup set once: rpmList is probed for every source row.
wanted_names = set(rpmList)
name_col = get_index('A')
inc_file_num_col = get_index('Q')

# Next row to fill in the destination sheet; only advanced for copied rows so
# the output has no gaps.
dest_row = 1
# Source rows are read from the third row onwards.
for row in sheet.iter_rows(min_row=3):
    incFileNum = row[inc_file_num_col].value
    # An empty cell yields None, which cannot be compared with a number;
    # treat it like a non-positive value and skip the row.
    if incFileNum is None or incFileNum <= 0.0:
        continue

    raw_name = row[name_col].value
    if raw_name is None:
        # No package string in column A, so there is nothing to match.
        continue

    rpmName = extract_name(raw_name)
    if rpmName not in wanted_names:
        continue

    # Copy the cell values (values only) of the matching row.
    for dest_col, cell in enumerate(row, start=1):
        ws.cell(row=dest_row, column=dest_col).value = cell.value
    dest_row += 1

dest_wb.save("sample.xlsx")
当原 Excel 中存在引用时,其实只需设置 cell.parent = ws 就可以直接复制单元格,但这样保存出来的文件格式会有一些问题。
QA
运行 Python 文件时报错 SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape。引起这个错误的原因是转义问题:普通字符串中的反斜杠(例如 Windows 路径里的 \U)会被当作转义序列解析;在字符串前加 r 前缀使用原始字符串即可解决,例如:
df = pd.read_excel(r'xxx.xlsx')
xlrd.biffh.XLRDError: Excel xlsx file; not supported
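原因:xlrd 2.0 及以上版本不再支持 .xlsx 文件(仅支持 .xls)。可以安装 openpyxl 并指定引擎读取,例如 pd.read_excel(r'xxx.xlsx', engine='openpyxl'),或将 xlrd 降级到 1.2.0。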