文章目录
- 前言
- [一、 XML 简介与解析方法对比](#一、 XML 简介与解析方法对比)
-
- [1.1 Python 中 XML 解析的三种主要方式](#1.1 Python 中 XML 解析的三种主要方式)
- [1.2 Python XML 处理库概览](#1.2 Python XML 处理库概览)
- [二、 创建和写入 XML 文件](#二、 创建和写入 XML 文件)
-
- [2.1 使用 ElementTree 创建 XML](#2.1 使用 ElementTree 创建 XML)
- [2.2 使用 lxml 创建更复杂的 XML(第三方库)](#2.2 使用 lxml 创建更复杂的 XML(第三方库))
- [三、 解析 XML 文件](#三、 解析 XML 文件)
-
- [3.1 DOM 方式解析](#3.1 DOM 方式解析)
- [3.2 SAX 方式解析](#3.2 SAX 方式解析)
- [3.3 ElementTree 方式解析](#3.3 ElementTree 方式解析)
前言
本文主要介绍 XML 文件的基础知识、XML 文件的创建与写入,以及解析 XML 文件的三种方式。
一、 XML 简介与解析方法对比
XML(可扩展标记语言) 是一种用于存储和传输数据的标记语言,以其结构化、自描述和平台无关的特性,广泛应用于配置文件、Web 服务、数据交换等领域。
1.1 Python 中 XML 解析的三种主要方式
| 解析方式 | 工作原理 | 优点 | 缺点 | 适用场景 |
| --- | --- | --- | --- | --- |
| DOM | 将整个 XML 文档加载到内存中,形成树状结构 | 可以随机访问节点,支持复杂的查询和修改 | 内存占用大,不适合大文件 | 小型 XML 文件,需要频繁修改 |
| SAX | 基于事件驱动,按文档顺序流式读取并触发事件 | 内存占用小,适合大文件,速度快 | 只能顺序读取,无法随机访问 | 大型 XML 文件,只需读取一次 |
| ElementTree | 结合了 DOM 和 SAX 的优点 | 内存友好,API 简单,支持部分 XPath 语法 | 功能相对有限 | 大多数场景,特别是只需要简单 XPath 查询时 |
1.2 Python XML 处理库概览
python
python
# Python 内置的 XML 处理模块
import xml.etree.ElementTree as ET # ElementTree(推荐)
import xml.dom.minidom # DOM 解析器
import xml.sax # SAX 解析器
# 第三方库
# 1. lxml:功能强大,支持 XPath 1.0、XSLT 等
# pip install lxml
# 2. BeautifulSoup:主要用于 HTML,也支持 XML
# pip install beautifulsoup4
二、 创建和写入 XML 文件
2.1 使用 ElementTree 创建 XML
python
python
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom
from datetime import datetime
class XMLWriter:
    """Create sample XML documents with ElementTree and save them pretty-printed.

    Every public method is a static factory: it builds a document, writes it
    to ``output_file`` and returns that path.
    """

    @staticmethod
    def create_simple_xml(output_file='students.xml'):
        """Create a ``<school>`` document holding four ``<student>`` records.

        Args:
            output_file: Path of the XML file to write.

        Returns:
            The path that was written (``output_file``).
        """
        # 1. Root element and its attributes.
        root = ET.Element('school')
        root.set('name', '示范学校')
        root.set('location', '北京市')

        # 2. Source data for the child elements.
        students_data = [
            {'id': '001', 'name': '张三', 'gender': '男', 'age': '20', 'major': '计算机科学'},
            {'id': '002', 'name': '李四', 'gender': '女', 'age': '18', 'major': '数学'},
            {'id': '003', 'name': '王五', 'gender': '男', 'age': '21', 'major': '物理'},
            {'id': '004', 'name': '赵六', 'gender': '女', 'age': '19', 'major': '化学'},
        ]

        # 3. One <student> element per record.
        for student_info in students_data:
            student = ET.SubElement(root, 'student')
            student.set('id', student_info['id'])

            # Plain detail fields, in document order.
            for field in ('name', 'gender', 'age', 'major'):
                ET.SubElement(student, field).text = student_info[field]

            # Nested contact information.
            contact = ET.SubElement(student, 'contact')
            ET.SubElement(contact, 'email').text = f"{student_info['name']}@example.com"
            ET.SubElement(contact, 'phone').text = '138-0000-0000'

            # Course list; only the math course is marked as compulsory.
            courses = ET.SubElement(student, 'courses')
            for course in ['数学', '英语', '编程']:
                course_elem = ET.SubElement(courses, 'course')
                course_elem.text = course
                course_elem.set('type', '必修' if course == '数学' else '选修')

        # 4. School-level metadata.
        info = ET.SubElement(root, 'info')
        ET.SubElement(info, 'established').text = '1990'
        ET.SubElement(info, 'students_count').text = str(len(students_data))
        ET.SubElement(info, 'last_updated').text = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        # 5. Pretty-print and save.
        XMLWriter._save_pretty_xml(root, output_file)
        print(f"XML 文件已创建: {output_file}")
        return output_file

    @staticmethod
    def create_nested_xml(output_file='library.xml'):
        """Create a ``<library>`` document with categories that contain books.

        Book ids are numbered per category, starting at ``B001``.

        Args:
            output_file: Path of the XML file to write.

        Returns:
            The path that was written (``output_file``).
        """
        library = ET.Element('library')
        library.set('name', '市立图书馆')

        categories = [
            {
                'name': '计算机科学',
                'books': [
                    {'title': 'Python编程', 'author': '张三', 'year': '2023', 'price': '59.9'},
                    {'title': '算法导论', 'author': '李四', 'year': '2022', 'price': '89.9'},
                ]
            },
            {
                'name': '文学',
                'books': [
                    {'title': '百年孤独', 'author': '马尔克斯', 'year': '1967', 'price': '45.0'},
                    {'title': '围城', 'author': '钱钟书', 'year': '1947', 'price': '38.0'},
                ]
            }
        ]

        for category in categories:
            category_elem = ET.SubElement(library, 'category')
            category_elem.set('name', category['name'])
            # enumerate() gives the 1-based position directly. Counting the
            # children after SubElement() had already appended the new book
            # made the numbering start at B002.
            for index, book in enumerate(category['books'], start=1):
                book_elem = ET.SubElement(category_elem, 'book')
                book_elem.set('id', f"B{index:03d}")
                for key, value in book.items():
                    ET.SubElement(book_elem, key).text = value

        XMLWriter._save_pretty_xml(library, output_file)
        print(f"嵌套 XML 文件已创建: {output_file}")
        return output_file

    @staticmethod
    def create_from_dict(data_dict, root_name='data', output_file='from_dict.xml'):
        """Convert a (possibly nested) dictionary into an XML file.

        Nested dicts become nested elements, each list item becomes a
        repeated element named after its key, and every other value is
        written as ``str(value)``.

        Args:
            data_dict: Mapping whose keys are used as element names.
            root_name: Tag name of the root element wrapping ``data_dict``.
            output_file: Path of the XML file to write.

        Returns:
            The path that was written (``output_file``).
        """
        def dict_to_xml(tag, d):
            """Recursively convert mapping ``d`` into an element named ``tag``."""
            elem = ET.Element(tag)
            for key, val in d.items():
                if isinstance(val, dict):
                    # Nested mapping -> nested element.
                    elem.append(dict_to_xml(key, val))
                elif isinstance(val, list):
                    # List -> one sibling element per item, all named ``key``.
                    for item in val:
                        if isinstance(item, dict):
                            elem.append(dict_to_xml(key, item))
                        else:
                            ET.SubElement(elem, key).text = str(item)
                else:
                    # Scalar value.
                    ET.SubElement(elem, key).text = str(val)
            return elem

        root = dict_to_xml(root_name, data_dict)
        XMLWriter._save_pretty_xml(root, output_file)
        print(f"从字典创建的 XML 文件: {output_file}")
        return output_file

    @staticmethod
    def _save_pretty_xml(root_element, filename, encoding='utf-8'):
        """Pretty-print ``root_element`` and write it to ``filename``.

        Args:
            root_element: The ElementTree element to serialise.
            filename: Destination path.
            encoding: Encoding used for the file and named in the declaration.
        """
        # Round-trip through minidom, which provides the pretty-printer.
        rough_string = ET.tostring(root_element, encoding=encoding)
        reparsed = minidom.parseString(rough_string)

        # Pretty-print only the root element: Document.toprettyxml() emits its
        # own '<?xml ...?>' line, and prepending a second declaration produced
        # a file that XML parsers reject.
        xml_declaration = '<?xml version="1.0" encoding="{}"?>\n'.format(encoding)
        pretty_xml = xml_declaration + reparsed.documentElement.toprettyxml(indent=' ')

        # Drop blank lines left over by the pretty-printer.
        clean_lines = [line for line in pretty_xml.split('\n') if line.strip()]

        # Characters the target encoding cannot represent are written as
        # numeric character references instead of aborting the write.
        with open(filename, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
            f.write('\n'.join(clean_lines))
# Usage example
if __name__ == "__main__":
    # Flat document.
    simple_xml = XMLWriter.create_simple_xml('students.xml')
    # Nested document.
    nested_xml = XMLWriter.create_nested_xml('library.xml')
    # Document generated from a dictionary.
    sample_dict = {
        'company': {
            'name': '示例公司',
            'address': {
                'street': '科技路123号',
                'city': '北京',
                'zipcode': '100000'
            },
            'employees': [
                {'name': '张三', 'position': '工程师', 'salary': '8000'},
                {'name': '李四', 'position': '经理', 'salary': '12000'}
            ],
            'departments': ['技术部', '市场部', '人事部']
        }
    }
    # Pass the inner mapping: the root element is already named by the
    # second argument, so passing the wrapper dict would nest a second
    # <company> inside the <company> root.
    dict_xml = XMLWriter.create_from_dict(sample_dict['company'], 'company', 'company.xml')
2.2 使用 lxml 创建更复杂的 XML(第三方库)
python
python
try:
    from lxml import etree as lxml_etree

    class LXMLWriter:
        """Create XML documents with lxml features (namespaces, CDATA)."""

        @staticmethod
        def create_with_namespaces():
            """Write ``with_namespace.xml``, a document using XML namespaces."""
            # Prefix -> namespace URI map declared on the root element.
            namespaces = {
                'xs': 'http://www.w3.org/2001/XMLSchema',
                'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
                'app': 'http://example.com/application'
            }
            # Root element in the 'app' namespace, carrying all declarations.
            root = lxml_etree.Element(
                '{%s}application' % namespaces['app'],
                nsmap=namespaces
            )
            # xsi:schemaLocation attribute.
            root.set('{%s}schemaLocation' % namespaces['xsi'],
                     'http://example.com/application application.xsd')
            # Child elements, all in the 'app' namespace.
            config = lxml_etree.SubElement(root, '{%s}config' % namespaces['app'])
            lxml_etree.SubElement(config, '{%s}setting' % namespaces['app'],
                                  name='timeout').text = '30'
            lxml_etree.SubElement(config, '{%s}setting' % namespaces['app'],
                                  name='retries').text = '3'
            # Serialise and save.
            xml_str = lxml_etree.tostring(
                root,
                pretty_print=True,
                encoding='utf-8',
                xml_declaration=True
            ).decode('utf-8')
            with open('with_namespace.xml', 'w', encoding='utf-8') as f:
                f.write(xml_str)
            print("带命名空间的 XML 文件已创建: with_namespace.xml")

        @staticmethod
        def create_with_cdata():
            """Write ``with_cdata.xml``, a document containing a CDATA section."""
            root = lxml_etree.Element('document')
            # Ordinary text content.
            title = lxml_etree.SubElement(root, 'title')
            title.text = 'XML 文档示例'
            # CDATA section protecting markup-like characters. A CDATA object
            # is not an element, so it must be assigned to ``.text``;
            # ``append()`` only accepts elements and raises TypeError for it.
            content = lxml_etree.SubElement(root, 'content')
            content.text = lxml_etree.CDATA('<script>alert("Hello");</script> & 更多内容')
            # Serialise and save.
            xml_str = lxml_etree.tostring(
                root,
                pretty_print=True,
                encoding='utf-8',
                xml_declaration=True
            ).decode('utf-8')
            with open('with_cdata.xml', 'w', encoding='utf-8') as f:
                f.write(xml_str)
            print("包含 CDATA 的 XML 文件已创建: with_cdata.xml")

    # Usage example
    LXMLWriter.create_with_namespaces()
    LXMLWriter.create_with_cdata()
except ImportError:
    print("lxml 库未安装,请使用: pip install lxml")
三、 解析 XML 文件
3.1 DOM 方式解析
python
python
import xml.dom.minidom
class DOMParser:
    """Parse an XML file with ``xml.dom.minidom`` and print its content."""

    def __init__(self, xml_file):
        """Remember the file to parse; nothing is read until ``parse()``."""
        self.xml_file = xml_file
        self.dom = None

    def parse(self):
        """Load the XML file and print its whole node tree.

        Returns:
            The parsed minidom document, or ``None`` when loading failed
            (the error is printed, not raised).
        """
        print("=" * 60)
        print("DOM 方式解析 XML")
        print("=" * 60)
        try:
            # 1. Load the document.
            self.dom = xml.dom.minidom.parse(self.xml_file)
            print(f"成功加载: {self.xml_file}")
            # 2. Root element.
            root = self.dom.documentElement
            print(f"根元素: {root.tagName}")
            # 3. Walk the tree.
            self._traverse_node(root, 0)
            return self.dom
        except Exception as e:
            # Deliberately best-effort: report the failure and signal it via None.
            print(f"解析失败: {e}")
            return None

    def _traverse_node(self, node, depth):
        """Recursively print ``node`` and its descendants, indented by ``depth``."""
        indent = " " * depth
        if node.nodeType == node.ELEMENT_NODE:
            print(f"{indent}<{node.tagName}>")
            # Attributes of this element.
            if node.hasAttributes():
                for attr_name, attr_value in node.attributes.items():
                    print(f"{indent} @{attr_name} = {attr_value}")
            # Children, one level deeper.
            for child in node.childNodes:
                self._traverse_node(child, depth + 1)
            print(f"{indent}</{node.tagName}>")
        elif node.nodeType == node.TEXT_NODE:
            # Whitespace-only text nodes (pretty-print indentation) are skipped.
            text = node.data.strip()
            if text:
                print(f"{indent}文本: {text}")

    def get_element_by_tag(self, tag_name):
        """Print and return every element named ``tag_name``.

        Returns:
            The matching elements, or ``[]`` when no document is loaded.
        """
        if self.dom is None:
            return []
        elements = self.dom.getElementsByTagName(tag_name)
        print(f"\n标签 '{tag_name}' 找到 {len(elements)} 个元素")
        for i, elem in enumerate(elements, 1):
            print(f"\n元素 {i}:")
            print(f" 标签名: {elem.tagName}")
            if elem.hasAttributes():
                print(" 属性:")
                for attr_name, attr_value in elem.attributes.items():
                    print(f" {attr_name} = {attr_value}")
            text_content = self._get_text_content(elem)
            if text_content:
                print(f" 内容: {text_content}")
        return elements

    def _get_text_content(self, element):
        """Return the direct text children of ``element``, stripped and space-joined."""
        text_nodes = []
        for child in element.childNodes:
            if child.nodeType == child.TEXT_NODE:
                text = child.data.strip()
                if text:
                    text_nodes.append(text)
        return ' '.join(text_nodes)

    def query_students(self):
        """Print the details of every ``<student>`` element.

        Does nothing when no document is loaded, matching
        ``get_element_by_tag``; previously this raised ``AttributeError``
        after a failed ``parse()``.
        """
        if self.dom is None:
            return
        print("\n" + "-" * 60)
        print("学生信息查询")
        print("-" * 60)
        for student in self.dom.getElementsByTagName('student'):
            student_id = student.getAttribute('id')
            print(f"\n学生ID: {student_id}")
            # Plain detail fields.
            for field in ('name', 'gender', 'age', 'major'):
                elements = student.getElementsByTagName(field)
                if elements:
                    value = self._get_text_content(elements[0])
                    print(f" {field}: {value}")
            # Contact information.
            contact = student.getElementsByTagName('contact')
            if contact:
                email = contact[0].getElementsByTagName('email')
                phone = contact[0].getElementsByTagName('phone')
                if email:
                    print(f" 邮箱: {self._get_text_content(email[0])}")
                if phone:
                    print(f" 电话: {self._get_text_content(phone[0])}")
            # Courses.
            courses = student.getElementsByTagName('course')
            if courses:
                course_list = [self._get_text_content(course) for course in courses]
                print(f" 课程: {', '.join(course_list)}")
# Usage example
if __name__ == "__main__":
    dom_parser = DOMParser('students.xml')
    # parse() returns None on failure (it prints the error itself); only
    # run the queries when a document was actually loaded.
    if dom_parser.parse() is not None:
        dom_parser.get_element_by_tag('student')
        dom_parser.query_students()
3.2 SAX 方式解析
python
python
import xml.sax
import xml.sax.handler
class SAXContentHandler(xml.sax.ContentHandler):
    """Generic SAX handler that echoes the document and gathers statistics.

    While parsing it prints an indented trace of the elements, counts how
    often each tag occurs (``element_count``) and records the text of each
    element keyed by its slash-separated path (``text_content``).
    """

    def __init__(self):
        super().__init__()
        self.current_path = []        # stack of open element names
        self.current_data = ""        # text collected since the last tag
        self.current_attributes = {}  # attributes of the most recent start tag
        self.element_count = {}       # tag name -> number of occurrences
        self.text_content = {}        # element path -> stripped text
        # Description of the element most recently opened.
        self.current_element = None

    def startDocument(self):
        """Print the banner shown before parsing starts."""
        print("=" * 60)
        print("SAX 方式解析 XML - 开始解析")
        print("=" * 60)

    def endDocument(self):
        """Print the closing banner and the per-tag statistics."""
        print("\n" + "=" * 60)
        print("解析完成")
        print("=" * 60)
        print("\n元素统计:")
        for tag, count in sorted(self.element_count.items()):
            print(f" {tag}: {count} 次")

    def startElement(self, name, attrs):
        """Handle a start tag: update path and counters, print tag and attributes."""
        self.current_path.append(name)
        self.current_attributes = dict(attrs)
        self.current_data = ""
        # Occurrence counter for this tag.
        self.element_count[name] = self.element_count.get(name, 0) + 1
        # Start tag, followed by its attributes when there are any. Python's
        # SAX attribute object is mapping-like; it has no index-based
        # getName(i)/getValue(i) accessors, so iterate over items().
        indent = " " * (len(self.current_path) - 1)
        line = f"{indent}<{name}>"
        attr_text = ", ".join(f"{key}={value}" for key, value in attrs.items())
        if attr_text:
            line += f" [{attr_text}]"
        print(line)
        # Remember what was just opened.
        self.current_element = {
            'name': name,
            'attributes': self.current_attributes,
            'path': '/'.join(self.current_path)
        }

    def endElement(self, name):
        """Handle an end tag: print and record collected text, then pop the path."""
        text = self.current_data.strip()
        if text:
            indent = " " * len(self.current_path)
            print(f"{indent}内容: {text}")
        indent = " " * (len(self.current_path) - 1)
        print(f"{indent}</{name}>")
        # Record the text under the element's full path.
        if text:
            self.text_content['/'.join(self.current_path)] = text
        self.current_path.pop()
        self.current_data = ""

    def characters(self, content):
        """Accumulate character data; the parser may deliver it in several chunks."""
        self.current_data += content

    def get_student_info(self):
        """Print a header and return an empty list (placeholder).

        This generic handler does not collect per-student data while parsing,
        so there is nothing to return.
        """
        print("\n" + "-" * 60)
        print("从 SAX 解析结果提取学生信息")
        print("-" * 60)
        # SAX is a streaming API: the data would have to be stored inside the
        # event callbacks, which this generic handler does not do.
        student_info = []
        return student_info
class StudentSAXHandler(xml.sax.ContentHandler):
    """SAX handler that collects ``<student>`` records into ``self.students``.

    Each record is a dict with the keys ``id``, ``name``, ``gender``, ``age``,
    ``major``, ``contact`` (a dict with ``email`` and ``phone``) and
    ``courses`` (a list of ``{'name', 'type'}`` dicts).
    """

    def __init__(self):
        super().__init__()
        self.students = []
        self.current_student = None
        # Field receiving text: an element name, a ('course', index, 'name')
        # tuple, or None when text is not being collected.
        self.current_field = None
        self.in_student = False
        self.in_contact = False
        self.in_courses = False
        # Text chunks of the element currently being read.
        self._text_parts = []

    def startElement(self, name, attrs):
        """Open a new record or select the field the following text belongs to."""
        # A new start tag ends any text run of the enclosing element.
        self.current_field = None
        self._text_parts = []
        if name == "student":
            self.in_student = True
            self.current_student = {
                'id': attrs.get('id', ''),
                'name': '',
                'gender': '',
                'age': '',
                'major': '',
                'contact': {'email': '', 'phone': ''},
                'courses': []
            }
        elif self.in_student:
            if name == "contact":
                self.in_contact = True
            elif name == "courses":
                self.in_courses = True
            elif name == "course":
                if self.in_courses:
                    course_info = {
                        'name': '',
                        'type': attrs.get('type', '')
                    }
                    self.current_student['courses'].append(course_info)
                    self.current_field = ('course', len(self.current_student['courses']) - 1, 'name')
            else:
                self.current_field = name

    def endElement(self, name):
        """Store the collected text, then close the record or section."""
        # Store before the section flags change: the text of <email>/<phone>
        # must still be routed into the contact dict.
        self._store_text()
        if name == "student":
            self.in_student = False
            self.students.append(self.current_student)
            self.current_student = None
        elif name == "contact":
            self.in_contact = False
        elif name == "courses":
            self.in_courses = False

    def characters(self, content):
        """Buffer character data for the current field.

        The parser may split one text run into several calls (for example
        around entity references such as ``&amp;``), so chunks are collected
        here and joined in ``endElement`` rather than assigned one by one.
        """
        if self.current_field is not None:
            self._text_parts.append(content)

    def _store_text(self):
        """Write the buffered text into the current field and reset the buffer."""
        text = "".join(self._text_parts).strip()
        field = self.current_field
        self.current_field = None
        self._text_parts = []
        if field is None or not text or self.current_student is None:
            return
        if isinstance(field, tuple):
            # Course entry: ('course', index, subfield).
            _, index, subfield = field
            self.current_student['courses'][index][subfield] = text
        elif self.in_contact:
            self.current_student['contact'][field] = text
        else:
            self.current_student[field] = text

    def print_students(self):
        """Print every collected student record."""
        print(f"\n找到 {len(self.students)} 名学生:")
        for i, student in enumerate(self.students, 1):
            print(f"\n学生 {i}:")
            print(f" ID: {student['id']}")
            print(f" 姓名: {student['name']}")
            print(f" 性别: {student['gender']}")
            print(f" 年龄: {student['age']}")
            print(f" 专业: {student['major']}")
            print(f" 邮箱: {student['contact']['email']}")
            print(f" 电话: {student['contact']['phone']}")
            print(f" 课程: {', '.join([c['name'] for c in student['courses']])}")
# Usage example
if __name__ == "__main__":
    def _build_parser(content_handler):
        """Return a namespace-unaware SAX parser wired to ``content_handler``."""
        sax_parser = xml.sax.make_parser()
        sax_parser.setFeature(xml.sax.handler.feature_namespaces, 0)
        sax_parser.setContentHandler(content_handler)
        return sax_parser

    # Approach 1: generic handler that echoes the document structure.
    print("方法1:通用 SAX 解析")
    generic_handler = SAXContentHandler()
    _build_parser(generic_handler).parse('students.xml')

    # Approach 2: dedicated handler that collects the student records.
    print("\n\n方法2:专门的学生信息 SAX 解析")
    students_handler = StudentSAXHandler()
    _build_parser(students_handler).parse('students.xml')
    students_handler.print_students()
3.3 ElementTree 方式解析
python
python
import xml.etree.ElementTree as ET
import os
class ElementTreeParser:
    """Parse, query and modify an XML file with ``xml.etree.ElementTree``."""

    def __init__(self, xml_file):
        """Remember the file to parse; nothing is read until ``parse()``."""
        self.xml_file = xml_file
        self.tree = None
        self.root = None

    def parse(self):
        """Parse the XML file and print its structure.

        Returns:
            The parsed ``ElementTree``, or ``None`` when parsing failed
            (the error is printed, not raised).
        """
        print("=" * 60)
        print("ElementTree 方式解析 XML")
        print("=" * 60)
        try:
            # 1. Parse the file.
            self.tree = ET.parse(self.xml_file)
            self.root = self.tree.getroot()
            print(f"成功加载: {self.xml_file}")
            print(f"根元素: {self.root.tag}")
            # 2. Show the document structure.
            self._display_structure(self.root)
            return self.tree
        except ET.ParseError as e:
            print(f"XML 解析错误: {e}")
            return None
        except FileNotFoundError:
            print(f"文件未找到: {self.xml_file}")
            return None
        except Exception as e:
            # Deliberately best-effort: report anything else and return None.
            print(f"未知错误: {e}")
            return None

    def _display_structure(self, element, depth=0):
        """Recursively print ``element`` and its children, indented by ``depth``."""
        indent = " " * depth
        tag_display = element.tag
        # Start tag, with attributes when present.
        if element.attrib:
            attrs = ' '.join([f'{k}="{v}"' for k, v in element.attrib.items()])
            print(f"{indent}<{tag_display} {attrs}>")
        else:
            print(f"{indent}<{tag_display}>")
        # Text content, ignoring whitespace-only text.
        if element.text and element.text.strip():
            text = element.text.strip()
            print(f"{indent} {text}")
        for child in element:
            self._display_structure(child, depth + 1)
        print(f"{indent}</{tag_display}>")

    def find_all(self, tag_name):
        """Return every descendant of the root named ``tag_name``.

        Returns:
            The matching elements, or ``[]`` when no document is loaded.
        """
        # ``is None`` rather than truthiness: an Element without children
        # evaluates as False, and truth-testing Elements is deprecated.
        if self.root is None:
            return []
        elements = self.root.findall(f'.//{tag_name}')
        print(f"\n使用 findall('.//{tag_name}') 找到 {len(elements)} 个元素")
        return elements

    def find_with_xpath(self, xpath_expression):
        """Print and return the elements matching ``xpath_expression``.

        Only the XPath subset supported by ``Element.findall`` is available.

        Returns:
            The matching elements, or ``[]`` when no document is loaded.
        """
        if self.root is None:
            return []
        elements = self.root.findall(xpath_expression)
        print(f"\n使用 XPath '{xpath_expression}' 找到 {len(elements)} 个元素")
        for i, elem in enumerate(elements, 1):
            print(f"\n元素 {i}:")
            print(f" 标签: {elem.tag}")
            if elem.attrib:
                print(" 属性:")
                for key, value in elem.attrib.items():
                    print(f" {key} = {value}")
            if elem.text and elem.text.strip():
                print(f" 内容: {elem.text.strip()}")
        return elements

    def query_students_advanced(self):
        """Print gender, age and major statistics plus each student's details.

        Does nothing when no document is loaded. Missing child elements are
        reported as ``N/A`` (or skipped for the age filter) instead of
        raising ``AttributeError``.
        """
        if self.root is None:
            return
        print("\n" + "-" * 60)
        print("高级学生信息查询")
        print("-" * 60)
        # 1. All students (direct children of the root).
        students = self.root.findall('student')
        print(f"总学生数: {len(students)}")
        # 2. Filter by gender.
        print("\n1. 按性别筛选:")
        male_students = [s for s in students if s.findtext('gender') == '男']
        female_students = [s for s in students if s.findtext('gender') == '女']
        print(f" 男生: {len(male_students)} 人")
        print(f" 女生: {len(female_students)} 人")
        # 3. Filter by age (20 and above).
        print("\n2. 按年龄筛选:")
        for student in students:
            try:
                age = int(student.findtext('age'))
            except (TypeError, ValueError):
                # Missing or non-numeric age: cannot be compared, skip it.
                continue
            if age >= 20:
                name = student.findtext('name', 'N/A')
                print(f" {name}: {age} 岁")
        # 4. Distribution of majors.
        print("\n3. 专业分布:")
        majors = {}
        for student in students:
            major = student.findtext('major', 'N/A')
            majors[major] = majors.get(major, 0) + 1
        for major, count in majors.items():
            print(f" {major}: {count} 人")
        # 5. Per-student details.
        print("\n4. 学生详细信息:")
        for student in students:
            student_id = student.get('id')
            name = student.findtext('name', 'N/A')
            gender = student.findtext('gender', 'N/A')
            age = student.findtext('age', 'N/A')
            major = student.findtext('major', 'N/A')
            email = student.findtext('contact/email', 'N/A')
            # Empty <course/> elements have text None; join() needs strings.
            courses = [course.text or '' for course in student.findall('courses/course')]
            print(f"\n ID: {student_id}")
            print(f" 姓名: {name}")
            print(f" 性别: {gender}")
            print(f" 年龄: {age}")
            print(f" 专业: {major}")
            print(f" 邮箱: {email}")
            print(f" 课程: {', '.join(courses)}")

    def modify_xml(self, output_file='students_modified.xml'):
        """Add a student, update student 001 and save the result.

        Args:
            output_file: Path the modified document is written to.

        Returns:
            The path that was written, or ``None`` when no document is loaded.
        """
        if self.root is None or self.tree is None:
            return None
        print("\n" + "-" * 60)
        print("修改 XML 内容")
        print("-" * 60)
        # 1. Build and append a new student.
        new_student = ET.Element('student', {'id': '005'})
        ET.SubElement(new_student, 'name').text = '孙七'
        ET.SubElement(new_student, 'gender').text = '男'
        ET.SubElement(new_student, 'age').text = '22'
        ET.SubElement(new_student, 'major').text = '生物'
        contact = ET.SubElement(new_student, 'contact')
        ET.SubElement(contact, 'email').text = 'sunqi@example.com'
        ET.SubElement(contact, 'phone').text = '139-0000-0000'
        courses = ET.SubElement(new_student, 'courses')
        for course_name in ['生物学', '化学', '数学']:
            course = ET.SubElement(courses, 'course', {'type': '必修'})
            course.text = course_name
        self.root.append(new_student)
        print("已添加新学生: 孙七")
        # 2. Update existing data: student 001 gets one year older and a grade.
        for student in self.root.findall('student'):
            if student.get('id') != '001':
                continue
            age_elem = student.find('age')
            if age_elem is not None:
                try:
                    new_age = str(int(age_elem.text) + 1)
                except (TypeError, ValueError):
                    # Empty or non-numeric age: leave it untouched.
                    pass
                else:
                    age_elem.text = new_age
                    print(f"已将学生 001 的年龄更新为: {new_age}")
            if student.find('grade') is None:
                ET.SubElement(student, 'grade').text = 'A'
        # 3. Removing elements (shown for reference, intentionally not executed):
        # for student in self.root.findall('student'):
        #     if student.get('id') == '002':
        #         self.root.remove(student)
        #         print("已删除学生: 002")
        # 4. Save the modified tree.
        modified_file = output_file
        self.tree.write(modified_file, encoding='utf-8', xml_declaration=True)
        print(f"修改后的文件已保存: {modified_file}")
        return modified_file
# Usage example
if __name__ == "__main__":
    # Create the parser instance.
    et_parser = ElementTreeParser('students.xml')
    # parse() returns None on failure (it prints the error itself); only
    # run the queries when a document was actually loaded.
    if et_parser.parse() is not None:
        # Different ways of locating elements.
        et_parser.find_all('student')
        et_parser.find_with_xpath('.//student[@id="001"]')
        et_parser.find_with_xpath('.//course[@type="必修"]')
        # Advanced queries.
        et_parser.query_students_advanced()
        # Modifying the XML (disabled by default):
        # modified = et_parser.modify_xml()