9.1 数据序列化
9.1.1 JSON模块
python
复制代码
import json
# Convert a Python object to a JSON string.
data = {
    "name": "Alice",
    "age": 25,
    "hobbies": ["reading", "music"],
    "married": False,
}

# dumps() serializes to a JSON string.
json_str = json.dumps(data)
print(json_str)
# {"name": "Alice", "age": 25, "hobbies": ["reading", "music"], "married": false}

# Pretty-printed output.
json_str = json.dumps(data, indent=4)
print(json_str)

# ensure_ascii=False keeps non-ASCII text readable instead of \uXXXX escapes.
data_cn = {"姓名": "张三", "年龄": 30}
json_str = json.dumps(data_cn, ensure_ascii=False)
print(json_str)  # {"姓名": "张三", "年龄": 30}

# Parse a JSON string back into a Python object.
json_str = '{"name": "Bob", "age": 30}'
data = json.loads(json_str)
print(data)  # {'name': 'Bob', 'age': 30}
print(type(data))  # <class 'dict'>

# Write to a file.
with open("data.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=4)

# Read back from the file.
with open("data.json", "r", encoding="utf-8") as f:
    loaded_data = json.load(f)
print(loaded_data)
# Serializing custom objects.
class Person:
    """Plain record type used to demonstrate custom JSON encoding."""

    def __init__(self, name, age):
        self.name = name
        self.age = age


def person_encoder(obj):
    """Fallback for ``json.dumps(default=...)``: turn a Person into a dict.

    Raises:
        TypeError: for any other type, so json reports the unsupported
            object instead of silently mis-encoding it.
    """
    if isinstance(obj, Person):
        return {"name": obj.name, "age": obj.age}
    # Use the bare class name, matching the message json itself produces.
    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )


person = Person("Alice", 25)
json_str = json.dumps(person, default=person_encoder)
print(json_str)


# Alternative: a custom encoder class.
class PersonEncoder(json.JSONEncoder):
    """JSONEncoder that also knows how to encode Person instances."""

    def default(self, obj):
        if isinstance(obj, Person):
            return {"name": obj.name, "age": obj.age}
        # Defer to the base class for every other type.
        return super().default(obj)


json_str = json.dumps(person, cls=PersonEncoder)
print(json_str)
9.1.2 Pickle模块
python
复制代码
import pickle
# Serialize a Python object to a binary representation.
data = {
    "name": "Alice",
    "age": 25,
    "hobbies": ["reading", "music"],
}

# dumps() serializes to bytes.
pickled = pickle.dumps(data)
print(pickled)  # b'\x80\x04\x95...'
print(type(pickled))  # <class 'bytes'>

# loads() deserializes.
unpickled = pickle.loads(pickled)
print(unpickled)

# Write to a file (pickle data is binary, so open in "wb").
with open("data.pkl", "wb") as f:
    pickle.dump(data, f)

# Read back from the file.
with open("data.pkl", "rb") as f:
    loaded = pickle.load(f)
print(loaded)


# pickle can serialize more kinds of Python objects, e.g. class instances.
class Person:
    """Plain record type used to demonstrate pickling a custom object."""

    def __init__(self, name, age):
        self.name = name
        self.age = age


person = Person("Bob", 30)

# Serialize the custom object, then load it back.
with open("person.pkl", "wb") as f:
    pickle.dump(person, f)
with open("person.pkl", "rb") as f:
    loaded_person = pickle.load(f)
print(f"{loaded_person.name}, {loaded_person.age}")

# WARNING: pickle is not secure; never unpickle data from an untrusted source.
9.1.3 CSV模块
python
复制代码
import csv
# Write a CSV file from a list of rows.
data = [
    ["姓名", "年龄", "城市"],
    ["Alice", 25, "北京"],
    ["Bob", 30, "上海"],
    ["Charlie", 35, "广州"],
]
# newline="" lets the csv module manage line endings itself.
with open("data.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows(data)

# Read the CSV back; newline="" is recommended for reading as well, so that
# newlines embedded in quoted fields are interpreted correctly.
with open("data.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

# DictWriter: rows are dicts keyed by column name.
data = [
    {"name": "Alice", "age": 25, "city": "北京"},
    {"name": "Bob", "age": 30, "city": "上海"},
]
fieldnames = ["name", "age", "city"]
with open("data.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)

# DictReader: each row comes back as a dict keyed by the header line.
with open("data.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(f"{row['name']}: {row['age']}岁,来自{row['city']}")

# Custom delimiter (tab-separated values).
# `data` holds dicts at this point, so a DictWriter is required: a plain
# csv.writer would iterate each dict and write its keys instead of its values.
with open("data.tsv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter="\t")
    writer.writeheader()
    writer.writerows(data)
9.1.4 XML处理
python
复制代码
import xml.etree.ElementTree as ET
# Build an XML document in memory.
root = ET.Element("students")

student1 = ET.SubElement(root, "student")
ET.SubElement(student1, "name").text = "Alice"
ET.SubElement(student1, "age").text = "25"

student2 = ET.SubElement(root, "student")
ET.SubElement(student2, "name").text = "Bob"
ET.SubElement(student2, "age").text = "30"

# Wrap the root element in a tree and write it out with an XML declaration.
tree = ET.ElementTree(root)
tree.write("students.xml", encoding="utf-8", xml_declaration=True)

# Read the XML back and walk every <student> child of the root.
tree = ET.parse("students.xml")
root = tree.getroot()
for student in root.findall("student"):
    name = student.find("name").text
    age = student.find("age").text
    print(f"{name}: {age}岁")
9.2 时间与日期
9.2.1 datetime模块
python
复制代码
from datetime import datetime, date, time, timedelta
# Current date and time (datetime.now() with no argument gives naive local time)
now = datetime.now()
print(now) # 2026-01-01 12:00:00.123456
today = date.today()
print(today) # 2026-01-01
# Construct date/time objects explicitly
dt = datetime(2026, 1, 1, 12, 30, 45)
print(dt)
d = date(2026, 1, 1)
print(d)
t = time(12, 30, 45)
print(t)
# Formatted output
print(now.strftime("%Y-%m-%d %H:%M:%S"))
print(now.strftime("%Y年%m月%d日 %H时%M分%S秒"))
print(now.strftime("%A, %B %d, %Y"))
# Common format codes
# %Y - four-digit year   %y - two-digit year
# %m - month (01-12)   %B - month name   %b - abbreviated month name
# %d - day of month (01-31)
# %H - hour (00-23)   %I - hour (01-12)
# %M - minute (00-59)
# %S - second (00-59)
# %A - weekday name   %a - abbreviated weekday name
# %p - AM/PM
# Parse a string into a datetime
date_str = "2026-01-01 12:30:45"
dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
print(dt)
# Date arithmetic
now = datetime.now()
# Add or subtract a timedelta
tomorrow = now + timedelta(days=1)
yesterday = now - timedelta(days=1)
next_week = now + timedelta(weeks=1)
next_hour = now + timedelta(hours=1)
print(f"明天:{tomorrow}")
print(f"昨天:{yesterday}")
# Difference between two datetimes (the result is a timedelta)
date1 = datetime(2026, 1, 1)
date2 = datetime(2026, 12, 31)
diff = date2 - date1
print(f"相差{diff.days}天")
print(f"相差{diff.total_seconds()}秒")
# Individual fields of a datetime
dt = datetime.now()
print(f"年:{dt.year}")
print(f"月:{dt.month}")
print(f"日:{dt.day}")
print(f"小时:{dt.hour}")
print(f"分钟:{dt.minute}")
print(f"秒:{dt.second}")
print(f"星期几:{dt.weekday()}") # 0-6, where 0 is Monday
9.2.2 time模块
python
复制代码
import time
# Current Unix timestamp (seconds since the epoch, as a float).
timestamp = time.time()
print(timestamp)  # 1735747200.123456

# Pause execution.
print("开始")
time.sleep(2)  # sleep for 2 seconds
print("2秒后")

# Simple wall-clock timing.
start = time.time()
# Some work to measure.
for i in range(1000000):
    pass
end = time.time()
print(f"耗时:{end - start}秒")

# Broken-down local time.
local_time = time.localtime()
print(local_time)
# time.struct_time(tm_year=2026, tm_mon=1, tm_mday=1, ...)

# Format a struct_time.
formatted = time.strftime("%Y-%m-%d %H:%M:%S", local_time)
print(formatted)

# perf_counter() is a higher-resolution clock, better suited to measuring
# short durations.
start = time.perf_counter()
time.sleep(0.1)
end = time.perf_counter()
print(f"精确耗时:{end - start}秒")
9.2.3 calendar模块
python
复制代码
import calendar
# Print the calendar for a single month
print(calendar.month(2026, 1))
# Print the calendar for a whole year
print(calendar.calendar(2026))
# Leap-year check
print(calendar.isleap(2024)) # True
print(calendar.isleap(2025)) # False
# Weekday of the first day of a month, and the number of days in that month
print(calendar.monthrange(2026, 2)) # (6, 28) - Feb 1 is a Sunday (6), and the month has 28 days
# Day of the week for a date (0 = Monday ... 6 = Sunday)
print(calendar.weekday(2026, 1, 1)) # 3 (Thursday)
# Choose which weekday each week starts on
calendar.setfirstweekday(calendar.SUNDAY) # make Sunday the first day of the week
9.3 正则表达式
9.3.1 re模块基础
python
复制代码
import re
# Pattern matching.
text = "Hello, my phone number is 138-1234-5678"

# search() finds the first match anywhere in the string.
match = re.search(r"\d{3}-\d{4}-\d{4}", text)
if match:
    print(match.group())  # 138-1234-5678
    print(match.start())  # index where the match starts
    print(match.end())  # index just past the end of the match

# match() only matches at the beginning of the string.
text = "Python is great"
match = re.match(r"Python", text)
if match:
    print(match.group())  # Python

# No match: the string does not start with "Java".
match = re.match(r"Java", text)
print(match)  # None

# findall() returns every non-overlapping match as a list.
text = "My numbers are 123, 456, and 789"
numbers = re.findall(r"\d+", text)
print(numbers)  # ['123', '456', '789']

# finditer() yields match objects lazily.
for match in re.finditer(r"\d+", text):
    print(match.group(), match.start(), match.end())

# split() splits the string on a pattern.
text = "apple,banana;cherry orange"
fruits = re.split(r"[,;\s]+", text)
print(fruits)  # ['apple', 'banana', 'cherry', 'orange']

# sub() replaces every match.
text = "My phone is 138-1234-5678"
new_text = re.sub(r"\d", "*", text)
print(new_text)  # My phone is ***-****-****

# Replace only the first `count` matches.
new_text = re.sub(r"\d", "*", text, count=3)
print(new_text)  # My phone is ***-1234-5678
9.3.2 正则表达式语法
python
复制代码
# Basic tokens
# .  - any character except a newline
# \d - digit ([0-9] with re.ASCII; any Unicode decimal digit for str patterns)
# \D - non-digit
# \w - word character ([a-zA-Z0-9_] with re.ASCII; Unicode-aware for str patterns)
# \W - non-word character
# \s - whitespace
# \S - non-whitespace

# Quantifiers
# *     - zero or more
# +     - one or more
# ?     - zero or one
# {n}   - exactly n
# {n,}  - at least n
# {n,m} - between n and m

# Examples
print(re.findall(r"\d+", "a1b22c333"))  # ['1', '22', '333']
print(re.findall(r"\d{3}", "12 123 1234"))  # ['123', '123']
print(re.findall(r"\d{2,4}", "1 12 123 1234"))  # ['12', '123', '1234']

# Character classes
# [abc]  - a, b or c
# [a-z]  - any character from a to z
# [^abc] - anything except a, b, c
print(re.findall(r"[aeiou]", "hello"))  # ['e', 'o']
print(re.findall(r"[^aeiou]", "hello"))  # ['h', 'l', 'l']

# Anchors
# ^  - start of string
# $  - end of string
# \b - word boundary
text = "The cat sat on the mat"
print(re.findall(r"\bcat\b", text))  # ['cat']
print(re.findall(r"\bcat", "catalog"))  # ['cat']

# Grouping
# () - group
# |  - alternation
match = re.search(r"(\d{3})-(\d{4})-(\d{4})", "138-1234-5678")
if match:
    print(match.group(0))  # whole match: 138-1234-5678
    print(match.group(1))  # group 1: 138
    print(match.group(2))  # group 2: 1234
    print(match.group(3))  # group 3: 5678
    print(match.groups())  # all groups: ('138', '1234', '5678')

# Named groups
match = re.search(
    r"(?P<area>\d{3})-(?P<prefix>\d{4})-(?P<number>\d{4})",
    "138-1234-5678",
)
if match:
    print(match.group("area"))  # 138
    print(match.group("prefix"))  # 1234
    print(match.groupdict())  # {'area': '138', 'prefix': '1234', 'number': '5678'}
9.3.3 编译正则表达式
python
复制代码
# Compile a pattern once so the resulting object can be reused
pattern = re.compile(r"\d{3}-\d{4}-\d{4}")
# Use the compiled pattern's own methods
text = "Phone: 138-1234-5678"
match = pattern.search(text)
print(match.group())
# Flags
# re.IGNORECASE (re.I) - case-insensitive matching
# re.MULTILINE (re.M) - ^ and $ also match at line boundaries
# re.DOTALL (re.S) - . matches every character, including newline
# re.VERBOSE (re.X) - whitespace is ignored and # comments are allowed in the pattern
pattern = re.compile(r"python", re.IGNORECASE)
print(pattern.findall("Python is great, python rocks!"))
# ['Python', 'python']
# Verbose mode: the pattern below carries its own inline comments
pattern = re.compile(r"""
(\d{3}) # 区号
- # 分隔符
(\d{4}) # 前缀
- # 分隔符
(\d{4}) # 号码
""", re.VERBOSE)
match = pattern.search("138-1234-5678")
print(match.groups())
9.3.4 实用案例
python
复制代码
# Email validation
def is_valid_email(email):
    """Return True if *email* has the shape ``local@domain.tld``.

    fullmatch() is used instead of match(): with match(), the trailing ``$``
    also matches just before a final newline, so a string ending in a
    newline character would wrongly be accepted.
    """
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return re.fullmatch(pattern, email) is not None


print(is_valid_email("user@example.com"))  # True
print(is_valid_email("invalid.email"))  # False
# Mobile phone number validation
def is_valid_phone(phone):
    """Return True if *phone* is 11 digits: ``1``, then 3-9, then nine digits.

    fullmatch() is used instead of match(): with match(), the trailing ``$``
    also matches just before a final newline, so a number followed by a
    newline character would wrongly be accepted.
    """
    pattern = r'^1[3-9]\d{9}$'
    return re.fullmatch(pattern, phone) is not None


print(is_valid_phone("13812345678"))  # True
print(is_valid_phone("12345678901"))  # False
# Extract URLs: "http" or "https", then everything up to the next whitespace
text = "Visit https://www.example.com and http://test.com"
urls = re.findall(r'https?://[^\s]+', text)
print(urls) # ['https://www.example.com', 'http://test.com']
# Extract the text between an opening tag and the next closing tag
# NOTE: this does not check that the two tag names match or handle nesting
html = "<div>Hello</div><p>World</p>"
content = re.findall(r'<[^>]+>(.*?)</[^>]+>', html)
print(content) # ['Hello', 'World']
# Strip HTML tags
clean_text = re.sub(r'<[^>]+>', '', html)
print(clean_text) # HelloWorld
9.4 集合与队列
9.4.1 collections模块
python
复制代码
from collections import Counter, defaultdict, OrderedDict, deque, namedtuple
# Counter - tallies how often each item occurs
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
counter = Counter(words)
print(counter) # Counter({'apple': 3, 'banana': 2, 'cherry': 1})
# The n most common elements, as (element, count) pairs
print(counter.most_common(2)) # [('apple', 3), ('banana', 2)]
# Counting characters: iterating a string yields one character at a time
text = "hello world"
char_count = Counter(text)
print(char_count) # Counter({'l': 3, 'o': 2, ...})
# defaultdict - a dict that creates a default value for missing keys.
dd = defaultdict(int)  # missing keys start at int() == 0
dd["a"] += 1
dd["b"] += 2
print(dd)  # defaultdict(<class 'int'>, {'a': 1, 'b': 2})

# A list as the default value.
dd = defaultdict(list)
dd["fruits"].append("apple")
dd["fruits"].append("banana")
dd["vegetables"].append("carrot")
print(dd)

# Grouping records by a key.
students = [
    {"name": "Alice", "class": "A"},
    {"name": "Bob", "class": "B"},
    {"name": "Charlie", "class": "A"},
]
groups = defaultdict(list)
for student in students:
    groups[student["class"]].append(student["name"])
print(dict(groups))
# {'A': ['Alice', 'Charlie'], 'B': ['Bob']}
# OrderedDict - a dict that remembers insertion order
# (plain dict also preserves insertion order from Python 3.7 on)
od = OrderedDict()
od["a"] = 1
od["b"] = 2
od["c"] = 3
print(od)
# deque - double-ended queue.
d = deque()

# Append on the right.
d.append(1)
d.append(2)
d.append(3)
print(d)  # deque([1, 2, 3])

# Append on the left.
d.appendleft(0)
print(d)  # deque([0, 1, 2, 3])

# Pop from the right.
d.pop()
print(d)  # deque([0, 1, 2])

# Pop from the left.
d.popleft()
print(d)  # deque([1, 2])

# Rotation.
d = deque([1, 2, 3, 4, 5])
d.rotate(2)  # rotate two steps to the right
print(d)  # deque([4, 5, 1, 2, 3])

# Bounded length: once full, appending on the right drops items from the left.
d = deque(maxlen=3)
for i in range(5):
    d.append(i)
    print(d)
# deque([0], maxlen=3)
# deque([0, 1], maxlen=3)
# deque([0, 1, 2], maxlen=3)
# deque([1, 2, 3], maxlen=3)
# deque([2, 3, 4], maxlen=3)
# namedtuple - a tuple subclass whose fields can also be read by name
Point = namedtuple('Point', ['x', 'y'])
p = Point(10, 20)
print(p.x, p.y) # 10 20
print(p[0], p[1]) # 10 20
Person = namedtuple('Person', ['name', 'age', 'city'])
alice = Person('Alice', 25, 'Beijing')
print(alice.name) # Alice
9.4.2 heapq模块
python
复制代码
import heapq
# Heap (priority queue)
numbers = [5, 1, 8, 3, 2, 9, 4, 7, 6]
# Rearrange the list in place into a min-heap
heapq.heapify(numbers)
print(numbers) # [1, 2, 4, 3, 5, 9, 8, 7, 6]
# Pop the smallest element
smallest = heapq.heappop(numbers)
print(smallest) # 1
print(numbers) # [2, 3, 4, 6, 5, 9, 8, 7]
# Push an element
heapq.heappush(numbers, 0)
print(numbers) # [0, 2, 4, 3, 5, 9, 8, 7, 6]
# The n smallest elements
numbers = [5, 1, 8, 3, 2, 9, 4, 7, 6]
print(heapq.nsmallest(3, numbers)) # [1, 2, 3]
# The n largest elements
print(heapq.nlargest(3, numbers)) # [9, 8, 7]
# Rank records by a key function
students = [
{"name": "Alice", "score": 85},
{"name": "Bob", "score": 92},
{"name": "Charlie", "score": 78}
]
top_students = heapq.nlargest(2, students, key=lambda s: s["score"])
print(top_students)
# [{'name': 'Bob', 'score': 92}, {'name': 'Alice', 'score': 85}]
9.5 其他常用模块
9.5.1 sys模块
python
复制代码
import sys
# Command-line arguments (element 0 is the script name)
print(sys.argv) # ['script.py', 'arg1', 'arg2']
# Python version
print(sys.version)
print(sys.version_info)
# Module search path
print(sys.path)
# Exiting the program
# sys.exit(0) # exit status 0: success
# sys.exit(1) # non-zero exit status: failure
# Standard input / output streams
sys.stdout.write("Hello\n")
# line = sys.stdin.readline()
# sys.maxsize is the largest value a Py_ssize_t can hold (the upper bound on
# container sizes and indexes); it is not a limit on int, which is unbounded
print(sys.maxsize)
# Recursion limit
print(sys.getrecursionlimit()) # 1000 by default
# sys.setrecursionlimit(2000)
# Platform identifier
print(sys.platform) # 'win32', 'linux', 'darwin'
9.5.2 logging模块
python
复制代码
import logging
# Basic configuration of the root logger: records from DEBUG up go to
# app.log, which is truncated on each run (filemode='w').
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    filename='app.log',
    filemode='w',
)

# One call per severity level, lowest to highest.
logging.debug("调试信息")
logging.info("一般信息")
logging.warning("警告信息")
logging.error("错误信息")
logging.critical("严重错误")

# Create a named logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# This logger gets its own app.log handler below. Without this line every
# record would also propagate to the root handler configured above, which
# has the same file open, and be written to it a second time.
logger.propagate = False

# Create the handlers.
console_handler = logging.StreamHandler()
file_handler = logging.FileHandler('app.log')

# Per-handler thresholds: the console shows INFO and above, the file keeps
# everything from DEBUG up.
console_handler.setLevel(logging.INFO)
file_handler.setLevel(logging.DEBUG)

# Create the formatter and attach it to both handlers.
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
console_handler.setFormatter(formatter)
file_handler.setFormatter(formatter)

# Attach the handlers to the logger.
logger.addHandler(console_handler)
logger.addHandler(file_handler)

# Use the logger.
logger.debug("这是调试信息")
logger.info("这是一般信息")
logger.error("这是错误信息")
9.5.3 argparse模块
python
复制代码
import argparse
# Create the parser.
parser = argparse.ArgumentParser(description="示例程序")

# Declare the arguments: two positionals, one boolean flag and one option
# with a default value.
parser.add_argument("name", help="姓名")
parser.add_argument("age", type=int, help="年龄")
parser.add_argument("-v", "--verbose", action="store_true", help="详细输出")
parser.add_argument("-o", "--output", default="output.txt", help="输出文件")

# Parse sys.argv; on invalid input argparse prints usage and exits.
args = parser.parse_args()

# Use the parsed values.
print(f"姓名:{args.name}")
print(f"年龄:{args.age}")
if args.verbose:
    print("详细模式")
print(f"输出到:{args.output}")

# Example invocation:
# python script.py Alice 25 -v -o result.txt
9.6 参考资料
官方文档
学习资源
工具