第09章-标准库与常用模块

9.1 数据序列化

9.1.1 JSON模块

python 复制代码
import json

# Python对象转JSON字符串
data = {
    "name": "Alice",
    "age": 25,
    "hobbies": ["reading", "music"],
    "married": False
}

# dumps() - 转为JSON字符串
json_str = json.dumps(data)
print(json_str)
# {"name": "Alice", "age": 25, "hobbies": ["reading", "music"], "married": false}

# 格式化输出
json_str = json.dumps(data, indent=4)
print(json_str)

# 确保中文正常显示
data_cn = {"姓名": "张三", "年龄": 30}
json_str = json.dumps(data_cn, ensure_ascii=False)
print(json_str)  # {"姓名": "张三", "年龄": 30}

# JSON字符串转Python对象
json_str = '{"name": "Bob", "age": 30}'
data = json.loads(json_str)
print(data)  # {'name': 'Bob', 'age': 30}
print(type(data))  # <class 'dict'>

# 写入文件
with open("data.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=4)

# 从文件读取
with open("data.json", "r", encoding="utf-8") as f:
    loaded_data = json.load(f)
    print(loaded_data)

# 自定义对象序列化
class Person:
    """Plain object holding a name and an age, used in the custom JSON encoding demo."""

    def __init__(self, name, age):
        # Bind both constructor arguments to same-named instance attributes.
        self.name, self.age = name, age

def person_encoder(obj):
    """Convert a Person into a JSON-serializable dict for ``json.dumps(default=...)``.

    Args:
        obj: The object the JSON encoder could not serialize on its own.

    Returns:
        A dict with the person's ``name`` and ``age``.

    Raises:
        TypeError: If ``obj`` is not a ``Person``.
    """
    if isinstance(obj, Person):
        return {"name": obj.name, "age": obj.age}
    # Report the bare class name (e.g. "set"), matching the message the json
    # module itself raises, instead of the "<class 'set'>" repr.
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

person = Person("Alice", 25)
json_str = json.dumps(person, default=person_encoder)
print(json_str)

# 或使用自定义编码器类
class PersonEncoder(json.JSONEncoder):
    """JSONEncoder subclass that knows how to serialize Person instances."""

    def default(self, obj):
        """Return a dict for a Person; defer every other type to the base class."""
        if not isinstance(obj, Person):
            # Anything that is not a Person is handed to JSONEncoder.default().
            return super().default(obj)
        return {"name": obj.name, "age": obj.age}

json_str = json.dumps(person, cls=PersonEncoder)
print(json_str)

9.1.2 Pickle模块

python 复制代码
import pickle

# Python对象序列化(二进制)
data = {
    "name": "Alice",
    "age": 25,
    "hobbies": ["reading", "music"]
}

# dumps() - 序列化为bytes
pickled = pickle.dumps(data)
print(pickled)  # b'\x80\x04\x95...'
print(type(pickled))  # <class 'bytes'>

# loads() - 反序列化
unpickled = pickle.loads(pickled)
print(unpickled)

# 写入文件(二进制模式)
with open("data.pkl", "wb") as f:
    pickle.dump(data, f)

# 从文件读取
with open("data.pkl", "rb") as f:
    loaded = pickle.load(f)
    print(loaded)

# Pickle可以序列化更多Python对象
class Person:
    """Simple object with ``name`` and ``age`` attributes, pickled below as an example."""

    def __init__(self, name, age):
        # Store both arguments unchanged on the instance.
        self.name, self.age = name, age

person = Person("Bob", 30)

# 序列化自定义对象
with open("person.pkl", "wb") as f:
    pickle.dump(person, f)

with open("person.pkl", "rb") as f:
    loaded_person = pickle.load(f)
    print(f"{loaded_person.name}, {loaded_person.age}")

# 注意:Pickle不安全,不要反序列化不信任的数据

9.1.3 CSV模块

python 复制代码
import csv

# 写入CSV
data = [
    ["姓名", "年龄", "城市"],
    ["Alice", 25, "北京"],
    ["Bob", 30, "上海"],
    ["Charlie", 35, "广州"]
]

with open("data.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows(data)

# Read the CSV file back, one list per row.
# newline="" leaves line-ending handling to the csv module, so quoted fields
# that contain embedded newlines are parsed correctly.
with open("data.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

# 使用DictWriter
data = [
    {"name": "Alice", "age": 25, "city": "北京"},
    {"name": "Bob", "age": 30, "city": "上海"}
]

with open("data.csv", "w", newline="", encoding="utf-8") as f:
    fieldnames = ["name", "age", "city"]
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)

# Read rows as dicts keyed by the header line.
# newline="" leaves line-ending handling to the csv module, so quoted fields
# that contain embedded newlines are parsed correctly.
with open("data.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(f"{row['name']}: {row['age']}岁,来自{row['city']}")

# Custom delimiter: write tab-separated values.
# `data` is a list of dicts at this point, so a plain csv.writer would iterate
# each dict and emit only its keys on every row. DictWriter writes the header
# once and then the actual values.
with open("data.tsv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["name", "age", "city"], delimiter="\t")
    writer.writeheader()
    writer.writerows(data)

9.1.4 XML处理

python 复制代码
import xml.etree.ElementTree as ET

# 创建XML
root = ET.Element("students")

student1 = ET.SubElement(root, "student")
ET.SubElement(student1, "name").text = "Alice"
ET.SubElement(student1, "age").text = "25"

student2 = ET.SubElement(root, "student")
ET.SubElement(student2, "name").text = "Bob"
ET.SubElement(student2, "age").text = "30"

# 生成XML树
tree = ET.ElementTree(root)
tree.write("students.xml", encoding="utf-8", xml_declaration=True)

# 读取XML
tree = ET.parse("students.xml")
root = tree.getroot()

for student in root.findall("student"):
    name = student.find("name").text
    age = student.find("age").text
    print(f"{name}: {age}岁")

9.2 时间与日期

9.2.1 datetime模块

python 复制代码
from datetime import datetime, date, time, timedelta

# 获取当前时间
now = datetime.now()
print(now)  # 2026-01-01 12:00:00.123456

today = date.today()
print(today)  # 2026-01-01

# 创建日期时间对象
dt = datetime(2026, 1, 1, 12, 30, 45)
print(dt)

d = date(2026, 1, 1)
print(d)

t = time(12, 30, 45)
print(t)

# 格式化输出
print(now.strftime("%Y-%m-%d %H:%M:%S"))
print(now.strftime("%Y年%m月%d日 %H时%M分%S秒"))
print(now.strftime("%A, %B %d, %Y"))

# 常用格式化符号
# %Y - 四位年份  %y - 两位年份
# %m - 月份(01-12)  %B - 月份名称  %b - 月份缩写
# %d - 日期(01-31)
# %H - 小时(00-23)  %I - 小时(01-12)
# %M - 分钟(00-59)
# %S - 秒(00-59)
# %A - 星期名称  %a - 星期缩写
# %p - AM/PM

# 解析字符串为日期时间
date_str = "2026-01-01 12:30:45"
dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
print(dt)

# 时间计算
now = datetime.now()

# 加减时间
tomorrow = now + timedelta(days=1)
yesterday = now - timedelta(days=1)
next_week = now + timedelta(weeks=1)
next_hour = now + timedelta(hours=1)

print(f"明天:{tomorrow}")
print(f"昨天:{yesterday}")

# 时间差
date1 = datetime(2026, 1, 1)
date2 = datetime(2026, 12, 31)
diff = date2 - date1
print(f"相差{diff.days}天")
print(f"相差{diff.total_seconds()}秒")

# 日期属性
dt = datetime.now()
print(f"年:{dt.year}")
print(f"月:{dt.month}")
print(f"日:{dt.day}")
print(f"小时:{dt.hour}")
print(f"分钟:{dt.minute}")
print(f"秒:{dt.second}")
print(f"星期几:{dt.weekday()}")  # 0-6,0是星期一

9.2.2 time模块

python 复制代码
import time

# 获取当前时间戳
timestamp = time.time()
print(timestamp)  # 1735747200.123456

# 暂停执行
print("开始")
time.sleep(2)  # 暂停2秒
print("2秒后")

# 计时
start = time.time()
# 执行一些操作
for i in range(1000000):
    pass
end = time.time()
print(f"耗时:{end - start}秒")

# 时间元组
local_time = time.localtime()
print(local_time)
# time.struct_time(tm_year=2026, tm_mon=1, tm_mday=1, ...)

# 格式化
formatted = time.strftime("%Y-%m-%d %H:%M:%S", local_time)
print(formatted)

# 性能计数器(更精确)
start = time.perf_counter()
time.sleep(0.1)
end = time.perf_counter()
print(f"精确耗时:{end - start}秒")

9.2.3 calendar模块

python 复制代码
import calendar

# 打印日历
print(calendar.month(2026, 1))

# 打印全年日历
print(calendar.calendar(2026))

# 判断闰年
print(calendar.isleap(2024))  # True
print(calendar.isleap(2025))  # False

# 某月有多少天
print(calendar.monthrange(2026, 2))  # (6, 28) - 2月1日是星期日(0表示星期一,6表示星期日),有28天

# 星期几
print(calendar.weekday(2026, 1, 1))  # 3(星期四)

# 设置每周的起始日
calendar.setfirstweekday(calendar.SUNDAY)  # 设置周日为一周的第一天

9.3 正则表达式

9.3.1 re模块基础

python 复制代码
import re

# 匹配模式
text = "Hello, my phone number is 138-1234-5678"

# search() - 搜索第一个匹配
match = re.search(r"\d{3}-\d{4}-\d{4}", text)
if match:
    print(match.group())  # 138-1234-5678
    print(match.start())  # 匹配开始位置
    print(match.end())    # 匹配结束位置

# match() - 从字符串开头匹配
text = "Python is great"
match = re.match(r"Python", text)
if match:
    print(match.group())  # Python

# 不匹配
match = re.match(r"Java", text)
print(match)  # None

# findall() - 查找所有匹配
text = "My numbers are 123, 456, and 789"
numbers = re.findall(r"\d+", text)
print(numbers)  # ['123', '456', '789']

# finditer() - 返回迭代器
for match in re.finditer(r"\d+", text):
    print(match.group(), match.start(), match.end())

# split() - 分割字符串
text = "apple,banana;cherry orange"
fruits = re.split(r"[,;\s]+", text)
print(fruits)  # ['apple', 'banana', 'cherry', 'orange']

# sub() - 替换
text = "My phone is 138-1234-5678"
new_text = re.sub(r"\d", "*", text)
print(new_text)  # My phone is ***-****-****

# 替换并限制次数
new_text = re.sub(r"\d", "*", text, count=3)
print(new_text)  # My phone is ***-1234-5678

9.3.2 正则表达式语法

python 复制代码
# 基本字符
# . - 任意字符(除换行符)
# \d - 数字[0-9]
# \D - 非数字
# \w - 字母数字下划线[a-zA-Z0-9_]
# \W - 非字母数字下划线
# \s - 空白字符
# \S - 非空白字符

# 量词
# * - 0次或多次
# + - 1次或多次
# ? - 0次或1次
# {n} - 恰好n次
# {n,} - 至少n次
# {n,m} - n到m次

# 示例
print(re.findall(r"\d+", "a1b22c333"))  # ['1', '22', '333']
print(re.findall(r"\d{3}", "12 123 1234"))  # ['123', '123']
print(re.findall(r"\d{2,4}", "1 12 123 1234"))  # ['12', '123', '1234']

# 字符类
# [abc] - a或b或c
# [a-z] - a到z
# [^abc] - 除了a、b、c

print(re.findall(r"[aeiou]", "hello"))  # ['e', 'o']
print(re.findall(r"[^aeiou]", "hello"))  # ['h', 'l', 'l']

# 边界匹配
# ^ - 字符串开头
# $ - 字符串结尾
# \b - 单词边界

text = "The cat sat on the mat"
print(re.findall(r"\bcat\b", text))  # ['cat']
print(re.findall(r"\bcat", "catalog"))  # ['cat']

# 分组
# () - 分组
# | - 或

match = re.search(r"(\d{3})-(\d{4})-(\d{4})", "138-1234-5678")
if match:
    print(match.group(0))  # 完整匹配:138-1234-5678
    print(match.group(1))  # 第一组:138
    print(match.group(2))  # 第二组:1234
    print(match.group(3))  # 第三组:5678
    print(match.groups())  # 所有组:('138', '1234', '5678')

# 命名分组
match = re.search(r"(?P<area>\d{3})-(?P<prefix>\d{4})-(?P<number>\d{4})",
                  "138-1234-5678")
if match:
    print(match.group("area"))    # 138
    print(match.group("prefix"))  # 1234
    print(match.groupdict())      # {'area': '138', 'prefix': '1234', 'number': '5678'}

9.3.3 编译正则表达式

python 复制代码
# 编译正则表达式(提高性能)
pattern = re.compile(r"\d{3}-\d{4}-\d{4}")

# 使用编译后的模式
text = "Phone: 138-1234-5678"
match = pattern.search(text)
print(match.group())

# 标志
# re.IGNORECASE (re.I) - 忽略大小写
# re.MULTILINE (re.M) - 多行模式
# re.DOTALL (re.S) - .匹配所有字符包括换行符
# re.VERBOSE (re.X) - 详细模式,可以添加注释

pattern = re.compile(r"python", re.IGNORECASE)
print(pattern.findall("Python is great, python rocks!"))
# ['Python', 'python']

# 详细模式
pattern = re.compile(r"""
    (\d{3})  # 区号
    -        # 分隔符
    (\d{4})  # 前缀
    -        # 分隔符
    (\d{4})  # 号码
""", re.VERBOSE)

match = pattern.search("138-1234-5678")
print(match.groups())

9.3.4 实用案例

python 复制代码
# 邮箱验证
def is_valid_email(email):
    """Return True if ``email`` looks like a simple ``local@domain.tld`` address.

    This is a pragmatic format check, not full RFC 5322 validation.

    Args:
        email: The string to check.

    Returns:
        True when the whole string matches the pattern, otherwise False.
    """
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch() must consume the entire string. With match() and a trailing
    # "$", an input ending in "\n" would wrongly be accepted.
    return re.fullmatch(pattern, email) is not None

print(is_valid_email("user@example.com"))  # True
print(is_valid_email("invalid.email"))     # False

# 手机号验证
def is_valid_phone(phone):
    """Return True if ``phone`` is an 11-digit mainland-China style mobile number.

    The number must start with ``1``, followed by a digit from 3 to 9 and then
    nine more ASCII digits.

    Args:
        phone: The string to check.

    Returns:
        True when the whole string matches the pattern, otherwise False.
    """
    # [0-9] instead of \d: in str patterns \d also matches non-ASCII Unicode
    # digits (e.g. full-width digits), which are not valid in a phone number.
    pattern = r'1[3-9][0-9]{9}'
    # fullmatch() rejects a trailing "\n" that match() with "$" would accept.
    return re.fullmatch(pattern, phone) is not None

print(is_valid_phone("13812345678"))  # True
print(is_valid_phone("12345678901"))  # False

# 提取URL
text = "Visit https://www.example.com and http://test.com"
urls = re.findall(r'https?://[^\s]+', text)
print(urls)  # ['https://www.example.com', 'http://test.com']

# 提取HTML标签内容
html = "<div>Hello</div><p>World</p>"
content = re.findall(r'<[^>]+>(.*?)</[^>]+>', html)
print(content)  # ['Hello', 'World']

# 移除HTML标签
clean_text = re.sub(r'<[^>]+>', '', html)
print(clean_text)  # HelloWorld

9.4 集合与队列

9.4.1 collections模块

python 复制代码
from collections import Counter, defaultdict, OrderedDict, deque, namedtuple

# Counter - 计数器
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
counter = Counter(words)
print(counter)  # Counter({'apple': 3, 'banana': 2, 'cherry': 1})

# 最常见的元素
print(counter.most_common(2))  # [('apple', 3), ('banana', 2)]

# 统计字符
text = "hello world"
char_count = Counter(text)
print(char_count)  # Counter({'l': 3, 'o': 2, ...})

# defaultdict - 默认字典
# 不存在的键自动创建默认值
dd = defaultdict(int)  # 默认值为0
dd["a"] += 1
dd["b"] += 2
print(dd)  # defaultdict(<class 'int'>, {'a': 1, 'b': 2})

# 列表默认值
dd = defaultdict(list)
dd["fruits"].append("apple")
dd["fruits"].append("banana")
dd["vegetables"].append("carrot")
print(dd)

# 分组
students = [
    {"name": "Alice", "class": "A"},
    {"name": "Bob", "class": "B"},
    {"name": "Charlie", "class": "A"}
]

groups = defaultdict(list)
for student in students:
    groups[student["class"]].append(student["name"])

print(dict(groups))
# {'A': ['Alice', 'Charlie'], 'B': ['Bob']}

# OrderedDict - 有序字典(Python 3.7+普通dict已有序)
od = OrderedDict()
od["a"] = 1
od["b"] = 2
od["c"] = 3
print(od)

# deque - 双端队列
d = deque()

# 右侧添加
d.append(1)
d.append(2)
d.append(3)
print(d)  # deque([1, 2, 3])

# 左侧添加
d.appendleft(0)
print(d)  # deque([0, 1, 2, 3])

# 右侧弹出
d.pop()
print(d)  # deque([0, 1, 2])

# 左侧弹出
d.popleft()
print(d)  # deque([1, 2])

# 旋转
d = deque([1, 2, 3, 4, 5])
d.rotate(2)  # 向右旋转2个位置
print(d)  # deque([4, 5, 1, 2, 3])

# 限制长度
d = deque(maxlen=3)
for i in range(5):
    d.append(i)
    print(d)
# deque([0], maxlen=3)
# deque([0, 1], maxlen=3)
# deque([0, 1, 2], maxlen=3)
# deque([1, 2, 3], maxlen=3)
# deque([2, 3, 4], maxlen=3)

# namedtuple - 命名元组
Point = namedtuple('Point', ['x', 'y'])
p = Point(10, 20)
print(p.x, p.y)  # 10 20
print(p[0], p[1])  # 10 20

Person = namedtuple('Person', ['name', 'age', 'city'])
alice = Person('Alice', 25, 'Beijing')
print(alice.name)  # Alice

9.4.2 heapq模块

python 复制代码
import heapq

# 堆(优先队列)
numbers = [5, 1, 8, 3, 2, 9, 4, 7, 6]

# 转换为堆
heapq.heapify(numbers)
print(numbers)  # [1, 2, 4, 3, 5, 9, 8, 7, 6]

# 弹出最小元素
smallest = heapq.heappop(numbers)
print(smallest)  # 1
print(numbers)   # [2, 3, 4, 6, 5, 9, 8, 7]

# 添加元素
heapq.heappush(numbers, 0)
print(numbers)  # [0, 2, 4, 3, 5, 9, 8, 7, 6]

# 最小的n个元素
numbers = [5, 1, 8, 3, 2, 9, 4, 7, 6]
print(heapq.nsmallest(3, numbers))  # [1, 2, 3]

# 最大的n个元素
print(heapq.nlargest(3, numbers))  # [9, 8, 7]

# 根据属性排序
students = [
    {"name": "Alice", "score": 85},
    {"name": "Bob", "score": 92},
    {"name": "Charlie", "score": 78}
]

top_students = heapq.nlargest(2, students, key=lambda s: s["score"])
print(top_students)
# [{'name': 'Bob', 'score': 92}, {'name': 'Alice', 'score': 85}]

9.5 其他常用模块

9.5.1 sys模块

python 复制代码
import sys

# 命令行参数
print(sys.argv)  # ['script.py', 'arg1', 'arg2']

# Python版本
print(sys.version)
print(sys.version_info)

# 模块搜索路径
print(sys.path)

# 退出程序
# sys.exit(0)  # 正常退出
# sys.exit(1)  # 异常退出

# 标准输入输出
sys.stdout.write("Hello\n")
# line = sys.stdin.readline()

# 最大整数
print(sys.maxsize)

# 递归深度
print(sys.getrecursionlimit())  # 默认1000
# sys.setrecursionlimit(2000)

# 平台信息
print(sys.platform)  # 'win32', 'linux', 'darwin'

9.5.2 logging模块

python 复制代码
import logging

# 基本配置
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    filename='app.log',
    filemode='w'
)

# 日志级别
logging.debug("调试信息")
logging.info("一般信息")
logging.warning("警告信息")
logging.error("错误信息")
logging.critical("严重错误")

# Create a named logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# basicConfig() above already attached an app.log handler to the root logger.
# Without this, every record handled here would also propagate to the root
# logger and be written to app.log a second time.
logger.propagate = False

# 创建处理器
console_handler = logging.StreamHandler()
file_handler = logging.FileHandler('app.log')

# 设置级别
console_handler.setLevel(logging.INFO)
file_handler.setLevel(logging.DEBUG)

# 创建格式器
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

console_handler.setFormatter(formatter)
file_handler.setFormatter(formatter)

# 添加处理器
logger.addHandler(console_handler)
logger.addHandler(file_handler)

# 使用logger
logger.debug("这是调试信息")
logger.info("这是一般信息")
logger.error("这是错误信息")

9.5.3 argparse模块

python 复制代码
import argparse

# 创建解析器
parser = argparse.ArgumentParser(description="示例程序")

# 添加参数
parser.add_argument("name", help="姓名")
parser.add_argument("age", type=int, help="年龄")
parser.add_argument("-v", "--verbose", action="store_true", help="详细输出")
parser.add_argument("-o", "--output", default="output.txt", help="输出文件")

# 解析参数
args = parser.parse_args()

# 使用参数
print(f"姓名:{args.name}")
print(f"年龄:{args.age}")
if args.verbose:
    print("详细模式")
print(f"输出到:{args.output}")

# 运行示例:
# python script.py Alice 25 -v -o result.txt

9.6 参考资料

官方文档

学习资源

工具

相关推荐
玄同7652 天前
Python 异常捕获与处理:从基础语法到工程化实践的万字深度指南
开发语言·人工智能·python·自然语言处理·正则表达式·nlp·知识图谱
zhuzhihongNO12 天前
Java正则表达式持续更新
正则表达式·pattern.dotall·正则表达式贪婪模式·正则表达式惰性模式·java正则表达式
玄同7653 天前
Python 正则表达式:LLM 噪声语料的精准清洗
人工智能·python·自然语言处理·正则表达式·nlp·知识图谱·rag
white-persist3 天前
【内网运维】Netsh 全体系 + Windows 系统专属命令行指令大全
运维·数据结构·windows·python·算法·安全·正则表达式
k***92163 天前
[C++][正则表达式]常用C++正则表达式用法
开发语言·c++·正则表达式
白日做梦Q4 天前
【MySQL】9.吃透关键SQL语法:从正则表达式、窗口函数、条件函数到结果集合并的实战拆解
数据库·sql·mysql·正则表达式
快点好好学习吧4 天前
PHP程序员到底为什么要学习正则表达式?使用场景是什么?底层原理是什么?
学习·正则表达式·php
坐不住的爱码4 天前
表单验证和正则表达式
正则表达式