5.1 Database Connection Basics
Concept: Python database connections
Python connects to databases through driver libraries. For MySQL, the common drivers are pymysql (synchronous) and aiomysql (asynchronous); for PostgreSQL, psycopg2 (synchronous) and asyncpg (asynchronous).
Connecting to MySQL with pymysql
Concept: pymysql is a pure-Python MySQL driver. It needs no compilation and works on every platform. A with statement manages opening and closing the connection automatically.
```bash
pip install pymysql
```
```python
import pymysql

# Create a connection
connection = pymysql.connect(
    host="localhost",
    port=3306,
    user="root",
    password="your_password",
    database="myapp",
    charset="utf8mb4"
)

# Let a with statement manage the connection lifecycle
with connection:
    with connection.cursor() as cursor:
        # Run a query
        cursor.execute("SELECT * FROM users WHERE id = %s", (1,))
        result = cursor.fetchone()
        print(result)

    # INSERT/UPDATE/DELETE must be committed
    with connection.cursor() as cursor:
        cursor.execute("INSERT INTO users (username, email) VALUES (%s, %s)", ("alice", "alice@example.com"))
        connection.commit()
```
Connecting to PostgreSQL with psycopg2
Concept: psycopg2 is the mainstream Python driver for PostgreSQL. It is fast and supports parameterized queries. Usage is similar to pymysql, with one caveat: for psycopg2, `with connection` manages a transaction (committing on success), not the connection itself.
```bash
pip install psycopg2-binary
```
```python
import psycopg2

connection = psycopg2.connect(
    host="localhost",
    port=5432,
    user="postgres",
    password="your_password",
    dbname="myapp"  # psycopg2 uses dbname; database is a deprecated alias
)

# Note: in psycopg2, `with connection` commits/rolls back a transaction;
# the connection stays open and must be closed separately.
with connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT * FROM users WHERE id = %s", (1,))
        result = cursor.fetchone()
        print(result)
connection.close()
```
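The overview above also mentions asyncpg, the asynchronous PostgreSQL driver. Here is a minimal sketch of the equivalent query with asyncpg; note that asyncpg uses numbered placeholders ($1, $2, ...) rather than %s, and the connection parameters mirror the psycopg2 example.
```python
import asyncio
import asyncpg

async def main():
    # asyncpg opens connections asynchronously; parameters mirror the psycopg2 example
    conn = await asyncpg.connect(
        host="localhost", port=5432,
        user="postgres", password="your_password", database="myapp"
    )
    try:
        # asyncpg uses $1-style placeholders instead of %s
        row = await conn.fetchrow("SELECT * FROM users WHERE id = $1", 1)
        print(row)
    finally:
        await conn.close()

asyncio.run(main())
```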
Connection parameters
Concept: Connection parameters specify where the database server is, how to authenticate, and which database to use by default. Getting them right is a prerequisite for connecting at all.
| Parameter | Description | Example |
|---|---|---|
| host | Database host | localhost / 127.0.0.1 |
| port | Port number | 3306 (MySQL) / 5432 (PostgreSQL) |
| user | Username | root / postgres |
| password | Password | your_password |
| database | Database name | myapp |
| charset | Character set | utf8mb4 (MySQL) |
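In practice these parameters usually come from configuration rather than being hard-coded. A minimal sketch, assuming the environment variable names shown here (DB_HOST, DB_USER, etc. are illustrative, not a standard):
```python
import os
import pymysql

# Hypothetical environment variable names; adjust to your deployment
DB_CONFIG = {
    "host": os.environ.get("DB_HOST", "localhost"),
    "port": int(os.environ.get("DB_PORT", "3306")),
    "user": os.environ.get("DB_USER", "root"),
    "password": os.environ.get("DB_PASSWORD", ""),
    "database": os.environ.get("DB_NAME", "myapp"),
    "charset": "utf8mb4",
}

connection = pymysql.connect(**DB_CONFIG)
```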
5.2 Insert, Update, and Delete
Concept: INSERT / UPDATE / DELETE
These operations modify database contents, so the transaction must be committed with connection.commit().
INSERT: adding rows
Concept: INSERT adds new records to a table. cursor.lastrowid returns the auto-increment primary key of the inserted row, and executemany inserts many rows in one call.
```python
import pymysql

connection = pymysql.connect(host="localhost", user="root", password="pwd", database="myapp")

with connection:
    with connection.cursor() as cursor:
        # Insert a single row
        sql = "INSERT INTO users (username, email, age) VALUES (%s, %s, %s)"
        cursor.execute(sql, ("alice", "alice@example.com", 25))
        connection.commit()
        print(f"Inserted ID: {cursor.lastrowid}")

    with connection.cursor() as cursor:
        # Bulk insert
        sql = "INSERT INTO users (username, email) VALUES (%s, %s)"
        data = [
            ("bob", "bob@example.com"),
            ("charlie", "charlie@example.com"),
            ("dave", "dave@example.com")
        ]
        cursor.executemany(sql, data)
        connection.commit()
        print(f"Bulk-inserted {cursor.rowcount} rows")
```
UPDATE: modifying rows
Concept: UPDATE modifies existing records. cursor.rowcount returns the number of affected rows and can be used to check whether the update took effect.
```python
with connection.cursor() as cursor:
    # Update a single row
    sql = "UPDATE users SET age = %s WHERE username = %s"
    cursor.execute(sql, (26, "alice"))
    connection.commit()
    print(f"Updated {cursor.rowcount} rows")

with connection.cursor() as cursor:
    # Bulk update
    sql = "UPDATE users SET is_active = %s WHERE created_at < %s"
    cursor.execute(sql, (False, "2024-01-01"))
    connection.commit()
    print(f"Updated {cursor.rowcount} rows")
```
DELETE: removing rows
Concept: DELETE removes records matching a condition. Be careful with deletes: almost always include a WHERE clause, or you will wipe the whole table.
```python
with connection.cursor() as cursor:
    # Delete a single row
    sql = "DELETE FROM users WHERE id = %s"
    cursor.execute(sql, (1,))
    connection.commit()
    print(f"Deleted {cursor.rowcount} rows")

with connection.cursor() as cursor:
    # Bulk delete
    sql = "DELETE FROM users WHERE is_active = %s"
    cursor.execute(sql, (False,))
    connection.commit()
    print(f"Deleted {cursor.rowcount} rows")
```
Transaction rollback
Concept: A rollback undoes the operations executed so far in a transaction. Call connection.rollback() when an error occurs to return to the state at the start of the transaction.
```python
with connection.cursor() as cursor:
    try:
        cursor.execute("INSERT INTO users (username) VALUES ('test')")
        cursor.execute("UPDATE users SET age = 100 WHERE username = 'nonexistent'")
        connection.commit()
    except Exception as e:
        connection.rollback()  # Roll back the transaction
        print(f"Transaction rolled back: {e}")
```
5.3 Queries
Concept: SELECT
Queries do not modify data, so no commit is needed. fetchone() returns one row, fetchall() returns all rows, fetchmany(n) returns n rows.
Basic queries
Concept: Basic queries read results with the fetch family of methods. fetchone() returns a single row (a tuple, or a dict with DictCursor), fetchall() returns every row, and fetchmany(n) returns up to n rows.
```python
with connection.cursor(pymysql.cursors.DictCursor) as cursor:
    # Fetch everything
    cursor.execute("SELECT * FROM users")
    users = cursor.fetchall()
    for user in users:
        print(user)

with connection.cursor() as cursor:
    # Fetch a single row
    cursor.execute("SELECT * FROM users WHERE id = %s", (1,))
    user = cursor.fetchone()
    print(user)

with connection.cursor() as cursor:
    # Fetch several rows (capped by LIMIT)
    cursor.execute("SELECT * FROM users LIMIT %s", (10,))
    users = cursor.fetchmany(5)
    for user in users:
        print(user)
```
Conditional queries
Concept: Conditional queries filter data with a WHERE clause. They support AND/OR combinations, LIKE pattern matching, IN list matching, and BETWEEN range queries.
```python
with connection.cursor() as cursor:
    # AND condition
    cursor.execute(
        "SELECT * FROM users WHERE age >= %s AND is_active = %s",
        (18, True)
    )
    users = cursor.fetchall()

    # OR condition
    cursor.execute(
        "SELECT * FROM users WHERE username = %s OR email = %s",
        ("alice", "alice@example.com")
    )
    users = cursor.fetchall()

    # LIKE pattern match
    cursor.execute(
        "SELECT * FROM users WHERE username LIKE %s",
        ("a%",)  # usernames starting with "a"
    )
    users = cursor.fetchall()

    # IN query
    cursor.execute(
        "SELECT * FROM users WHERE id IN (%s, %s, %s)",
        (1, 2, 3)
    )
    users = cursor.fetchall()

    # BETWEEN range query
    cursor.execute(
        "SELECT * FROM users WHERE age BETWEEN %s AND %s",
        (18, 30)
    )
    users = cursor.fetchall()
```
Sorting and pagination
Concept: ORDER BY sorts the result set; LIMIT/OFFSET paginates it. The pagination formula is offset = (page - 1) * page_size.
```python
with connection.cursor() as cursor:
    # Sorting
    cursor.execute("SELECT * FROM users ORDER BY age DESC")
    users = cursor.fetchall()

    cursor.execute("SELECT * FROM users ORDER BY created_at ASC, age DESC")
    users = cursor.fetchall()

    # Paginated query
    page = 2        # page number
    page_size = 10  # rows per page
    offset = (page - 1) * page_size
    cursor.execute(
        "SELECT * FROM users ORDER BY id LIMIT %s OFFSET %s",
        (page_size, offset)
    )
    users = cursor.fetchall()
```
5.4 Complex Queries
Concept: Complex queries
Complex queries cover aggregation, grouping, JOINs across tables, subqueries, and other advanced techniques.
Aggregation
Concept: Aggregate functions compute a single value over a set of rows: COUNT counts, SUM sums, AVG averages, MAX and MIN find extremes.
```python
with connection.cursor() as cursor:
    # COUNT: number of rows
    cursor.execute("SELECT COUNT(*) FROM users")
    count = cursor.fetchone()[0]
    print(f"Total users: {count}")

    # COUNT with a condition
    cursor.execute("SELECT COUNT(*) FROM users WHERE is_active = %s", (True,))
    active_count = cursor.fetchone()[0]

    # SUM
    cursor.execute("SELECT SUM(price) FROM orders WHERE user_id = %s", (1,))
    total = cursor.fetchone()[0] or 0  # SUM returns NULL for an empty set

    # AVG
    cursor.execute("SELECT AVG(age) FROM users")
    avg_age = cursor.fetchone()[0]

    # MAX / MIN
    cursor.execute("SELECT MAX(price), MIN(price) FROM products")
    max_price, min_price = cursor.fetchone()
```
GROUP BY
Concept: GROUP BY groups rows by one or more columns and, combined with aggregate functions, gives per-group statistics. HAVING filters the grouped results.
```python
with connection.cursor() as cursor:
    # Per-category statistics
    sql = """
        SELECT category, COUNT(*) AS count, AVG(price) AS avg_price
        FROM products
        GROUP BY category
        HAVING COUNT(*) > 5
        ORDER BY count DESC
    """
    cursor.execute(sql)
    results = cursor.fetchall()
    for row in results:
        print(f"Category: {row[0]}, count: {row[1]}, average price: {row[2]}")
```
JOIN queries
Concept: JOIN combines related rows from multiple tables. INNER JOIN keeps only rows that match on both sides, LEFT JOIN keeps every row from the left table, and multi-table JOINs chain several related tables together.
```python
# Schema: users (id, username), orders (id, user_id, total), order_items (id, order_id, product_id, quantity)
with connection.cursor() as cursor:
    # INNER JOIN: rows present in both tables
    sql = """
        SELECT u.username, o.id, o.total, o.created_at
        FROM users u
        INNER JOIN orders o ON u.id = o.user_id
        WHERE o.total > 100
        ORDER BY o.created_at DESC
    """
    cursor.execute(sql)
    results = cursor.fetchall()

    # LEFT JOIN: keep every row from the left table
    sql = """
        SELECT u.username, COUNT(o.id) AS order_count, COALESCE(SUM(o.total), 0) AS total_spent
        FROM users u
        LEFT JOIN orders o ON u.id = o.user_id
        GROUP BY u.id, u.username
        HAVING COUNT(o.id) > 0
    """
    cursor.execute(sql)
    results = cursor.fetchall()

    # Multi-table JOIN
    sql = """
        SELECT u.username, o.id AS order_id, p.name AS product_name, oi.quantity
        FROM orders o
        INNER JOIN users u ON o.user_id = u.id
        INNER JOIN order_items oi ON o.id = oi.order_id
        INNER JOIN products p ON oi.product_id = p.id
        WHERE o.id = %s
    """
    cursor.execute(sql, (1,))
    items = cursor.fetchall()
```
Subqueries
Concept: A subquery is a query nested inside another. WHERE subqueries feed a condition, IN/EXISTS test for existence, and FROM subqueries act as a temporary table.
```python
with connection.cursor() as cursor:
    # WHERE subquery: products priced above the average
    sql = """
        SELECT * FROM products
        WHERE price > (SELECT AVG(price) FROM products)
    """
    cursor.execute(sql)
    expensive_products = cursor.fetchall()

    # IN subquery: users who have at least one order
    sql = """
        SELECT * FROM users
        WHERE id IN (SELECT DISTINCT user_id FROM orders)
    """
    cursor.execute(sql)
    users_with_orders = cursor.fetchall()

    # EXISTS subquery: categories that contain products
    sql = """
        SELECT * FROM categories c
        WHERE EXISTS (
            SELECT 1 FROM products p WHERE p.category_id = c.id
        )
    """
    cursor.execute(sql)
    categories_with_products = cursor.fetchall()

    # FROM subquery: aggregate first, then filter
    sql = """
        SELECT * FROM (
            SELECT category_id, COUNT(*) AS cnt, AVG(price) AS avg_price
            FROM products
            GROUP BY category_id
        ) AS stats
        WHERE cnt > 10
    """
    cursor.execute(sql)
    result = cursor.fetchall()
```
UNION queries
Concept: UNION merges the result sets of two queries and removes duplicates; UNION ALL keeps duplicates and is faster. The merged queries must have the same number of columns with compatible types.
```python
with connection.cursor() as cursor:
    # UNION: merge and deduplicate
    sql = """
        SELECT username FROM users WHERE is_active = 1
        UNION
        SELECT username FROM admin_users
    """
    cursor.execute(sql)
    active_usernames = cursor.fetchall()

    # UNION ALL: keep duplicates
    sql = """
        SELECT 'user' AS type, username FROM users
        UNION ALL
        SELECT 'admin' AS type, username FROM admin_users
    """
    cursor.execute(sql)
    all_users = cursor.fetchall()
```
5.5 Database Connection Pools
Concept: Connection pools
A connection pool creates a set of database connections up front; callers borrow a connection and return it when done. This avoids the cost of constantly opening and closing connections and improves performance and resource utilization.
DBUtils PooledDB (synchronous pool)
Concept: DBUtils' PooledDB provides a synchronous connection pool on top of pymysql. It suits synchronous web frameworks such as Flask and Django.
```bash
pip install pymysql dbutils
```
```python
from dbutils.pooled_db import PooledDB
import pymysql

# Create the pool
pool = PooledDB(
    creator=pymysql,    # driver module to use
    maxconnections=20,  # maximum number of connections
    mincached=5,        # idle connections created at startup
    maxcached=10,       # maximum number of idle connections
    blocking=True,      # block and wait when the pool is exhausted
    maxusage=None,      # maximum reuses of a single connection (None = unlimited)
    setsession=[],      # SQL statements executed when a session starts
    ping=1,             # when to check connection liveness (1 = whenever requested)
    host="localhost",
    port=3306,
    user="root",
    password="pwd",
    database="myapp",
    charset="utf8mb4"
)

# Borrow a connection
def query_users():
    conn = pool.connection()  # take a connection from the pool
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM users")
            return cursor.fetchall()
    finally:
        conn.close()  # return the connection to the pool

def insert_user(username, email):
    conn = pool.connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("INSERT INTO users (username, email) VALUES (%s, %s)", (username, email))
            conn.commit()
            return cursor.lastrowid
    finally:
        conn.close()

# Batch operations
def batch_insert_users(users):
    conn = pool.connection()
    try:
        with conn.cursor() as cursor:
            sql = "INSERT INTO users (username, email) VALUES (%s, %s)"
            cursor.executemany(sql, users)
            conn.commit()
    finally:
        conn.close()
```
aiomysql (asynchronous pool)
Concept: aiomysql is an asynchronous MySQL driver built on async/await. It suits async web frameworks such as FastAPI and Quart, and pairs with asyncio.gather for high concurrency.
```bash
pip install aiomysql
```
```python
import asyncio
import aiomysql

async def create_pool():
    pool = await aiomysql.create_pool(
        host="localhost",
        port=3306,
        user="root",
        password="pwd",
        db="myapp",
        minsize=5,
        maxsize=20,
        charset="utf8mb4"
    )
    return pool

async def query_users(pool):
    async with pool.acquire() as conn:
        async with conn.cursor(aiomysql.DictCursor) as cursor:
            await cursor.execute("SELECT * FROM users")
            return await cursor.fetchall()

async def insert_user(pool, username, email):
    async with pool.acquire() as conn:
        async with conn.cursor() as cursor:
            await cursor.execute(
                "INSERT INTO users (username, email) VALUES (%s, %s)",
                (username, email)
            )
            await conn.commit()
            return cursor.lastrowid

async def main():
    pool = await create_pool()
    try:
        # Query
        users = await query_users(pool)
        print(users)

        # Insert
        user_id = await insert_user(pool, "alice", "alice@example.com")
        print(f"Inserted ID: {user_id}")

        # Concurrent queries
        tasks = [query_users(pool) for _ in range(10)]
        results = await asyncio.gather(*tasks)
    finally:
        pool.close()
        await pool.wait_closed()

asyncio.run(main())
```
SQLAlchemy's built-in pool
Concept: A SQLAlchemy engine comes with a connection pool. pool_size controls the pool size, max_overflow the number of overflow connections, and pool_pre_ping detects stale connections before use.
```bash
pip install sqlalchemy pymysql
```
```python
from sqlalchemy import create_engine, text

# Create an engine (pooled by default)
engine = create_engine(
    "mysql+pymysql://user:pwd@localhost/myapp",
    pool_size=10,        # connections kept in the pool
    max_overflow=20,     # extra connections allowed beyond pool_size
    pool_recycle=3600,   # recycle connections after this many seconds
    pool_pre_ping=True,  # check connection liveness before use
    echo=False           # whether to log SQL
)

# Borrow a connection
with engine.connect() as conn:
    result = conn.execute(text("SELECT * FROM users"))
    users = result.fetchall()

# Transactional work
with engine.connect() as conn:
    with conn.begin():
        conn.execute(text("INSERT INTO users (username) VALUES (:username)"), {"username": "alice"})
```
Pool parameter reference
Concept: Tuning pool parameters to the application's workload and the database server's capacity improves performance and resource utilization.
| Parameter | Description | Suggested value |
|---|---|---|
| pool_size | Connections kept in the pool | 5-20 |
| max_overflow | Maximum overflow connections | 10-30 |
| pool_recycle | Connection recycle time (seconds) | 3600 |
| pool_pre_ping | Liveness check before use | True |
| minsize | Minimum pool size (aiomysql) | 2-5 |
| maxsize | Maximum pool size (aiomysql) | 10-50 |
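To see how these settings behave at runtime, SQLAlchemy's default QueuePool exposes a status() summary. A minimal sketch (the connection URL is a placeholder):
```python
from sqlalchemy import create_engine, text

engine = create_engine(
    "mysql+pymysql://user:pwd@localhost/myapp",
    pool_size=5,
    max_overflow=10,
    pool_pre_ping=True,
)

# QueuePool.status() reports pool size and checked-in/checked-out counts
print(engine.pool.status())

with engine.connect() as conn:
    conn.execute(text("SELECT 1"))
    print(engine.pool.status())  # one connection is checked out here
```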
Pool usage scenarios
```python
# Scenario 1: web application (return the connection when the request ends)
pool = PooledDB(creator=pymysql, maxconnections=20, ...)  # other parameters as above

def handle_request(user_id):
    conn = pool.connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
            return cursor.fetchone()
    finally:
        conn.close()

# Scenario 2: scheduled job (batch processing)
def daily_report():
    conn = pool.connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT COUNT(*) FROM orders WHERE DATE(created_at) = CURDATE()")
            return cursor.fetchone()
    finally:
        conn.close()

# Scenario 3: async web (use an aiomysql pool)
async def async_query(pool):
    async with pool.acquire() as conn:
        async with conn.cursor() as cursor:
            await cursor.execute("SELECT * FROM users")
            return await cursor.fetchall()
```
5.6 ORM
Concept: ORM
ORM (Object-Relational Mapping) maps database tables to Python classes so you can work with the database in object-oriented style.
SQLAlchemy ORM
Concept: SQLAlchemy is the most capable Python ORM and exposes the full SQL expression language. declarative_base creates the model base class; sessionmaker creates a session factory.
```bash
pip install sqlalchemy
```
```python
from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime, ForeignKey, func
from sqlalchemy.orm import declarative_base, sessionmaker, relationship
from datetime import datetime

engine = create_engine("mysql+pymysql://user:pwd@localhost/myapp", pool_pre_ping=True)
Base = declarative_base()
Session = sessionmaker(bind=engine)

# Model definitions
class User(Base):
    __tablename__ = "users"
    id = Column(Integer, primary_key=True, autoincrement=True)
    username = Column(String(50), unique=True, nullable=False)
    email = Column(String(100), unique=True, nullable=False)
    age = Column(Integer, default=0)
    is_active = Column(Boolean, default=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    orders = relationship("Order", back_populates="user")

    def __repr__(self):
        return f"<User {self.username}>"

class Order(Base):
    __tablename__ = "orders"
    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey("users.id"))
    total = Column(Integer, default=0)
    created_at = Column(DateTime, default=datetime.utcnow)
    user = relationship("User", back_populates="orders")

# Create the tables
Base.metadata.create_all(engine)

# CRUD operations
session = Session()
try:
    # CREATE
    user = User(username="alice", email="alice@example.com", age=25)
    session.add(user)
    session.commit()

    # READ
    user = session.query(User).filter_by(username="alice").first()
    users = session.query(User).filter(User.age >= 18).all()

    # UPDATE
    user.age = 26
    session.commit()

    # Complex query: users with an order over 100
    users_with_orders = session.query(User).join(Order).filter(Order.total > 100).all()

    # Aggregate query
    stats = session.query(
        User.username,
        func.count(Order.id).label("order_count"),
        func.sum(Order.total).label("total_spent")
    ).join(Order).group_by(User.id, User.username).all()

    # DELETE
    session.delete(user)
    session.commit()
finally:
    session.close()
```
5.7 Transactions in Depth
Concept: Transactions
A transaction is an atomic group of operations that guarantees the ACID properties: Atomicity, Consistency, Isolation, and Durability.
Isolation levels
Concept: The isolation level decides how much concurrent transactions can see of each other. MySQL supports four: READ UNCOMMITTED, READ COMMITTED, REPEATABLE READ (the default), and SERIALIZABLE.
```python
import pymysql

connection = pymysql.connect(
    host="localhost",
    user="root",
    password="pwd",
    database="myapp",
    autocommit=False  # manage transactions manually
)

with connection:
    # Set the isolation level (applies to the next transaction)
    with connection.cursor() as cursor:
        cursor.execute("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")

    # Begin the transaction
    connection.begin()
    try:
        with connection.cursor() as cursor:
            # Transfer money between accounts
            cursor.execute("UPDATE accounts SET balance = balance - 100 WHERE user_id = 1")
            cursor.execute("UPDATE accounts SET balance = balance + 100 WHERE user_id = 2")
        # Commit
        connection.commit()
    except Exception as e:
        # Roll back
        connection.rollback()
        print(f"Transaction rolled back: {e}")
```
Isolation levels at a glance
| Isolation level | Dirty read | Non-repeatable read | Phantom read |
|---|---|---|---|
| READ UNCOMMITTED | possible | possible | possible |
| READ COMMITTED | not possible | possible | possible |
| REPEATABLE READ | not possible | not possible | possible |
| SERIALIZABLE | not possible | not possible | not possible |
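A non-repeatable read is easy to reproduce with two connections. A minimal sketch, assuming the accounts table from the transfer example above: under READ COMMITTED the second read sees the other session's committed update, while under REPEATABLE READ both reads return the same value.
```python
import pymysql

def connect():
    return pymysql.connect(host="localhost", user="root", password="pwd",
                           database="myapp", autocommit=False)

reader, writer = connect(), connect()

with reader.cursor() as cur:
    cur.execute("SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED")

reader.begin()
with reader.cursor() as cur:
    cur.execute("SELECT balance FROM accounts WHERE user_id = 1")
    first = cur.fetchone()[0]

# Another session commits a change between the two reads
with writer.cursor() as cur:
    cur.execute("UPDATE accounts SET balance = balance + 50 WHERE user_id = 1")
writer.commit()

with reader.cursor() as cur:
    cur.execute("SELECT balance FROM accounts WHERE user_id = 1")
    second = cur.fetchone()[0]
reader.commit()

print(first, second)  # differ under READ COMMITTED; equal under REPEATABLE READ
reader.close()
writer.close()
```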
Savepoints
Concept: A savepoint marks an intermediate point inside a transaction so you can roll back to it instead of aborting the whole transaction. Useful for partial rollback in complex transactions.
```python
with connection:
    connection.begin()
    try:
        with connection.cursor() as cursor:
            cursor.execute("INSERT INTO users (username) VALUES ('a')")

            savepoint1 = "savepoint1"
            cursor.execute(f"SAVEPOINT {savepoint1}")

            cursor.execute("INSERT INTO users (username) VALUES ('b')")

            # Roll back to the savepoint: 'b' is undone, 'a' survives
            cursor.execute(f"ROLLBACK TO SAVEPOINT {savepoint1}")
        connection.commit()
    except Exception as e:
        connection.rollback()
```
5.8 SQL Injection Protection
Concept: SQL injection
SQL injection is a common attack in which malicious SQL is smuggled in through user input. The core defense: never build SQL by string concatenation; always use parameterized queries.
✅ Correct: parameterized queries
Concept: Parameterized queries pass values through placeholders (%s); the driver escapes special characters automatically, defeating injection. This is the only safe way to write SQL that takes user input.
```python
# ✅ Correct: parameterized query
with connection.cursor() as cursor:
    username = "alice' OR '1'='1"  # malicious input
    cursor.execute("SELECT * FROM users WHERE username = %s", (username,))
    # The parameter is escaped by the driver: safe!

# ✅ Correct: multiple parameters
with connection.cursor() as cursor:
    cursor.execute("SELECT * FROM users WHERE age > %s AND is_active = %s", (18, True))

# ✅ Correct: LIKE query (wildcards still need escaping, see below)
with connection.cursor() as cursor:
    keyword = "%abc%"  # user input
    cursor.execute("SELECT * FROM users WHERE username LIKE %s", (keyword,))
```
❌ Wrong: string concatenation
Concept: Concatenating strings into SQL is the root cause of injection. No matter how much you trust the input, use parameterized queries.
```python
# ❌ Wrong: never do this!
with connection.cursor() as cursor:
    username = "alice' OR '1'='1"  # malicious input
    cursor.execute(f"SELECT * FROM users WHERE username = '{username}'")
    # The condition is always true, so this matches every row: SQL injection!
```
Escaping LIKE wildcards
Concept: LIKE wildcards (% and _) can also be abused by attackers. Escape them in the user input first, then pass the result as a LIKE parameter.
```python
# LIKE queries need their special characters escaped
def escape_like(value):
    # Escape the LIKE metacharacters: \ % _
    return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

with connection.cursor() as cursor:
    keyword = "100%"  # user input containing %
    escaped = escape_like(keyword)
    cursor.execute("SELECT * FROM users WHERE username LIKE %s", (f"%{escaped}%",))
```
5.9 Error Handling and Retries
Concept: Error handling
Database operations can hit network interruptions, connection timeouts, deadlocks, and similar failures; they need appropriate error handling and retry logic.
Error types and handling
Concept: Errors split into operational ones (network drop, timeout) and database ones (constraint violations, syntax errors, deadlocks). Each class calls for a different strategy.
```python
import time

import pymysql
from pymysql.err import OperationalError, InterfaceError, DatabaseError

def get_connection():
    return pymysql.connect(
        host="localhost",
        user="root",
        password="pwd",
        database="myapp",
        charset="utf8mb4"
    )

def query_with_retry(sql, params=None, max_retries=3, retry_delay=1):
    """Run a query, retrying on transient errors."""
    for attempt in range(max_retries):
        try:
            connection = get_connection()
            with connection:
                with connection.cursor() as cursor:
                    if params:
                        cursor.execute(sql, params)
                    else:
                        cursor.execute(sql)
                    return cursor.fetchall()
        except (OperationalError, InterfaceError) as e:
            # Network error or broken connection
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                time.sleep(retry_delay)
            else:
                raise
        except DatabaseError as e:
            # Database error (e.g. deadlock): retry with backoff
            print(f"Database error: {e}")
            if attempt < max_retries - 1:
                time.sleep(retry_delay * (attempt + 1))  # increasing delay
            else:
                raise

# Usage
results = query_with_retry("SELECT * FROM users")
```
Automatic reconnection
Concept: Connections can go stale after timeouts or a server restart. Probe liveness with ping() and rebuild the pool when a connection has died, keeping the service available.
```python
from dbutils.pooled_db import PooledDB
import pymysql

class ReconnectPoolDB:
    def __init__(self, **kwargs):
        self.kwargs = kwargs
        self.pool = None
        self._create_pool()

    def _create_pool(self):
        self.pool = PooledDB(
            creator=pymysql,
            maxconnections=20,
            mincached=5,
            ping=1,  # check liveness whenever a connection is requested
            **self.kwargs
        )

    def connection(self):
        try:
            conn = self.pool.connection()
            # Probe the connection, reconnecting if needed
            conn.ping(reconnect=True)
            return conn
        except Exception:
            # Connection is dead: rebuild the pool
            self._create_pool()
            return self.pool.connection()

# Usage
pool = ReconnectPoolDB(
    host="localhost",
    user="root",
    password="pwd",
    database="myapp"
)
```
5.10 Bulk Insert Optimization
Concept: Bulk inserts
Inserting large volumes of data one row at a time in a loop is slow. Options include executemany, LOAD DATA INFILE, and committing in batches.
executemany
Concept: executemany runs one SQL statement over many parameter sets in a single API call. It cuts network round trips compared with looping over execute, speeding up inserts.
```python
# Plain bulk insert
with connection.cursor() as cursor:
    sql = "INSERT INTO users (username, email) VALUES (%s, %s)"
    data = [
        ("user1", "user1@example.com"),
        ("user2", "user2@example.com"),
        # ... 10000 rows
    ]
    cursor.executemany(sql, data)
    connection.commit()
```
Batched insert strategy
```python
def batch_insert_optimized(table, columns, values_batch, batch_size=1000):
    """
    Large bulk inserts: split into batches, commit per batch.
    Note: table and columns are interpolated into the SQL, so they must
    come from trusted code, never from user input.
    """
    total = len(values_batch)
    with connection.cursor() as cursor:
        sql = f"INSERT INTO {table} ({', '.join(columns)}) VALUES ({', '.join(['%s'] * len(columns))})"
        for i in range(0, total, batch_size):
            batch = values_batch[i:i + batch_size]
            cursor.executemany(sql, batch)
            connection.commit()
            print(f"Inserted {min(i + batch_size, total)}/{total} rows")

# Usage
batch_insert_optimized(
    table="users",
    columns=["username", "email", "age"],
    values_batch=[(f"user{i}", f"user{i}@example.com", i % 100) for i in range(100000)],
    batch_size=5000
)
```
LOAD DATA INFILE (fastest)
Concept: LOAD DATA INFILE is MySQL's server-side bulk import command. The server reads the file directly, skipping most of the client-server data transfer, which makes it the fastest way to load data.
```python
# MySQL's LOAD DATA INFILE is roughly 10-20x faster than INSERT
# Note: LOCAL INFILE must be enabled on both sides,
# e.g. pymysql.connect(..., local_infile=True)
def load_data_infile(table, columns, filepath):
    """Bulk import via LOAD DATA INFILE (table/columns/filepath must be trusted)."""
    with connection.cursor() as cursor:
        cols = ", ".join(columns)
        sql = f"""
            LOAD DATA LOCAL INFILE '{filepath}'
            INTO TABLE {table}
            FIELDS TERMINATED BY ','
            ENCLOSED BY '"'
            LINES TERMINATED BY '\\n'
            ({cols})
        """
        cursor.execute(sql)
        connection.commit()

# Expected CSV layout:
# username,email,age
# user1,user1@example.com,25
# user2,user2@example.com,30
```
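Generating the CSV from Python is straightforward with the standard csv module. A minimal sketch feeding the load_data_infile helper above (the file path is a placeholder):
```python
import csv

rows = [(f"user{i}", f"user{i}@example.com", i % 100) for i in range(100000)]

# Write the rows in the comma-separated, quoted format the LOAD DATA statement expects
with open("/tmp/users.csv", "w", newline="") as f:
    writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
    writer.writerows(rows)

load_data_infile("users", ["username", "email", "age"], "/tmp/users.csv")
```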
Async bulk insert (aiomysql)
Concept: Combining aiomysql with asyncio suits high-concurrency scenarios in async frameworks such as FastAPI: large batches can be inserted without blocking the event loop.
```python
import asyncio
import aiomysql

async def batch_insert_async(pool, users):
    """Asynchronous bulk insert."""
    async with pool.acquire() as conn:
        async with conn.cursor() as cursor:
            sql = "INSERT INTO users (username, email) VALUES (%s, %s)"
            # aiomysql supports executemany
            await cursor.executemany(sql, users)
            await conn.commit()

async def main():
    pool = await aiomysql.create_pool(host="localhost", user="root", password="pwd", db="myapp")
    try:
        users = [(f"user{i}", f"user{i}@example.com") for i in range(10000)]
        await batch_insert_async(pool, users)
    finally:
        pool.close()
        await pool.wait_closed()

asyncio.run(main())
```
5.11 Pagination in Depth
Concept: Pagination
There are two approaches: OFFSET pagination (traditional) and cursor (keyset) pagination, which performs better. Prefer cursor pagination for large datasets.
OFFSET pagination (traditional)
Concept: OFFSET pagination uses LIMIT/OFFSET. It is simple and intuitive, but a large offset gets slow because the database must scan and discard all the skipped rows.
```python
# Traditional OFFSET pagination
def paginate_offset(page, page_size):
    offset = (page - 1) * page_size
    with connection.cursor() as cursor:
        # Fetch the page
        cursor.execute(
            "SELECT * FROM users ORDER BY id LIMIT %s OFFSET %s",
            (page_size, offset)
        )
        data = cursor.fetchall()

        # Count the total (expensive on large tables)
        cursor.execute("SELECT COUNT(*) FROM users")
        total = cursor.fetchone()[0]

    return {
        "data": data,
        "page": page,
        "page_size": page_size,
        "total": total,
        "total_pages": (total + page_size - 1) // page_size
    }
```
Cursor pagination (recommended for large datasets)
Concept: Cursor pagination queries from the last ID of the previous page: WHERE id > last_id. Query cost stays constant regardless of table size, which makes it ideal for infinite scrolling.
```python
# Cursor pagination: keyed on the last ID of the previous page
def paginate_cursor(last_id, page_size):
    """
    Cursor pagination: better performance, suited to large datasets.
    """
    with connection.cursor() as cursor:
        if last_id is None:
            cursor.execute(
                "SELECT * FROM users ORDER BY id LIMIT %s",
                (page_size,)
            )
        else:
            cursor.execute(
                "SELECT * FROM users WHERE id > %s ORDER BY id LIMIT %s",
                (last_id, page_size)
            )
        data = cursor.fetchall()

    # Cursor for the next page (assumes id is the first column)
    next_cursor = data[-1][0] if data else None
    return {
        "data": data,
        "next_cursor": next_cursor,
        "has_more": len(data) == page_size
    }

# Usage: walk every page, including the final partial one
result = paginate_cursor(last_id=None, page_size=10)
while True:
    print(result["data"])
    if not result["has_more"]:
        break
    result = paginate_cursor(last_id=result["next_cursor"], page_size=10)
```
OFFSET vs cursor pagination
Concept: Each approach has trade-offs. OFFSET suits interfaces that jump to arbitrary pages (page-number links); cursor pagination suits infinite scrolling and large datasets.
| Property | OFFSET pagination | Cursor pagination |
|---|---|---|
| Complexity | simple | slightly more involved |
| Performance at scale | slow (worse as OFFSET grows) | fast (constant time) |
| Jump to arbitrary page | yes | no |
| Best for | small datasets, page jumping | large datasets, sequential browsing |
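When you must keep OFFSET semantics on a large table, a common mitigation is the "deferred join": paginate over the primary key alone (a cheap, index-only scan), then join back for the full rows. A minimal sketch of that pattern:
```python
def paginate_offset_deferred(page, page_size):
    offset = (page - 1) * page_size
    with connection.cursor() as cursor:
        # Scan only the id index for the OFFSET, then fetch full rows for that page
        cursor.execute(
            """
            SELECT u.* FROM users u
            INNER JOIN (
                SELECT id FROM users ORDER BY id LIMIT %s OFFSET %s
            ) AS page_ids ON u.id = page_ids.id
            ORDER BY u.id
            """,
            (page_size, offset)
        )
        return cursor.fetchall()
```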
5.12 Database Migrations
Concept: Database migrations
Migrations manage changes to the database schema (creating tables, altering columns, and so on) and keep it consistent across environments. Alembic is the migration tool recommended for SQLAlchemy.
Alembic
Concept: Alembic is SQLAlchemy's official migration tool. It manages schema changes through versioned migration scripts and supports upgrading, downgrading, and inspecting history.
```bash
pip install alembic
```
```bash
# Initialize
alembic init alembic
# Generate a migration
alembic revision --autogenerate -m "Add users table"
# Upgrade
alembic upgrade head
# Downgrade
alembic downgrade -1
# Show history
alembic history
```
Example migration file
```python
# alembic/versions/xxxx_add_users.py
from alembic import op
import sqlalchemy as sa

# Alembic fills in the version identifiers when it generates the file
revision = "xxxx"
down_revision = None

def upgrade():
    op.create_table(
        'users',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('username', sa.String(50), nullable=False),
        sa.Column('email', sa.String(100), nullable=False),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('username'),
        sa.UniqueConstraint('email')
    )

def downgrade():
    op.drop_table('users')
```
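For --autogenerate to work, Alembic needs to know about your models. A minimal sketch of the relevant env.py lines, assuming the declarative Base from the SQLAlchemy ORM section lives in a hypothetical myapp.models module (the database URL itself is set via sqlalchemy.url in alembic.ini):
```python
# alembic/env.py (excerpt)
from myapp.models import Base  # wherever your declarative Base lives

# Autogenerate diffs the live database schema against this metadata
target_metadata = Base.metadata
```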
Django migrations
Concept: Django ships with its own ORM and migration system: makemigrations generates migration files, migrate applies them. Well suited to full-stack Django projects.
```bash
# Django has migrations built in
python manage.py makemigrations   # create migrations
python manage.py migrate          # apply migrations
python manage.py showmigrations   # show migration status
```
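makemigrations works off model definitions. A minimal sketch of a Django model matching the users table used throughout this chapter (the app name myapp is a placeholder):
```python
# myapp/models.py
from django.db import models

class User(models.Model):
    username = models.CharField(max_length=50, unique=True)
    email = models.EmailField(max_length=100, unique=True)
    age = models.IntegerField(default=0)
    is_active = models.BooleanField(default=True)
    created_at = models.DateTimeField(auto_now_add=True)

# After editing this file, `python manage.py makemigrations` generates
# a migration describing the new table, and `migrate` applies it.
```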
5.13 Async ORM
Concept: Async ORM
An async ORM lets you work with the database efficiently in asynchronous code without blocking. SQLAlchemy supports asyncio from version 1.4.
Async SQLAlchemy
Concept: Async SQLAlchemy uses create_async_engine for the engine and async_sessionmaker for the session factory, with async/await for non-blocking database operations.
```bash
pip install sqlalchemy[asyncio] aiosqlite
```
```python
import asyncio
from sqlalchemy import Column, Integer, String, select
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class User(Base):
    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    username = Column(String(50))

# Create the async engine
engine = create_async_engine(
    "sqlite+aiosqlite:///myapp.db",
    echo=True
)

# Create the session factory
async_session = async_sessionmaker(engine, class_=AsyncSession)

# CRUD operations
async def crud_operations():
    # Create the tables (run_sync bridges to the synchronous metadata API)
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)

    async with async_session() as session:
        # CREATE
        user = User(username="alice")
        session.add(user)
        await session.commit()

        # READ
        result = await session.execute(
            select(User).where(User.username == "alice")
        )
        user = result.scalar_one_or_none()

        # UPDATE
        user.username = "alice_new"
        await session.commit()

        # DELETE
        await session.delete(user)
        await session.commit()

asyncio.run(crud_operations())
```
5.14 Case Study: a User Management System
Complete CRUD example
Concept: This user management system demonstrates the connection pool + contextmanager pattern and implements the full set of operations: creating users, lookups by ID and username, updates, deletion, search, pagination, and batch creation.
```python
import pymysql
from dbutils.pooled_db import PooledDB
from contextlib import contextmanager

# Connection pool
pool = PooledDB(
    creator=pymysql,
    maxconnections=20,
    mincached=5,
    host="localhost",
    user="root",
    password="pwd",
    database="myapp",
    charset="utf8mb4"
)

@contextmanager
def get_connection():
    conn = pool.connection()
    try:
        yield conn
    finally:
        conn.close()

class UserService:
    @staticmethod
    def create(username, email, age=0):
        with get_connection() as conn:
            with conn.cursor() as cursor:
                sql = "INSERT INTO users (username, email, age) VALUES (%s, %s, %s)"
                cursor.execute(sql, (username, email, age))
                conn.commit()
                return cursor.lastrowid

    @staticmethod
    def get_by_id(user_id):
        with get_connection() as conn:
            with conn.cursor(pymysql.cursors.DictCursor) as cursor:
                cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
                return cursor.fetchone()

    @staticmethod
    def get_by_username(username):
        with get_connection() as conn:
            with conn.cursor(pymysql.cursors.DictCursor) as cursor:
                cursor.execute("SELECT * FROM users WHERE username = %s", (username,))
                return cursor.fetchone()

    @staticmethod
    def list_all(page=1, page_size=10):
        offset = (page - 1) * page_size
        with get_connection() as conn:
            with conn.cursor(pymysql.cursors.DictCursor) as cursor:
                cursor.execute("SELECT * FROM users ORDER BY id LIMIT %s OFFSET %s", (page_size, offset))
                return cursor.fetchall()

    @staticmethod
    def update(user_id, **kwargs):
        if not kwargs:
            return False
        # Column names come from code (keyword arguments), never from user input
        fields = ", ".join([f"{k} = %s" for k in kwargs.keys()])
        values = list(kwargs.values()) + [user_id]
        with get_connection() as conn:
            with conn.cursor() as cursor:
                sql = f"UPDATE users SET {fields} WHERE id = %s"
                cursor.execute(sql, values)
                conn.commit()
                return cursor.rowcount > 0

    @staticmethod
    def delete(user_id):
        with get_connection() as conn:
            with conn.cursor() as cursor:
                cursor.execute("DELETE FROM users WHERE id = %s", (user_id,))
                conn.commit()
                return cursor.rowcount > 0

    @staticmethod
    def search(keyword, page=1, page_size=10):
        offset = (page - 1) * page_size
        with get_connection() as conn:
            with conn.cursor(pymysql.cursors.DictCursor) as cursor:
                sql = """
                    SELECT * FROM users
                    WHERE username LIKE %s OR email LIKE %s
                    ORDER BY id LIMIT %s OFFSET %s
                """
                cursor.execute(sql, (f"%{keyword}%", f"%{keyword}%", page_size, offset))
                return cursor.fetchall()

    @staticmethod
    def batch_create(users_data):
        with get_connection() as conn:
            with conn.cursor() as cursor:
                sql = "INSERT INTO users (username, email, age) VALUES (%s, %s, %s)"
                cursor.executemany(sql, users_data)
                conn.commit()
                return cursor.rowcount

# Usage example
if __name__ == "__main__":
    # Create a user
    user_id = UserService.create("alice", "alice@example.com", 25)
    print(f"Created user ID: {user_id}")

    # Look up a user
    user = UserService.get_by_id(1)
    print(f"User: {user}")

    # Update a user
    UserService.update(1, age=26, is_active=True)

    # Paginated listing
    users = UserService.list_all(page=1, page_size=10)
    for u in users:
        print(u)

    # Search
    results = UserService.search("alice")
    print(f"Search results: {results}")

    # Batch create
    UserService.batch_create([
        ("bob", "bob@example.com", 30),
        ("charlie", "charlie@example.com", 28)
    ])

    # Delete a user
    UserService.delete(1)
```