Python 操作数据库

5.1 数据库连接基础

概念:Python 数据库连接

Python 通过数据库驱动连接数据库。MySQL 常用驱动有 pymysql(同步)和 aiomysql(异步),PostgreSQL 常用 psycopg2(同步)和 asyncpg(异步)。

pymysql 连接 MySQL

概念:pymysql 是纯 Python 实现的 MySQL 驱动,无需编译,适合所有平台。使用 with 语句自动管理连接的关闭(连接仍需先用 connect() 创建)。

bash 复制代码
pip install pymysql
python 复制代码
import pymysql

# Open a single connection (MySQL default port 3306; utf8mb4 covers full Unicode)
connection = pymysql.connect(
    host="localhost",
    port=3306,
    user="root",
    password="your_password",
    database="myapp",
    charset="utf8mb4"
)

# `with connection:` closes the connection automatically on exit
with connection:
    with connection.cursor() as cursor:
        # Parameterized query: %s placeholders, arguments passed as a tuple
        cursor.execute("SELECT * FROM users WHERE id = %s", (1,))
        result = cursor.fetchone()
        print(result)

    # Writes (INSERT/UPDATE/DELETE) must be committed explicitly
    with connection.cursor() as cursor:
        cursor.execute("INSERT INTO users (username, email) VALUES (%s, %s)", ("alice", "alice@example.com"))
        connection.commit()

psycopg2 连接 PostgreSQL

概念:psycopg2 是 PostgreSQL 的主流 Python 驱动,性能高,支持预处理语句和参数化查询。使用方式与 pymysql 类似,但注意:psycopg2 的 with connection 只管理事务(成功提交、出错回滚),并不会关闭连接,需要显式调用 close()。

bash 复制代码
pip install psycopg2-binary
python 复制代码
import psycopg2

connection = psycopg2.connect(
    host="localhost",
    port=5432,
    user="postgres",
    password="your_password",
    database="myapp"
)

# NOTE: unlike pymysql, psycopg2's `with connection:` only manages a
# transaction (commit on success, rollback on error) — it does NOT close
# the connection. Close it explicitly so the server-side session is freed.
try:
    with connection:
        with connection.cursor() as cursor:
            # Parameterized query; psycopg2 also uses %s placeholders
            cursor.execute("SELECT * FROM users WHERE id = %s", (1,))
            result = cursor.fetchone()
            print(result)
finally:
    connection.close()  # release the connection (was leaked in the original)

连接参数说明

概念:连接参数用于指定数据库服务器的位置、认证信息和默认数据库。正确配置参数是成功连接数据库的前提。

参数 说明 示例
host 数据库地址 localhost / 127.0.0.1
port 端口号 3306 (MySQL) / 5432 (PostgreSQL)
user 用户名 root / postgres
password 密码 your_password
database 数据库名 myapp
charset 字符集 utf8mb4 (MySQL)

5.2 增删改操作

概念:增删改 (INSERT/UPDATE/DELETE)

增删改操作会修改数据库内容,需要调用 connection.commit() 提交事务。

INSERT 插入数据

概念:INSERT 用于向表中添加新记录。cursor.lastrowid 可以获取自增主键的值,executemany 可以批量插入多条数据。

python 复制代码
import pymysql

connection = pymysql.connect(host="localhost", user="root", password="pwd", database="myapp")

with connection:
    with connection.cursor() as cursor:
        # Insert a single row; lastrowid holds the AUTO_INCREMENT primary key
        sql = "INSERT INTO users (username, email, age) VALUES (%s, %s, %s)"
        cursor.execute(sql, ("alice", "alice@example.com", 25))
        connection.commit()
        print(f"插入ID: {cursor.lastrowid}")

    with connection.cursor() as cursor:
        # Bulk insert: executemany runs the statement once per row of `data`
        sql = "INSERT INTO users (username, email) VALUES (%s, %s)"
        data = [
            ("bob", "bob@example.com"),
            ("charlie", "charlie@example.com"),
            ("dave", "dave@example.com")
        ]
        cursor.executemany(sql, data)
        connection.commit()
        print(f"批量插入 {cursor.rowcount} 条")

UPDATE 更新数据

概念:UPDATE 用于修改表中已存在的记录。cursor.rowcount 返回受影响的行数,可用于判断更新是否成功。

python 复制代码
with connection.cursor() as cursor:
    # Update a single row; rowcount reports how many rows were affected
    sql = "UPDATE users SET age = %s WHERE username = %s"
    cursor.execute(sql, (26, "alice"))
    connection.commit()
    print(f"更新了 {cursor.rowcount} 条")

with connection.cursor() as cursor:
    # Bulk update via a broad WHERE clause
    sql = "UPDATE users SET is_active = %s WHERE created_at < %s"
    cursor.execute(sql, (False, "2024-01-01"))
    connection.commit()
    print(f"更新了 {cursor.rowcount} 条")

DELETE 删除数据

概念:DELETE 用于删除表中符合条件的记录。删除操作要谨慎,通常要带 WHERE 条件避免误删所有数据。

python 复制代码
with connection.cursor() as cursor:
    # Delete one row by primary key
    sql = "DELETE FROM users WHERE id = %s"
    cursor.execute(sql, (1,))
    connection.commit()
    print(f"删除了 {cursor.rowcount} 条")

with connection.cursor() as cursor:
    # Bulk delete of all inactive users
    sql = "DELETE FROM users WHERE is_active = %s"
    cursor.execute(sql, (False,))
    connection.commit()
    print(f"删除了 {cursor.rowcount} 条")

事务的回滚

概念:事务回滚用于撤销已执行的操作。当发生错误时调用 connection.rollback(),可以回到事务开始前的状态。

python 复制代码
with connection.cursor() as cursor:
    try:
        cursor.execute("INSERT INTO users (username) VALUES ('test')")
        cursor.execute("UPDATE users SET age = 100 WHERE username = 'nonexistent'")
        connection.commit()
    except Exception as e:
        # Undo everything since the transaction began.
        # NOTE(review): an UPDATE matching zero rows does NOT raise, so this
        # branch only triggers on real database errors (deadlock, lost
        # connection, constraint violation) — confirm the intended demo.
        connection.rollback()
        print(f"事务回滚: {e}")

5.3 查询操作

概念:查询 (SELECT)

查询操作不会修改数据,不需要 commit。fetchone() 获取一条,fetchall() 获取全部,fetchmany(n) 获取 n 条。

基础查询

概念:基础查询使用 fetch 系列方法获取结果。fetchone() 返回单行(元组或字典),fetchall() 返回所有行,fetchmany(n) 返回 n 行。

python 复制代码
with connection.cursor(pymysql.cursors.DictCursor) as cursor:
    # DictCursor returns each row as a dict keyed by column name
    cursor.execute("SELECT * FROM users")
    users = cursor.fetchall()
    for user in users:
        print(user)

with connection.cursor() as cursor:
    # fetchone: a single row (a tuple with the default cursor), or None
    cursor.execute("SELECT * FROM users WHERE id = %s", (1,))
    user = cursor.fetchone()
    print(user)

with connection.cursor() as cursor:
    # fetchmany(5): pull 5 rows from the (at most 10-row) result set
    cursor.execute("SELECT * FROM users LIMIT %s", (10,))
    users = cursor.fetchmany(5)
    for user in users:
        print(user)

条件查询

概念:条件查询通过 WHERE 子句筛选数据。支持 AND/OR 组合、LIKE 模糊匹配、IN 列表匹配、BETWEEN 范围查询。

python 复制代码
with connection.cursor() as cursor:
    # AND: both conditions must hold
    cursor.execute(
        "SELECT * FROM users WHERE age >= %s AND is_active = %s",
        (18, True)
    )
    users = cursor.fetchall()

    # OR: either condition may hold
    cursor.execute(
        "SELECT * FROM users WHERE username = %s OR email = %s",
        ("alice", "alice@example.com")
    )
    users = cursor.fetchall()

    # LIKE pattern match
    cursor.execute(
        "SELECT * FROM users WHERE username LIKE %s",
        ("a%",)  # usernames starting with "a"
    )
    users = cursor.fetchall()

    # IN list match — one %s per value, so this form is fixed to 3 values
    cursor.execute(
        "SELECT * FROM users WHERE id IN (%s, %s, %s)",
        (1, 2, 3)
    )
    users = cursor.fetchall()

    # BETWEEN is inclusive at both ends
    cursor.execute(
        "SELECT * FROM users WHERE age BETWEEN %s AND %s",
        (18, 30)
    )
    users = cursor.fetchall()

排序和分页

概念:ORDER BY 用于对结果排序,LIMIT/OFFSET 用于分页。分页计算公式:offset = (page - 1) * page_size。

python 复制代码
with connection.cursor() as cursor:
    # Single-key and multi-key ordering
    cursor.execute("SELECT * FROM users ORDER BY age DESC")
    users = cursor.fetchall()

    cursor.execute("SELECT * FROM users ORDER BY created_at ASC, age DESC")
    users = cursor.fetchall()

    # OFFSET pagination: skip (page - 1) * page_size rows
    page = 2  # second page
    page_size = 10  # rows per page
    offset = (page - 1) * page_size

    cursor.execute(
        "SELECT * FROM users ORDER BY id LIMIT %s OFFSET %s",
        (page_size, offset)
    )
    users = cursor.fetchall()

5.4 复杂查询

概念:复杂查询

复杂查询包括聚合统计、分组、JOIN 连表、子查询等高级查询技巧。

聚合统计

概念:聚合函数对一组值执行计算并返回单一值。COUNT 计数、SUM 求和、AVG 平均、MAX 最大、MIN 最小。

python 复制代码
with connection.cursor() as cursor:
    # COUNT: number of matching rows
    cursor.execute("SELECT COUNT(*) FROM users")
    count = cursor.fetchone()[0]
    print(f"用户总数: {count}")

    # COUNT with a filter
    cursor.execute("SELECT COUNT(*) FROM users WHERE is_active = %s", (True,))
    active_count = cursor.fetchone()[0]

    # SUM — `or 0` because SUM over zero rows yields NULL (None)
    cursor.execute("SELECT SUM(price) FROM orders WHERE user_id = %s", (1,))
    total = cursor.fetchone()[0] or 0

    # AVG
    cursor.execute("SELECT AVG(age) FROM users")
    avg_age = cursor.fetchone()[0]

    # MAX / MIN fetched in one round trip
    cursor.execute("SELECT MAX(price), MIN(price) FROM products")
    max_price, min_price = cursor.fetchone()

分组查询 GROUP BY

概念:GROUP BY 按一个或多个列分组,配合聚合函数实现分类统计。HAVING 用于过滤分组后的结果。

python 复制代码
with connection.cursor() as cursor:
    # Group by category; HAVING filters groups after aggregation
    sql = """
        SELECT category, COUNT(*) as count, AVG(price) as avg_price
        FROM products
        GROUP BY category
        HAVING COUNT(*) > 5
        ORDER BY count DESC
    """
    cursor.execute(sql)
    results = cursor.fetchall()
    for row in results:
        print(f"分类: {row[0]}, 数量: {row[1]}, 平均价格: {row[2]}")

JOIN 连表查询

概念:JOIN 用于连接多个表获取关联数据。INNER JOIN 只保留两边匹配的记录,LEFT JOIN 保留左边所有记录,多表 JOIN 可连接多个相关表。

python 复制代码
# Schema: users (id, username), orders (id, user_id, total), order_items (id, order_id, product_id, quantity)

with connection.cursor() as cursor:
    # INNER JOIN — only rows that match on both sides
    sql = """
        SELECT u.username, o.id, o.total, o.created_at
        FROM users u
        INNER JOIN orders o ON u.id = o.user_id
        WHERE o.total > 100
        ORDER BY o.created_at DESC
    """
    cursor.execute(sql)
    results = cursor.fetchall()

    # LEFT JOIN — keep every row from the left table (users)
    sql = """
        SELECT u.username, COUNT(o.id) as order_count, COALESCE(SUM(o.total), 0) as total_spent
        FROM users u
        LEFT JOIN orders o ON u.id = o.user_id
        GROUP BY u.id, u.username
        HAVING COUNT(o.id) > 0
    """
    cursor.execute(sql)
    results = cursor.fetchall()

    # Multi-table JOIN — walk order -> user, order -> items -> product
    sql = """
        SELECT u.username, o.id as order_id, p.name as product_name, oi.quantity
        FROM orders o
        INNER JOIN users u ON o.user_id = u.id
        INNER JOIN order_items oi ON o.id = oi.order_id
        INNER JOIN products p ON oi.product_id = p.id
        WHERE o.id = %s
    """
    cursor.execute(sql, (1,))
    items = cursor.fetchall()

子查询

概念:子查询是嵌套在另一个查询中的查询。WHERE 型子查询用于条件判断,IN/EXISTS 用于存在性检查,FROM 型子查询作为临时表。

python 复制代码
with connection.cursor() as cursor:
    # WHERE subquery — products priced above the overall average
    sql = """
        SELECT * FROM products
        WHERE price > (SELECT AVG(price) FROM products)
    """
    cursor.execute(sql)
    expensive_products = cursor.fetchall()

    # IN subquery — users that have at least one order
    sql = """
        SELECT * FROM users
        WHERE id IN (SELECT DISTINCT user_id FROM orders)
    """
    cursor.execute(sql)
    users_with_orders = cursor.fetchall()

    # EXISTS subquery — categories that contain at least one product
    sql = """
        SELECT * FROM categories c
        WHERE EXISTS (
            SELECT 1 FROM products p WHERE p.category_id = c.id
        )
    """
    cursor.execute(sql)
    categories_with_products = cursor.fetchall()

    # FROM subquery — aggregate first, then filter the derived table
    sql = """
        SELECT * FROM (
            SELECT category_id, COUNT(*) as cnt, AVG(price) as avg_price
            FROM products
            GROUP BY category_id
        ) AS stats
        WHERE cnt > 10
    """
    cursor.execute(sql)
    result = cursor.fetchall()

UNION 合并查询

概念:UNION 合并两个查询的结果集,自动去重;UNION ALL 不去重但性能更好。合并的查询必须有相同的列数和数据类型。

python 复制代码
with connection.cursor() as cursor:
    # UNION — merge the two result sets and drop duplicates
    sql = """
        SELECT username FROM users WHERE is_active = 1
        UNION
        SELECT username FROM admin_users
    """
    cursor.execute(sql)
    active_usernames = cursor.fetchall()

    # UNION ALL — keep duplicates (cheaper: no dedup pass)
    sql = """
        SELECT 'user' as type, username FROM users
        UNION ALL
        SELECT 'admin' as type, username FROM admin_users
    """
    cursor.execute(sql)
    all_users = cursor.fetchall()

5.5 数据库连接池

概念:连接池

连接池预先创建一定数量的数据库连接,使用时从池中获取,使用完毕后归还。避免频繁创建销毁连接,提升性能和资源利用率。

DBUtils PooledDB(同步连接池)

概念:DBUtils 提供的 PooledDB 是基于 pymysql 的同步连接池实现。适合 Flask/Django 等同步 Web 框架。

bash 复制代码
pip install pymysql
# 连接池功能由 DBUtils 库提供(必需)
pip install dbutils
python 复制代码
from dbutils.pooled_db import PooledDB
import pymysql

# Build the shared pool once at module import time
pool = PooledDB(
    creator=pymysql,  # driver module used to create raw connections
    maxconnections=20,  # hard cap on simultaneously open connections
    mincached=5,  # idle connections created up front
    maxcached=10,  # max idle connections kept in the pool
    blocking=True,  # block (instead of failing) when the pool is exhausted
    maxusage=None,  # recycle a connection after this many uses (None = unlimited)
    setsession=[],  # SQL statements run on each new session
    ping=1,  # liveness-check policy (1 = ping when a connection is requested)
    host="localhost",
    port=3306,
    user="root",
    password="pwd",
    database="myapp",
    charset="utf8mb4"
)

# Borrow a pooled connection for a single query
def query_users():
    conn = pool.connection()  # check a connection out of the pool
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM users")
            return cursor.fetchall()
    finally:
        conn.close()  # returns the connection to the pool (not a real close)

def insert_user(username, email):
    # Insert one row and return its auto-increment id
    conn = pool.connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("INSERT INTO users (username, email) VALUES (%s, %s)", (username, email))
            conn.commit()
            return cursor.lastrowid
    finally:
        conn.close()

# Bulk insert through one pooled connection
def batch_insert_users(users):
    conn = pool.connection()
    try:
        with conn.cursor() as cursor:
            sql = "INSERT INTO users (username, email) VALUES (%s, %s)"
            cursor.executemany(sql, users)
            conn.commit()
    finally:
        conn.close()

aiomysql(异步连接池)

概念:aiomysql 是异步 MySQL 驱动,支持 async/await。适合 FastAPI/Quart 等异步 Web 框架,配合 asyncio.gather 实现高并发。

bash 复制代码
pip install aiomysql
python 复制代码
import asyncio
import aiomysql

async def create_pool():
    # minsize/maxsize bound the pool; note aiomysql uses `db=`, not `database=`
    pool = await aiomysql.create_pool(
        host="localhost",
        port=3306,
        user="root",
        password="pwd",
        db="myapp",
        minsize=5,
        maxsize=20,
        charset="utf8mb4"
    )
    return pool

async def query_users(pool):
    # acquire() checks a connection out of the pool for this task
    async with pool.acquire() as conn:
        async with conn.cursor(aiomysql.DictCursor) as cursor:
            await cursor.execute("SELECT * FROM users")
            return await cursor.fetchall()

async def insert_user(pool, username, email):
    # Insert one row and return its auto-increment id
    async with pool.acquire() as conn:
        async with conn.cursor() as cursor:
            await cursor.execute(
                "INSERT INTO users (username, email) VALUES (%s, %s)",
                (username, email)
            )
            await conn.commit()
            return cursor.lastrowid

async def main():
    pool = await create_pool()
    try:
        # Single query
        users = await query_users(pool)
        print(users)

        # Single insert
        user_id = await insert_user(pool, "alice", "alice@example.com")
        print(f"插入ID: {user_id}")

        # Ten queries run concurrently, each on its own pooled connection
        tasks = [query_users(pool) for _ in range(10)]
        results = await asyncio.gather(*tasks)
    finally:
        # close() stops handing out connections; wait_closed() awaits shutdown
        pool.close()
        await pool.wait_closed()

asyncio.run(main())

SQLAlchemy 连接池

概念:SQLAlchemy 引擎内置连接池功能,pool_size 控制池大小,max_overflow 控制溢出连接数,pool_pre_ping 检测失效连接。

bash 复制代码
pip install sqlalchemy pymysql
python 复制代码
from sqlalchemy import create_engine, text

# Engine with its built-in connection pool
engine = create_engine(
    "mysql+pymysql://user:pwd@localhost/myapp",
    pool_size=10,           # connections kept in the pool
    max_overflow=20,        # extra connections allowed beyond pool_size
    pool_recycle=3600,      # recycle connections older than this many seconds
    pool_pre_ping=True,     # verify a connection is alive before handing it out
    echo=False              # log every emitted SQL statement when True
)

# Plain query on a pooled connection
with engine.connect() as conn:
    result = conn.execute(text("SELECT * FROM users"))
    users = result.fetchall()

# Explicit transaction: begin() commits on success, rolls back on error
with engine.connect() as conn:
    with conn.begin():
        conn.execute(text("INSERT INTO users (username) VALUES (:username)"), {"username": "alice"})

连接池参数说明

概念:根据应用场景和数据库服务器配置,合理设置连接池参数可提高性能和资源利用率。

参数 说明 推荐值
pool_size 池中连接数 5-20
max_overflow 最大扩展连接数 10-30
pool_recycle 连接回收时间(秒) 3600
pool_pre_ping 使用前检测连接 True
minsize 最小连接数 2-5
maxsize 最大连接数 10-50

连接池使用场景

python 复制代码
# Scenario 1: web app — borrow per request, return when the request ends.
# (The original passed a bare `...` after keyword arguments, which is a
# SyntaxError; the connection kwargs are shown as a comment instead.)
pool = PooledDB(
    creator=pymysql,
    maxconnections=20,
    # host="...", user="...", password="...", database="...", charset="utf8mb4"
)

def handle_request(user_id):
    """Fetch one user row on a pooled connection."""
    conn = pool.connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
            return cursor.fetchone()
    finally:
        conn.close()  # hands the connection back to the pool

# Scenario 2: scheduled job — batch work on a pooled connection
def daily_report():
    """Count today's orders."""
    conn = pool.connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT COUNT(*) FROM orders WHERE DATE(created_at) = CURDATE()")
            return cursor.fetchone()
    finally:
        conn.close()

# Scenario 3: async web — use an aiomysql pool instead
async def async_query(pool):
    """Fetch all users through an async pooled connection."""
    async with pool.acquire() as conn:
        async with conn.cursor() as cursor:
            await cursor.execute("SELECT * FROM users")
            return await cursor.fetchall()

5.6 ORM 操作

概念:ORM

ORM(Object-Relational Mapping)将数据库表映射为 Python 类,用面向对象方式操作数据库。

SQLAlchemy ORM

概念:SQLAlchemy 是 Python 最强大的 ORM 库,支持完整的 SQL 表达式。declarative_base 创建模型基类,sessionmaker 创建会话工厂。

bash 复制代码
pip install sqlalchemy
python 复制代码
from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime, ForeignKey
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship
from datetime import datetime

# NOTE(review): sqlalchemy.ext.declarative.declarative_base is deprecated in
# SQLAlchemy 1.4+ (moved to sqlalchemy.orm) — confirm the installed version.
engine = create_engine("mysql+pymysql://user:pwd@localhost/myapp", pool_pre_ping=True)
Base = declarative_base()
Session = sessionmaker(bind=engine)

# Model mapped to the `users` table
class User(Base):
    __tablename__ = "users"

    id = Column(Integer, primary_key=True, autoincrement=True)
    username = Column(String(50), unique=True, nullable=False)
    email = Column(String(100), unique=True, nullable=False)
    age = Column(Integer, default=0)
    is_active = Column(Boolean, default=True)
    # datetime.utcnow is naive UTC and deprecated in Python 3.12 —
    # consider a timezone-aware default in new code
    created_at = Column(DateTime, default=datetime.utcnow)

    orders = relationship("Order", back_populates="user")

    def __repr__(self):
        return f"<User {self.username}>"

class Order(Base):
    __tablename__ = "orders"

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey("users.id"))
    total = Column(Integer, default=0)
    created_at = Column(DateTime, default=datetime.utcnow)

    user = relationship("User", back_populates="orders")

# Create all mapped tables that don't exist yet
Base.metadata.create_all(engine)

# CRUD walkthrough on a single session
session = Session()
try:
    # CREATE
    user = User(username="alice", email="alice@example.com", age=25)
    session.add(user)
    session.commit()

    # READ
    user = session.query(User).filter_by(username="alice").first()
    users = session.query(User).filter(User.age >= 18).all()

    # UPDATE — mutate the mapped object, then commit
    user.age = 26
    session.commit()

    # DELETE
    session.delete(user)
    session.commit()

    # JOIN through the mapped foreign key
    users_with_orders = session.query(User).join(Order).filter(Order.total > 100).all()

    # Aggregates via sqlalchemy.func
    from sqlalchemy import func
    stats = session.query(
        User.username,
        func.count(Order.id).label("order_count"),
        func.sum(Order.total).label("total_spent")
    ).join(Order).group_by(User.id).all()

finally:
    session.close()

5.7 数据库事务深入

概念:事务

事务是数据库中一组原子性的操作,确保 ACID 特性:原子性(Atomicity)、一致性(Consistency)、隔离性(Isolation)、持久性(Durability)。

事务隔离级别

概念:隔离级别决定事务间的隔离程度。MySQL 支持 4 种隔离级别:READ UNCOMMITTED、READ COMMITTED、REPEATABLE READ(默认)、SERIALIZABLE。

python 复制代码
import pymysql

connection = pymysql.connect(
    host="localhost",
    user="root",
    password="pwd",
    database="myapp",
    autocommit=False  # transactions are committed manually
)

with connection:
    # The isolation level applies to the following transaction on this session
    with connection.cursor() as cursor:
        cursor.execute("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")

    # Start the transaction explicitly
    connection.begin()

    try:
        with connection.cursor() as cursor:
            # Transfer: both updates must succeed or neither does
            cursor.execute("UPDATE accounts SET balance = balance - 100 WHERE user_id = 1")
            cursor.execute("UPDATE accounts SET balance = balance + 100 WHERE user_id = 2")

        # Make both changes permanent
        connection.commit()
    except Exception as e:
        # Undo both updates on any failure
        connection.rollback()
        print(f"事务回滚: {e}")

事务隔离级别说明

隔离级别 脏读 不可重复读 幻读
READ UNCOMMITTED 可能 可能 可能
READ COMMITTED 不可能 可能 可能
REPEATABLE READ 不可能 不可能 可能
SERIALIZABLE 不可能 不可能 不可能

Savepoint 保存点

概念:Savepoint 允许在事务中创建中间点,可以只回滚到指定保存点而不是整个事务。适合复杂事务中的部分回滚需求。

python 复制代码
with connection:
    connection.begin()

    try:
        with connection.cursor() as cursor:
            cursor.execute("INSERT INTO users (username) VALUES ('a')")
            # Mark a point we can roll back to without abandoning the transaction
            savepoint1 = "savepoint1"
            cursor.execute(f"SAVEPOINT {savepoint1}")

            cursor.execute("INSERT INTO users (username) VALUES ('b')")
            # Undo only the work done after the savepoint (the 'b' insert);
            # the 'a' insert survives and is committed below
            cursor.execute(f"ROLLBACK TO SAVEPOINT {savepoint1}")

        connection.commit()
    except Exception as e:
        connection.rollback()

5.8 SQL 注入防护

概念:SQL 注入

SQL 注入是一种常见攻击手段,攻击者通过在输入中插入恶意 SQL 代码来操作数据库。防护重点是永远不要拼接 SQL 字符串,始终使用参数化查询。

✅ 正确做法:参数化查询

概念:参数化查询使用占位符(%s)传递参数,驱动程序会自动转义特殊字符,防止 SQL 注入攻击。这是唯一安全的 SQL 编写方式。

python 复制代码
# ✅ Correct: parameterized query
with connection.cursor() as cursor:
    username = "alice' OR '1'='1"  # malicious input
    cursor.execute("SELECT * FROM users WHERE username = %s", (username,))
    # the driver escapes the value — safe

# ✅ Correct: multiple parameters
with connection.cursor() as cursor:
    cursor.execute("SELECT * FROM users WHERE age > %s AND is_active = %s", (18, True))

# ✅ Correct: LIKE with a parameter (wildcards in user input still need escaping)
with connection.cursor() as cursor:
    keyword = "%abc%"  # user input
    cursor.execute("SELECT * FROM users WHERE username LIKE %s", (keyword,))

❌ 错误做法:字符串拼接

概念:字符串拼接 SQL 是 SQL 注入攻击的根本原因。无论是否信任用户输入,都必须使用参数化查询。

python 复制代码
# ❌ Wrong: never do this!
with connection.cursor() as cursor:
    username = "alice' OR '1'='1"  # malicious input
    cursor.execute(f"SELECT * FROM users WHERE username = '{username}'")
    # the quote in the input breaks out of the string literal — SQL injection!

LIKE 查询防注入

概念:LIKE 通配符(%、_)也可能被攻击者利用。需要在转义普通特殊字符后,再将用户输入作为 LIKE 参数传递。

python 复制代码
# LIKE 查询需要转义特殊字符
import pymysql
import re

def escape_like(value):
    """Escape LIKE wildcards (% _) and the escape char (\\) so the
    input matches literally inside a LIKE pattern."""
    table = str.maketrans({"\\": "\\\\", "%": "\\%", "_": "\\_"})
    return value.translate(table)

with connection.cursor() as cursor:
    keyword = "100%"  # user input containing a literal %
    escaped = escape_like(keyword)
    cursor.execute("SELECT * FROM users WHERE username LIKE %s", (f"%{escaped}%",))

5.9 错误处理与重试

概念:错误处理

数据库操作可能遇到网络中断、连接超时、死锁等错误,需要适当的错误处理和重试机制。

错误类型与处理

概念:数据库错误分为操作性错误(网络中断、超时)和数据库错误(约束冲突、语法错误)。不同错误类型需要不同的处理策略。

python 复制代码
import pymysql
from pymysql.err import OperationalError, InterfaceError, DatabaseError
import time

def get_connection():
    """Open a fresh pymysql connection with the demo credentials."""
    settings = {
        "host": "localhost",
        "user": "root",
        "password": "pwd",
        "database": "myapp",
        "charset": "utf8mb4",
    }
    return pymysql.connect(**settings)

def query_with_retry(sql, params=None, max_retries=3, retry_delay=1):
    """Run a query, retrying transient failures up to max_retries times.

    Connection/network errors retry with a fixed delay; other database
    errors (e.g. deadlocks) retry with a linearly increasing delay.
    The last attempt re-raises the original exception.
    """
    attempt = 0
    while attempt < max_retries:
        try:
            connection = get_connection()
            with connection:
                with connection.cursor() as cursor:
                    if params:
                        cursor.execute(sql, params)
                    else:
                        cursor.execute(sql)
                    return cursor.fetchall()
        except (OperationalError, InterfaceError) as e:
            # Network failure / dropped connection
            print(f"尝试 {attempt + 1} 失败: {e}")
            if attempt >= max_retries - 1:
                raise
            time.sleep(retry_delay)
        except DatabaseError as e:
            # Database-side error (e.g. deadlock): back off a bit longer each time
            print(f"数据库错误: {e}")
            if attempt >= max_retries - 1:
                raise
            time.sleep(retry_delay * (attempt + 1))
        attempt += 1

# Example
results = query_with_retry("SELECT * FROM users")

连接失效自动重连

概念:数据库连接可能因超时或服务器重启而失效。通过 ping() 检测连接活性,失效时自动重建连接池,保证服务可用性。

python 复制代码
from dbutils.pooled_db import PooledDB
import pymysql

class ReconnectPoolDB:
    # Thin wrapper around DBUtils' PooledDB that rebuilds the whole pool
    # when a borrowed connection turns out to be dead.
    def __init__(self, **kwargs):
        # kwargs: pymysql connection parameters (host, user, password, ...)
        self.kwargs = kwargs
        self.pool = None
        self._create_pool()

    def _create_pool(self):
        # ping=1: DBUtils checks liveness whenever a connection is requested
        self.pool = PooledDB(
            creator=pymysql,
            maxconnections=20,
            mincached=5,
            ping=1,
            **self.kwargs
        )

    def connection(self):
        # Borrow a connection; on any failure assume the pool is stale,
        # rebuild it once, and borrow again.
        try:
            conn = self.pool.connection()
            # ping(reconnect=True) transparently re-opens a dead socket
            conn.ping(reconnect=True)
            return conn
        except Exception:
            self._create_pool()
            return self.pool.connection()

# Usage
pool = ReconnectPoolDB(
    host="localhost",
    user="root",
    password="pwd",
    database="myapp"
)

5.10 批量插入优化

概念:批量插入

当需要插入大量数据时,循环单条插入效率很低。优化方式包括 executemany、LOAD DATA INFILE、批量提交。

executemany 批量插入

概念:executemany 是单次 API 调用执行多条 SQL 的高效方式。比循环多次 execute 减少网络往返次数,提升插入性能。

python 复制代码
# Plain bulk insert via executemany (one API call, many rows)
with connection.cursor() as cursor:
    sql = "INSERT INTO users (username, email) VALUES (%s, %s)"
    data = [
        ("user1", "user1@example.com"),
        ("user2", "user2@example.com"),
        # ... 10000 rows
    ]
    cursor.executemany(sql, data)
    connection.commit()

批量插入优化策略

python 复制代码
def batch_insert_optimized(table, columns, values_batch, batch_size=1000):
    """
    Large bulk insert: chunked executemany with a commit per chunk.

    NOTE(review): `table` and `columns` are interpolated into the SQL text
    (identifiers cannot be placeholders) — they must come from trusted code,
    never from user input.
    """
    total = len(values_batch)
    with connection.cursor() as cursor:
        sql = f"INSERT INTO {table} ({', '.join(columns)}) VALUES ({', '.join(['%s'] * len(columns))})"

        # Commit chunk by chunk so each transaction (and its locks) stays small
        for i in range(0, total, batch_size):
            batch = values_batch[i:i + batch_size]
            cursor.executemany(sql, batch)
            connection.commit()
            print(f"已插入 {min(i + batch_size, total)}/{total} 条")

# Example: 100k synthetic rows in chunks of 5000
batch_insert_optimized(
    table="users",
    columns=["username", "email", "age"],
    values_batch=[(f"user{i}", f"user{i}@example.com", i % 100) for i in range(100000)],
    batch_size=5000
)

LOAD DATA INFILE(最快)

概念:LOAD DATA INFILE 是 MySQL 服务器端批量导入命令,数据直接由服务器读取文件,避免了客户端与服务器的大量数据传输,是最快的导入方式。

python 复制代码
# MySQL's LOAD DATA INFILE is typically 10-20x faster than INSERT
def load_data_infile(table, columns, filepath):
    """Bulk-load a CSV file via LOAD DATA LOCAL INFILE.

    NOTE(review): `table`, `columns` and `filepath` are interpolated into the
    SQL text (LOAD DATA cannot take placeholders) — trusted values only.
    Presumably requires local_infile enabled on both client and server —
    confirm the deployment allows it.
    """
    with connection.cursor() as cursor:
        cols = ", ".join(columns)
        sql = f"""
            LOAD DATA LOCAL INFILE '{filepath}'
            INTO TABLE {table}
            FIELDS TERMINATED BY ','
            ENCLOSED BY '"'
            LINES TERMINATED BY '\\n'
            ({cols})
        """
        cursor.execute(sql)
        connection.commit()

# Expected CSV layout:
# username,email,age
# user1,user1@example.com,25
# user2,user2@example.com,30

异步批量插入(aiomysql)

概念:异步批量插入结合 aiomysql 和 asyncio,适用于 FastAPI 等异步框架的高并发场景,可以批量插入大量数据而不阻塞事件循环。

python 复制代码
import asyncio
import aiomysql

async def batch_insert_async(pool, users):
    """Bulk insert through an aiomysql pooled connection."""
    async with pool.acquire() as conn:
        async with conn.cursor() as cursor:
            sql = "INSERT INTO users (username, email) VALUES (%s, %s)"
            # aiomysql cursors support executemany just like pymysql
            await cursor.executemany(sql, users)
            await conn.commit()

async def main():
    pool = await aiomysql.create_pool(host="localhost", user="root", password="pwd", db="myapp")
    try:
        users = [(f"user{i}", f"user{i}@example.com") for i in range(10000)]
        await batch_insert_async(pool, users)
    finally:
        # Stop handing out connections, then wait for them all to close
        pool.close()
        await pool.wait_closed()

asyncio.run(main())

5.11 分页深入

概念:分页

分页有两种方式:OFFSET 分页(传统)和游标分页(性能更好)。大数据量时推荐游标分页。

OFFSET 分页(传统)

概念:OFFSET 分页通过 LIMIT offset, count 实现。简单直观,但 offset 过大会导致性能问题(数据库需扫描丢弃的行)。

python 复制代码
# 传统 OFFSET 分页
def paginate_offset(page, page_size):
    """Classic OFFSET/LIMIT pagination; also runs a COUNT(*) for page totals."""
    offset = (page - 1) * page_size
    with connection.cursor() as cursor:
        cursor.execute(
            "SELECT * FROM users ORDER BY id LIMIT %s OFFSET %s",
            (page_size, offset)
        )
        rows = cursor.fetchall()

        # COUNT(*) can be expensive on large tables
        cursor.execute("SELECT COUNT(*) FROM users")
        (total,) = cursor.fetchone()

    page_count = -(-total // page_size)  # ceiling division
    return {
        "data": rows,
        "page": page,
        "page_size": page_size,
        "total": total,
        "total_pages": page_count,
    }

游标分页(推荐大数据量)

概念:游标分页基于上一页最后一条的 ID 进行查询,WHERE id > last_id。查询效率稳定,不受数据量影响,适合无限滚动场景。

python 复制代码
# 游标分页:基于上一页最后一条的 ID
def paginate_cursor(last_id, page_size):
    """Keyset ("cursor") pagination: constant-time pages keyed on the last seen id."""
    if last_id is None:
        sql = "SELECT * FROM users ORDER BY id LIMIT %s"
        args = (page_size,)
    else:
        sql = "SELECT * FROM users WHERE id > %s ORDER BY id LIMIT %s"
        args = (last_id, page_size)

    with connection.cursor() as cursor:
        cursor.execute(sql, args)
        rows = cursor.fetchall()

    # Cursor for the next page: the id (first column) of the last row fetched
    return {
        "data": rows,
        "next_cursor": rows[-1][0] if rows else None,
        "has_more": len(rows) == page_size,
    }

# Walk every page until the result set is exhausted
result = paginate_cursor(last_id=None, page_size=10)
while result["has_more"]:
    print(result["data"])
    result = paginate_cursor(last_id=result["next_cursor"], page_size=10)

OFFSET vs 游标分页

概念:两种分页方式各有优劣。OFFSET 适合需要跳页的场景(如点击页码),游标分页适合无限滚动和大数据量性能优化。

特性 OFFSET 分页 游标分页
实现复杂度 简单 稍复杂
大数据量性能 慢(OFFSET 越大越慢) 快(恒定时间)
支持跳页 支持 不支持
适用场景 小数据量、需跳页 大数据量、顺序浏览

5.12 数据库迁移

概念:数据库迁移

数据库迁移用于管理数据库结构的变更(创建表、修改字段等),确保多环境数据库结构一致。Alembic 是 SQLAlchemy 推荐的迁移工具。

Alembic 迁移

概念:Alembic 是 SQLAlchemy 官方推荐的数据库迁移工具,通过版本化的迁移脚本管理数据库结构变更,支持升级、降级和版本历史。

bash 复制代码
pip install alembic
bash 复制代码
# 初始化
alembic init alembic

# 生成迁移文件
alembic revision --autogenerate -m "Add users table"

# 升级
alembic upgrade head

# 降级
alembic downgrade -1

# 查看历史
alembic history

迁移文件示例

python 复制代码
# alembic/versions/xxxx_add_users.py
from alembic import op
import sqlalchemy as sa

def upgrade():
    # Applied on `alembic upgrade`: create users with unique username/email
    op.create_table(
        'users',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('username', sa.String(50), nullable=False),
        sa.Column('email', sa.String(100), nullable=False),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('username'),
        sa.UniqueConstraint('email')
    )

def downgrade():
    # Applied on `alembic downgrade`: undo the upgrade by dropping the table
    op.drop_table('users')

Django 迁移

概念:Django 自带 ORM 和迁移系统,makemigrations 自动生成迁移文件,migrate 执行变更。适合 Django 全栈项目快速开发。

bash 复制代码
# Django 自带迁移功能
python manage.py makemigrations  # 创建迁移
python manage.py migrate         # 执行迁移
python manage.py showmigrations  # 查看迁移状态

5.13 异步 ORM

概念:异步 ORM

异步 ORM 允许在异步环境中高效操作数据库,避免阻塞。SQLAlchemy 从 1.4 开始支持异步。

SQLAlchemy 异步

概念:SQLAlchemy 异步使用 create_async_engine 创建异步引擎,async_sessionmaker 创建会话工厂,配合 async/await 实现非阻塞数据库操作。

bash 复制代码
pip install sqlalchemy[asyncio] aiosqlite
python 复制代码
import asyncio
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.orm import declarative_base
from sqlalchemy import Column, Integer, String, select  # select was missing — crud_operations uses it

Base = declarative_base()

class User(Base):
    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    username = Column(String(50))

# Async engine (SQLite via the aiosqlite driver)
engine = create_async_engine(
    "sqlite+aiosqlite:///myapp.db",
    echo=True
)

# Session factory producing AsyncSession instances
async_session = async_sessionmaker(engine, class_=AsyncSession)

async def crud_operations():
    """Demonstrate async CREATE/READ/UPDATE/DELETE with AsyncSession."""
    async with async_session() as session:
        # CREATE
        user = User(username="alice")
        session.add(user)
        await session.commit()

        # READ — 2.0-style select(); this raised NameError before the import fix
        result = await session.execute(
            select(User).where(User.username == "alice")
        )
        user = result.scalar_one_or_none()

        # UPDATE — mutate the mapped object, then commit
        user.username = "alice_new"
        await session.commit()

        # DELETE
        await session.delete(user)
        await session.commit()

# NOTE(review): the users table is never created here (no
# Base.metadata.create_all via run_sync) — assumes myapp.db already has it.
asyncio.run(crud_operations())

5.14 实战案例:用户管理系统

完整 CRUD 示例

概念:用户管理系统演示了使用连接池 + contextmanager 的最佳实践,实现用户的注册、登录、信息更新、积分累计等完整业务功能。

python 复制代码
import pymysql
from dbutils.pooled_db import PooledDB
from contextlib import contextmanager

# Module-level pool shared by all UserService methods
pool = PooledDB(
    creator=pymysql,
    maxconnections=20,
    mincached=5,
    host="localhost",
    user="root",
    password="pwd",
    database="myapp",
    charset="utf8mb4"
)

@contextmanager
def get_connection():
    """Borrow a pooled connection; always return it, even on error."""
    conn = pool.connection()
    try:
        yield conn
    finally:
        conn.close()  # hands the connection back to the pool

class UserService:
    """CRUD helpers for the `users` table, backed by the module-level pool."""

    @staticmethod
    def create(username, email, age=0):
        """Insert one user and return its auto-increment id."""
        with get_connection() as conn:
            with conn.cursor() as cursor:
                sql = "INSERT INTO users (username, email, age) VALUES (%s, %s, %s)"
                cursor.execute(sql, (username, email, age))
                conn.commit()
                return cursor.lastrowid

    @staticmethod
    def get_by_id(user_id):
        """Return the user row as a dict, or None if not found."""
        with get_connection() as conn:
            with conn.cursor(pymysql.cursors.DictCursor) as cursor:
                cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
                return cursor.fetchone()

    @staticmethod
    def get_by_username(username):
        """Return the user row as a dict, or None if not found."""
        with get_connection() as conn:
            with conn.cursor(pymysql.cursors.DictCursor) as cursor:
                cursor.execute("SELECT * FROM users WHERE username = %s", (username,))
                return cursor.fetchone()

    @staticmethod
    def list_all(page=1, page_size=10):
        """Return one page of users ordered by id (OFFSET pagination)."""
        offset = (page - 1) * page_size
        with get_connection() as conn:
            with conn.cursor(pymysql.cursors.DictCursor) as cursor:
                cursor.execute("SELECT * FROM users ORDER BY id LIMIT %s OFFSET %s", (page_size, offset))
                return cursor.fetchall()

    @staticmethod
    def update(user_id, **kwargs):
        """Update arbitrary columns; return True if a row changed.

        Column names come from kwargs keys and must be interpolated into
        the SQL text (placeholders cannot stand for identifiers), so each
        key is validated first to block SQL injection via field names.
        """
        if not kwargs:
            return False
        for field in kwargs:
            # refuse anything that is not a plain identifier
            # (e.g. "age = 0; DROP TABLE users; --")
            if not field.isidentifier():
                raise ValueError(f"invalid column name: {field!r}")
        fields = ", ".join([f"{k} = %s" for k in kwargs.keys()])
        values = list(kwargs.values()) + [user_id]
        with get_connection() as conn:
            with conn.cursor() as cursor:
                sql = f"UPDATE users SET {fields} WHERE id = %s"
                cursor.execute(sql, values)
                conn.commit()
                return cursor.rowcount > 0

    @staticmethod
    def delete(user_id):
        """Delete by primary key; return True if a row was removed."""
        with get_connection() as conn:
            with conn.cursor() as cursor:
                cursor.execute("DELETE FROM users WHERE id = %s", (user_id,))
                conn.commit()
                return cursor.rowcount > 0

    @staticmethod
    def search(keyword, page=1, page_size=10):
        """Paged fuzzy search over username and email."""
        offset = (page - 1) * page_size
        with get_connection() as conn:
            with conn.cursor(pymysql.cursors.DictCursor) as cursor:
                sql = """
                    SELECT * FROM users
                    WHERE username LIKE %s OR email LIKE %s
                    ORDER BY id LIMIT %s OFFSET %s
                """
                cursor.execute(sql, (f"%{keyword}%", f"%{keyword}%", page_size, offset))
                return cursor.fetchall()

    @staticmethod
    def batch_create(users_data):
        """Bulk insert (username, email, age) tuples; return the row count."""
        with get_connection() as conn:
            with conn.cursor() as cursor:
                sql = "INSERT INTO users (username, email, age) VALUES (%s, %s, %s)"
                cursor.executemany(sql, users_data)
                conn.commit()
                return cursor.rowcount

# Demo walkthrough (requires a running MySQL with the users table)
if __name__ == "__main__":
    # Create
    user_id = UserService.create("alice", "alice@example.com", 25)
    print(f"创建用户 ID: {user_id}")

    # Read
    user = UserService.get_by_id(1)
    print(f"用户: {user}")

    # Update
    UserService.update(1, age=26, is_active=True)

    # Paged listing
    users = UserService.list_all(page=1, page_size=10)
    for u in users:
        print(u)

    # Fuzzy search
    results = UserService.search("alice")
    print(f"搜索结果: {results}")

    # Bulk create
    UserService.batch_create([
        ("bob", "bob@example.com", 30),
        ("charlie", "charlie@example.com", 28)
    ])

    # Delete
    UserService.delete(1)

相关推荐
被摘下的星星2 小时前
MySQL 别名使用规则详解
数据库·mysql
zhangzeyuaaa2 小时前
Python变量的四种作用域
开发语言·python
范纹杉想快点毕业2 小时前
C语言全能实战教程
c语言·开发语言
墨着染霜华2 小时前
MySQL 重复数据删除语句
数据库·mysql
ego.iblacat2 小时前
PostgreSQL 数据库
数据库·postgresql
Hommy882 小时前
【开源剪映小助手-客户端】桌面客户端
python·开源·node.js·github·剪映小助手
大空大地20262 小时前
程序调试与异常处理
开发语言
二等饼干~za8986683 小时前
源码可控:云罗 GEO 源头工厂,开源搭建 + 二次开发全链路解决方案
服务器·开发语言·开源·php·音视频·ai-native
2501_921649493 小时前
2026个人量化交易免费数据API接入:从选型到实操
经验分享·python·金融·api·个人开发·量化交易