# 第三专题:三范式与反范式实战 (Topic 3: normal forms and denormalization in practice)
# Language: python
from manim import *
from manim_slides import Slide
# Register the LaTeX packages this deck adds to the TeX template preamble:
# ctex (Chinese typesetting) and xcolor. Adjust to the local TeX installation.
for _preamble_line in (r"\usepackage{ctex}", r"\usepackage{xcolor}"):
    config.tex_template.add_to_preamble(_preamble_line)
class TableDesignOptimization(Slide):
    """Slide deck on basic PostgreSQL table-design rules.

    The deck walks through eight sections: a cover page, an overview of the
    three normal forms, a 1NF example, a 2NF/3NF example, a denormalization
    example, data-type selection, primary-key strategies and a summary.

    ``construct`` is the only entry point; every ``_show_*`` method renders
    exactly one section and leaves slide switching to ``construct``.

    NOTE(review): the SQL snippets are kept byte-for-byte, including their
    column-0 layout; that layout may be an artifact of lost formatting in the
    original file -- confirm before re-indenting the displayed SQL.
    """

    def construct(self):
        """Render all sections in order, one presentation slide per section."""
        sections = (
            self._show_cover,
            self._show_normal_forms_overview,
            self._show_first_normal_form,
            self._show_second_and_third_normal_form,
            self._show_denormalization,
            self._show_data_types,
            self._show_primary_keys,
            self._show_summary,
        )
        for position, show_section in enumerate(sections):
            if position:
                # Close the previous slide first, then wipe the frame so the
                # clearing belongs to the new slide.
                self.next_slide()
                self.clear()
            show_section()

    # ------------------------------------------------------------------
    # Shared builders
    # ------------------------------------------------------------------

    @staticmethod
    def _sql_code(sql):
        """Return a ``Code`` mobject showing *sql* with the deck-wide styling."""
        return Code(
            code_string=sql,
            language="sql",
            formatter_style="fruity",
            background="window",
            add_line_numbers=True,
            paragraph_config={"font": "Milky Han Mono SC", "font_size": 20},
        )

    @staticmethod
    def _grid_table(rows, headers, font_size):
        """Return a ``Table`` of *rows* with *headers* as column labels."""
        return Table(
            rows,
            col_labels=[Text(label) for label in headers],
            include_outer_lines=True,
            line_config={"stroke_width": 1, "color": GRAY},
            element_to_mobject_config={"font_size": font_size},
        )

    def _show_heading(self, text, font_size=40):
        """Write a yellow section heading at the top edge of the frame."""
        heading = Text(text, font_size=font_size, color=YELLOW).to_edge(UP)
        self.play(Write(heading))

    def _show_footnote(self, text, color):
        """Write a one-line takeaway at the bottom edge of the frame."""
        footnote = Text(text, font_size=24, color=color).to_edge(DOWN)
        self.play(Write(footnote))

    # ------------------------------------------------------------------
    # Sections
    # ------------------------------------------------------------------

    def _show_cover(self):
        """Cover page: deck title, topic subtitle and motto."""
        title = Text("PostgreSQL 表结构设计基础规范", font_size=48, color=BLUE)
        subtitle = Text("第三专题:三范式与反范式实战", font_size=36, color=GRAY)
        authors = Text("少查·快连·精索·常析·避坑", font_size=28, color=GREEN)
        # The group exists only to lay the three texts out vertically; it is
        # neither kept nor animated itself.
        VGroup(title, subtitle, authors).arrange(DOWN, buff=0.5)
        self.play(Write(title))
        self.play(FadeIn(subtitle, shift=UP))
        self.play(FadeIn(authors, shift=UP))
        self.wait(1)

    def _show_normal_forms_overview(self):
        """Section 1: table summarizing 1NF, 2NF and 3NF."""
        self._show_heading("1. 数据库三范式基础")
        nf_table = self._grid_table(
            [
                ["第一范式(1NF)", "属性不可再分", "每个字段原子性,不含数组/JSON"],
                ["第二范式(2NF)", "消除部分依赖", "非主键列完全依赖主键"],
                ["第三范式(3NF)", "消除传递依赖", "非主键列不依赖其他非主键列"],
            ],
            ["范式", "核心要求", "通俗解释"],
            font_size=24,
        ).scale(0.7).shift(UP*1)
        self.play(Create(nf_table))
        self.wait(2)

    def _show_first_normal_form(self):
        """Section 2: array-based schema (left) versus a detail table (right)."""
        self._show_heading("2. 第一范式(1NF):属性不可再分")

        violating_sql = '''
-- 违反1NF:使用数组存储多个值
CREATE TABLE orders_bad (
id SERIAL PRIMARY KEY,
order_date DATE,
product_ids INTEGER[], -- 违反原子性,存储多个产品ID
quantities INTEGER[] -- 难以查询和维护
);
-- 查询困难:需要拆解数组
SELECT * FROM orders_bad
WHERE 123 = ANY(product_ids);
'''
        compliant_sql = '''
-- 符合1NF:拆分为明细表
CREATE TABLE orders_good (
id SERIAL PRIMARY KEY,
order_date DATE
);
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INT REFERENCES orders_good(id),
product_id INT NOT NULL,
quantity INT NOT NULL
);
-- 查询简单高效
SELECT * FROM order_items
WHERE product_id = 123;
'''
        bad_1nf = self._sql_code(violating_sql).scale(0.7).shift(LEFT*3+UP*0.5)
        good_1nf = self._sql_code(compliant_sql).scale(0.7).shift(RIGHT*4+UP*0.5)

        self.play(FadeIn(bad_1nf, shift=LEFT))
        self.play(FadeIn(good_1nf, shift=RIGHT))
        self._show_footnote(
            "1NF核心:每个字段只存一个值,避免数组、JSON等复杂类型",
            BLUE,
        )
        self.wait(2)

    def _show_second_and_third_normal_form(self):
        """Section 3: redundant wide table (left) versus split tables (right)."""
        self._show_heading("3. 第二、三范式:消除依赖")

        violating_sql = '''
-- 违反2NF和3NF:大量冗余
CREATE TABLE student_courses_bad (
student_id INT,
course_id INT,
student_name VARCHAR(100), -- 部分依赖:只依赖student_id
course_name VARCHAR(100), -- 部分依赖:只依赖course_id
teacher_name VARCHAR(100), -- 传递依赖:依赖course_name
score DECIMAL,
PRIMARY KEY (student_id, course_id)
);
-- 问题:数据冗余,更新异常
UPDATE student_courses_bad
SET teacher_name = '新老师'
WHERE course_name = '数学'; -- 可能更新多行
'''
        compliant_sql = '''
-- 符合2NF和3NF:拆分表消除冗余
CREATE TABLE students (
student_id SERIAL PRIMARY KEY,
student_name VARCHAR(100)
);
CREATE TABLE courses (
course_id SERIAL PRIMARY KEY,
course_name VARCHAR(100),
teacher_name VARCHAR(100)
);
CREATE TABLE scores (
student_id INT REFERENCES students(student_id),
course_id INT REFERENCES courses(course_id),
score DECIMAL,
PRIMARY KEY (student_id, course_id)
);
-- 更新一处即可
UPDATE courses SET teacher_name = '新老师'
WHERE course_name = '数学';'''
        bad_nf23 = self._sql_code(violating_sql).scale(0.7).shift(LEFT*3+UP*0.5)
        # The right-hand snippet is longer, so it is not raised like the left one.
        good_nf23 = self._sql_code(compliant_sql).scale(0.7).shift(RIGHT*4+UP*0)

        self.play(FadeIn(bad_nf23, shift=LEFT))
        self.play(FadeIn(good_nf23, shift=RIGHT))
        self._show_footnote("2NF: 消除部分依赖 | 3NF: 消除传递依赖", GREEN)
        self.wait(2)

    def _show_denormalization(self):
        """Section 4: JOIN-based query versus a redundant product_name column."""
        self._show_heading("4. 适度反范式:性能与规范平衡")

        denorm_sql = '''
-- 场景:电商订单需要频繁查询订单及商品名称
-- 方案1(纯范式):每次查询都需要JOIN
SELECT o.id, o.order_date, p.name, oi.quantity
FROM orders o
JOIN order_items oi ON o.id = oi.order_id
JOIN products p ON oi.product_id = p.id
WHERE o.id = 123;
-- 方案2(反范式):在order_items冗余product_name
CREATE TABLE order_items_denorm (
id SERIAL PRIMARY KEY,
order_id INT REFERENCES orders(id),
product_id INT,
product_name VARCHAR(200), -- 冗余字段,避免JOIN
quantity INT
);
-- 查询:一次扫描,无需JOIN
SELECT product_name, quantity
FROM order_items_denorm
WHERE order_id = 123;
-- 维护策略:在product更新时同步更新order_items_denorm
'''
        denorm_code = self._sql_code(denorm_sql).scale(0.6).shift(UP*0)
        self.play(FadeIn(denorm_code, shift=UP))
        self._show_footnote(
            "反范式适用场景:读多写少 | 冗余字段更新频率低 | 性能瓶颈在JOIN",
            BLUE,
        )
        self.wait(2)

    def _show_data_types(self):
        """Section 5: data-type comparison table plus good/bad DDL examples."""
        self._show_heading("5. 合理选择数据类型")

        type_table = self._grid_table(
            [
                ["整数类型", "INT (4B)", "SMALLINT (2B)", "BIGINT (8B)"],
                ["字符类型", "VARCHAR(n)", "TEXT (无限)", "CHAR(n) (定长)"],
                ["时间类型", "TIMESTAMP (8B)", "DATE (4B)", "TIME (8B)"],
                ["选择原则", "够用就好", "避免过度设计", "考虑未来扩展"],
            ],
            ["类别", "推荐", "备选", "说明"],
            font_size=22,
        ).scale(0.6).shift(UP*1)
        self.play(Create(type_table))

        type_sql = '''
-- 好的数据类型选择
CREATE TABLE users_optimized (
id BIGSERIAL PRIMARY KEY, -- 预计用户量千万级
status SMALLINT DEFAULT 1, -- 状态值有限,用SMALLINT
username VARCHAR(50) NOT NULL, -- 用户名长度有限
bio TEXT, -- 简介可能很长,用TEXT
created_at TIMESTAMPTZ DEFAULT NOW(), -- 带时区时间戳
last_login DATE -- 只需要日期
);
-- 不好的选择
CREATE TABLE users_bad (
id VARCHAR(50) PRIMARY KEY, -- 用数字序列但选字符串
status INT, -- 浪费空间
username TEXT, -- 过度设计
created_at TIMESTAMP -- 丢失时区信息
);'''
        type_example = (
            self._sql_code(type_sql).scale(0.5).to_edge(DOWN).shift(DOWN*0.5)
        )
        self.play(FadeIn(type_example, shift=DOWN))
        self.wait(2)

    def _show_primary_keys(self):
        """Section 6: primary-key comparison table plus three DDL scenarios."""
        self._show_heading("6. 主键必设且合理选择")

        pk_table = self._grid_table(
            [
                ["自增INT/BIGINT", "4-8B", "顺序写入, 性能好", "分布式冲突"],
                ["UUID (v4)", "16B", "全局唯一, 适合分布式", "随机IO, 性能较差"],
                ["ULID", "16B", "可排序, 全局唯一", "较新, 生态支持有限"],
                ["业务主键", "不定", "无需额外列", "业务变更困难"],
            ],
            ["主键类型", "大小", "优点", "缺点"],
            font_size=22,
        ).scale(0.6).shift(UP*1)
        self.play(Create(pk_table))

        pk_sql = '''
-- 场景1:单库场景 - 使用自增BIGINT
CREATE TABLE orders_local (
id BIGSERIAL PRIMARY KEY,
order_no VARCHAR(50) UNIQUE NOT NULL, -- 业务唯一键
amount DECIMAL(10,2)
);
-- 场景2:分布式场景 - 使用UUID
CREATE TABLE orders_distributed (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
...
);
-- 场景3:使用ULID(需扩展)
CREATE EXTENSION IF NOT EXISTS pg_ulid;
CREATE TABLE orders_ulid (
id ULID PRIMARY KEY DEFAULT gen_ulid(),
...
);'''
        # The example is anchored directly under the table rather than to the
        # frame edge.
        pk_example = (
            self._sql_code(pk_sql).scale(0.5).next_to(pk_table, DOWN, buff=0.3)
        )
        self.play(FadeIn(pk_example, shift=UP))
        self._show_footnote("主键原则:NOT NULL, 唯一, 稳定, 尽可能小", BLUE)
        self.wait(2)

    def _show_summary(self):
        """Section 7: the five rules written one by one, then the motto."""
        self._show_heading("表结构设计五大法则", font_size=44)

        # NOTE(review): the keycap emoji prefixes need a font with emoji
        # glyphs -- confirm they render in the target environment.
        rule_lines = (
            "1️⃣ 范式基础: 先满足3NF,保证数据一致性",
            "2️⃣ 适度反范: 性能瓶颈时策略性冗余",
            "3️⃣ 类型精择: 数据类型够用就好,不浪费空间",
            "4️⃣ 主键必设: 自增INT/BIGINT或UUID,视场景而定",
            "5️⃣ 约束完备: NOT NULL, UNIQUE, FOREIGN KEY 保证数据质量",
        )
        rules = VGroup(
            *[Text(line, font_size=28) for line in rule_lines]
        ).arrange(DOWN, aligned_edge=LEFT, buff=0.3).shift(UP*1)
        for rule in rules:
            self.play(Write(rule, lag_ratio=0.1))
            self.wait(0.3)

        final_principle = Text(
            "少查、快连、精索、常析、避坑",
            font_size=36, color=GREEN
        ).shift(DOWN*1)
        self.play(Write(final_principle))
        self.wait(3)