结合Django源码详细分析QuerySet的惰性机制。这是Django ORM设计中的一个核心特性。
🎯 QuerySet惰性机制概述
**惰性(Lazy)**意味着QuerySet只有在真正需要数据时才会执行数据库查询,而不是在创建QuerySet时立即执行。
📋 源码层面的惰性实现
1. QuerySet类的核心结构
python
# django/db/models/query.py
class QuerySet:
    """Represent a lazy database lookup for a set of objects."""

    def __init__(self, model=None, query=None, using=None, hints=None):
        # Core query state: the model class plus the sql.Query object that
        # accumulates filters/ordering without ever touching the database.
        self.model = model
        self.query = query or sql.Query(self.model)
        self._db = using
        self._hints = hints or {}
        # Laziness bookkeeping: _result_cache stays None until the queryset
        # is actually evaluated; every chainable operation leaves it None.
        self._result_cache = None
        self._sticky_filter = False
        self._for_write = False
        self._prefetch_related_lookups = ()
        self._prefetch_done = False
        self._known_related_objects = {}
        self._iterable_class = ModelIterable
        self._fields = None
2. 惰性操作的实现
2.1 链式操作不触发查询
python
# django/db/models/query.py
class QuerySet:
    def filter(self, *args, **kwargs):
        """
        Return a new QuerySet instance with the args ANDed to the existing set.
        """
        # Lazy: clone, record the condition on the clone's Query, return.
        # No SQL is executed here.
        clone = self._clone()
        clone.query.add_q(Q(*args, **kwargs))
        return clone

    def order_by(self, *field_names):
        """Return a new QuerySet instance with the ordering changed."""
        # Equally lazy: only the cloned Query's ordering state is rewritten.
        clone = self._clone()
        clone.query.clear_ordering(force_empty=False)
        clone.query.add_ordering(*field_names)
        return clone

    def select_related(self, *fields):
        """
        Return a new QuerySet instance that will select related objects.
        """
        # Only flags which related tables to JOIN later; still no query.
        clone = self._clone()
        clone.query.add_select_related(fields)
        return clone

    def _clone(self):
        """
        Return a copy of the current QuerySet that's ready for another
        operation.
        """
        # Cloning is what keeps chains lazy: the Query object is copied,
        # while the new instance's _result_cache starts out as None
        # (i.e. "not evaluated yet").
        query = self.query.clone()
        clone = self.__class__(
            model=self.model,
            query=query,
            using=self._db,
            hints=self._hints,
        )
        # Carry over configuration flags; the result cache is NOT copied.
        clone._sticky_filter = self._sticky_filter
        clone._for_write = self._for_write
        clone._prefetch_related_lookups = self._prefetch_related_lookups[:]
        clone._known_related_objects = self._known_related_objects
        clone._iterable_class = self._iterable_class
        clone._fields = self._fields
        return clone
2.2 触发查询的操作
python
# django/db/models/query.py
class QuerySet:
    def __iter__(self):
        """
        The queryset iterator protocol uses three nested iterators in the
        default case:
        1. sql.Query evaluation which fills cache
        2. python iterator over results
        3. python iterator over model instances
        """
        # Iteration is an evaluating operation: fetch (and cache) all rows
        # first, then iterate over the cached list.
        self._fetch_all()
        return iter(self._result_cache)

    def __len__(self):
        """Return the number of objects in this QuerySet."""
        # len() also forces full evaluation of the queryset.
        self._fetch_all()
        return len(self._result_cache)

    def __getitem__(self, k):
        """Retrieve an item or slice from the set of results."""
        if not isinstance(k, (int, slice)):
            raise TypeError(...)
        if isinstance(k, slice):
            # Slicing stays lazy: clone and record LIMIT/OFFSET on the
            # query.  FIX: the ordering is deliberately KEPT (the original
            # excerpt cleared it) — ordering determines which rows the
            # LIMIT/OFFSET window selects, so clearing it would return
            # arbitrary rows for a "page".
            clone = self._clone()
            clone.query.set_limits(k.start, k.stop)
            return clone  # still lazy!
        else:
            # A single index forces evaluation, then reads from the cache.
            self._fetch_all()
            return self._result_cache[k]

    def _fetch_all(self):
        """
        Fetch all results and cache them.
        """
        # The one place a SELECT actually runs: only when the cache is
        # still empty; afterwards resolve pending prefetch lookups.
        if self._result_cache is None:
            self._result_cache = list(self._iterable_class(self))
        if self._prefetch_related_lookups and not self._prefetch_done:
            self._prefetch_related_objects()
3. 查询执行的底层机制
3.1 Iterable类负责实际查询
python
# django/db/models/query.py
class ModelIterable:
    """Iterable that yields model instances."""

    def __init__(self, queryset):
        self.queryset = queryset

    def __iter__(self):
        qs = self.queryset
        # This is where the SQL is really compiled and executed.
        compiler = qs.query.get_compiler(using=qs.db)
        # NOTE(review): chunked_fetch/chunk_size are set by a base class
        # not shown in this excerpt — confirm against the full source.
        results = compiler.execute_sql(chunked_fetch=self.chunked_fetch,
                                       chunk_size=self.chunk_size)
        if not results:
            return
        # Turn each raw database row into a model instance.
        # (model / db / init_list / model_fields_* come from surrounding
        # code elided in this excerpt.)
        for row in results:
            obj = model.from_db(db, init_list, row[model_fields_start:model_fields_end])
            yield obj
3.2 SQL编译和执行
python
# django/db/models/sql/compiler.py
class SQLCompiler:
    def execute_sql(self, result_type=MULTI, chunked_fetch=False,
                    chunk_size=GET_ITERATOR_CHUNK_SIZE):
        """
        Run the query against the database and return the result(s).
        """
        # Compile the accumulated Query state into a single SQL string.
        sql, params = self.as_sql()
        if not sql:
            raise EmptyResultSet

        # Execute the statement on a database cursor and shape the result
        # according to the requested result_type.
        with self.connection.cursor() as cursor:
            cursor.execute(sql, params)
            if result_type == SINGLE:
                try:
                    return cursor.fetchone()
                except IndexError:
                    return None
            elif result_type == MULTI:
                return cursor.fetchall()
💡 在Django博客项目中的实际应用
示例1: 惰性链式操作
python
# blog/views.py
from blog.models import Article
def blog_list(request):
    # Each step below is lazy — no SQL has been executed so far.
    articles = Article.objects.all()                  # queryset created
    articles = articles.filter(status='published')    # filter recorded
    articles = articles.select_related('author')      # JOIN recorded
    articles = articles.order_by('-created_time')     # ordering recorded
    articles = articles.prefetch_related('tags')      # prefetch recorded

    # NOTE: printing a queryset calls repr(), which DOES execute a query
    # (str()/repr() are evaluating operations); it just does not fill
    # this queryset's own result cache.
    print(f"QuerySet对象: {articles}")

    # The first iteration runs the combined query and caches the rows.
    for article in articles:
        print(article.title)

    # A second pass reuses the cached results — no extra query.
    for article in articles:
        print(article.author.username)
示例2: 不同操作触发查询的时机
python
# blog/services.py
def demonstrate_lazy_evaluation():
    """Walk through which queryset operations are lazy and which evaluate."""
    print("🔍 演示QuerySet的惰性求值")

    # Step 1: building a queryset runs no SQL.
    print("\n1. 创建QuerySet(惰性):")
    qs = Article.objects.filter(status='published')
    print(f" QuerySet已创建: {type(qs)}")

    # Step 2: chaining more operations is still lazy.
    print("\n2. 链式操作(仍然惰性):")
    qs = qs.order_by('-created_time')
    qs = qs.select_related('author')
    print(f" 添加了排序和关联查询,但还未执行")

    # Step 3: operations that force evaluation.
    print("\n3. 触发查询的操作:")
    print(" 迭代(触发查询):")
    for article in qs[:5]:  # iterating the sliced clone executes its query
        print(f" - {article.title}")
    print(f" 长度计算(触发查询): {len(qs)}")
    print(f" 布尔检查(触发查询): {bool(qs)}")
    article_list = list(qs)
    print(f" 转换为列表(触发查询): {len(article_list)} 篇文章")
示例3: 切片操作的惰性特性
python
# blog/views.py
def pagination_example(request):
    """Fetch "page 2" (rows 11-20) with a single LIMIT/OFFSET query."""
    # Building the queryset is lazy.
    all_articles = Article.objects.filter(status='published')
    # FIX: ordering must be applied BEFORE slicing.  The original example
    # reordered after the slice, which Django rejects with
    # TypeError: "Cannot reorder a query once a slice has been taken."
    all_articles = all_articles.order_by('-created_time')
    # Slicing records LIMIT/OFFSET but is still lazy — no query here.
    page2_articles = all_articles[10:20]
    # The query runs only now; the generated SQL contains LIMIT 10 OFFSET 10.
    for article in page2_articles:
        print(article.title)
🔧 惰性机制的调试
调试脚本:观察查询执行时机
python
# debug_queryset_lazy.py
import os

import django
from django.db import connection, reset_queries

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'djangoblog.settings')
django.setup()

from blog.models import Article


def debug_lazy_queryset():
    """Show the query count after each queryset operation.

    NOTE: connection.queries is only recorded when settings.DEBUG is True.
    """
    print("🔍 调试QuerySet惰性执行")
    print("=" * 60)

    # Start counting queries from zero.
    reset_queries()

    print("1. 创建QuerySet")
    articles = Article.objects.all()
    print(f" 查询次数: {len(connection.queries)}")
    print(f" QuerySet类型: {type(articles)}")

    print("\n2. 添加过滤条件")
    articles = articles.filter(status='published')
    print(f" 查询次数: {len(connection.queries)}")

    print("\n3. 添加排序")
    articles = articles.order_by('-created_time')
    print(f" 查询次数: {len(connection.queries)}")

    print("\n4. 添加关联查询")
    articles = articles.select_related('author')
    print(f" 查询次数: {len(connection.queries)}")

    print("\n5. 切片操作")
    first_five = articles[:5]
    print(f" 查询次数: {len(connection.queries)}")
    print(f" 切片结果类型: {type(first_five)}")

    print("\n6. 🔥 第一次迭代(触发查询)")
    for i, article in enumerate(first_five):
        if i == 0:
            # Report the query that the first iteration just triggered.
            print(f" 查询次数: {len(connection.queries)}")
            print(f" 执行的SQL: {connection.queries[-1]['sql'][:100]}...")
        print(f" {i+1}. {article.title}")
    print(f"\n 总查询次数: {len(connection.queries)}")

    print("\n7. 🔄 第二次迭代(使用缓存)")
    pre_count = len(connection.queries)
    for article in first_five:
        pass
    print(f" 新增查询次数: {len(connection.queries) - pre_count}")
    print(" ✅ 使用了缓存,没有重新查询")


if __name__ == '__main__':
    debug_lazy_queryset()
🎯 惰性机制的性能优势
1. 避免不必要的数据库访问
python
def performance_example():
    """One combined query instead of one query per chained operation."""
    # ❌ If evaluation were eager, each step would hit the database:
    #   Article.objects.all()          -> query 1: fetch everything
    #   .filter(status='pub')          -> query 2: re-query with filter
    #   .order_by('-date')             -> query 3: re-query with ordering
    # ✅ Laziness collapses the whole chain into a single query.
    articles = (
        Article.objects.all()
        .filter(status='published')
        .order_by('-created_time')
        .select_related('author')
    )
    # Evaluated only here, as one optimized statement:
    #   SELECT * FROM blog_article
    #   INNER JOIN accounts_bloguser
    #       ON (blog_article.author_id = accounts_bloguser.id)
    #   WHERE blog_article.status = 'published'
    #   ORDER BY blog_article.created_time DESC
    return list(articles[:10])
2. 查询优化
python
# django/db/models/sql/query.py
class Query:
    def add_q(self, q_object):
        """
        Add a Q-object to the current filter.
        """
        # Conditions are merged into this Query's WHERE tree instead of
        # issuing a separate database query per filter() call.
        clause, _ = self._add_q(
            q_object,
            self.used_aliases,
            branch_negated=False,
            current_negated=False,
            allow_joins=True,
            split_subq=True,
        )
        if clause:
            # AND the new clause into the accumulated WHERE condition.
            self.where.add(clause, AND)

    def add_ordering(self, *ordering):
        """
        Add items from the 'ordering' sequence to the query's "order by"
        clause.
        """
        # Ordering terms are likewise accumulated here and compiled into
        # a single optimal SQL statement at evaluation time.
        errors = []
        for item in ordering:
            # ... resolve each ordering expression (elided in this excerpt)
            self.order_by.append(OrderBy(col, order))
📊 惰性机制的内存管理
结果缓存策略
python
# django/db/models/query.py
class QuerySet:
    def _fetch_all(self):
        """Execute the query once and cache the results."""
        # First evaluation only: subsequent calls find the cache filled
        # and skip the database entirely.
        if self._result_cache is None:
            self._result_cache = list(self._iterable_class(self))
        # Resolve any pending prefetch_related() lookups exactly once.
        if self._prefetch_related_lookups and not self._prefetch_done:
            self._prefetch_related_objects()

    def _prefetch_related_objects(self):
        # Batch-fetch related objects to avoid the N+1 query problem.
        prefetch_related_objects(self._result_cache, *self._prefetch_related_lookups)
        self._prefetch_done = True
大数据集的处理
python
def handle_large_dataset():
    """Stream articles one by one instead of caching the whole result set."""
    # iterator() bypasses the queryset result cache, keeping memory flat.
    for article in Article.objects.filter(status='published').iterator():
        process_article(article)
    # chunk_size controls how many rows are pulled from the cursor at a time.
    for article in Article.objects.filter(status='published').iterator(chunk_size=1000):
        process_article(article)
📝 总结
QuerySet惰性机制的核心特征:
- 🔄 延迟执行 - 只有在真正需要数据时才查询数据库
- ⛓️ 链式操作 - 多个操作合并为单次优化查询
- 💾 结果缓存 - 查询结果被缓存,避免重复查询
- 🎯 查询优化 - Django自动优化生成的SQL语句
- 📈 性能提升 - 减少数据库访问次数和网络开销
触发查询的操作:
- for obj in queryset:(迭代)
- len(queryset)(长度计算)
- list(queryset)(转换为列表)
- bool(queryset)(布尔检查)
- queryset[0](索引访问)
- str(queryset)(字符串表示)
保持惰性的操作:
- filter()、exclude()、order_by()
- select_related()、prefetch_related()
- values()、values_list()、distinct()
- queryset[1:5](切片操作)
这种惰性设计让Django ORM既灵活又高效,是现代Web框架中优秀的设计模式!🚀