DuckDB的执行计划默认以方框图形式输出,复杂的计划很难辨认;它也支持JSON格式输出,但对人类同样不太可读。因此让DeepSeek参照PostgreSQL执行计划的格式编写了一个转换程序,代码如下:
python
import json
def json_to_postgresql_plan(json_str):
    """Convert a DuckDB JSON execution plan into PostgreSQL-style indented text.

    Each plan node becomes one line: the operator name, followed by an
    optional parenthesised list of details taken from the node's
    ``extra_info`` (projections, group keys, aggregates, estimated rows).
    Child nodes are printed below their parent, indented two spaces per level.

    Args:
        json_str (str): JSON text of the plan. Either a list of root nodes
            or a single root node object.

    Returns:
        str: The formatted plan, starting with a "QUERY PLAN" header and a
            50-character dashed separator line.

    Raises:
        json.JSONDecodeError: If ``json_str`` is not valid JSON.
        KeyError: If a node has no "name" entry.
    """
    plan_data = json.loads(json_str)
    # A single root object is handled the same way as a one-element list.
    if isinstance(plan_data, dict):
        plan_data = [plan_data]

    def format_value(value):
        # The plan may carry either a scalar ("42") or a list (["a", "b"]).
        # Joining a bare string would split it into characters ("4, 2"),
        # so only real sequences are joined.
        if isinstance(value, (list, tuple)):
            return ", ".join(str(item) for item in value)
        return str(value)

    def process_node(node, level=0):
        """Return the output lines for ``node`` and all of its descendants."""
        indent = " " * (level * 2)
        extra_info = node.get("extra_info") or {}
        if not isinstance(extra_info, dict):
            # Without a key/value mapping there is nothing to pick details from.
            extra_info = {}

        info_parts = []
        # Empty values are skipped for these three keys.
        for key, label in (
            ("Projections", "Projections"),
            ("Groups", "Group Key"),
            ("Aggregates", "Aggregates"),
        ):
            value = extra_info.get(key)
            if value:
                info_parts.append(f"{label}: {format_value(value)}")
        # The estimate is reported whenever the key is present.
        if "Estimated Cardinality" in extra_info:
            info_parts.append(
                f"Estimated Rows: {extra_info['Estimated Cardinality']}"
            )

        node_desc = f"{indent}{node['name']}"
        if info_parts:
            node_desc += f" ({', '.join(info_parts)})"

        lines = [node_desc]
        for child in node.get("children") or []:
            lines.extend(process_node(child, level + 1))
        return lines

    plan_lines = ["QUERY PLAN", "-" * 50]
    for root in plan_data:
        plan_lines.extend(process_node(root))
    return "\n".join(plan_lines)
def convert_json_file_to_plan(json_file_path):
    """Read a JSON execution plan from a file and convert it to PostgreSQL format.

    Failures are not raised to the caller; they are reported as an error
    message returned in place of the plan.

    Args:
        json_file_path (str): Path of the JSON file to read (UTF-8).

    Returns:
        str: The PostgreSQL-style plan text, or an error message if the file
            is missing, is not valid JSON, or any other exception occurs.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as source:
            outcome = json_to_postgresql_plan(source.read())
    except FileNotFoundError:
        outcome = f"错误:找不到文件 {json_file_path}"
    except json.JSONDecodeError as decode_error:
        outcome = f"错误:JSON格式不正确 - {decode_error}"
    except Exception as unexpected:
        outcome = f"错误:处理文件时发生异常 - {unexpected}"
    return outcome
def convert_json_file_to_plan_and_save(json_file_path, output_file_path=None):
    """Convert a JSON execution plan file and optionally write the result to a file.

    Args:
        json_file_path (str): Path of the input JSON file.
        output_file_path (str, optional): Path of the text file to write.
            When empty or None, nothing is written.

    Returns:
        str: Whatever ``convert_json_file_to_plan`` returned for the input
            file. The same text is what gets written to ``output_file_path``.
    """
    plan_text = convert_json_file_to_plan(json_file_path)

    # Nothing to save: hand the text straight back.
    if not output_file_path:
        return plan_text

    try:
        with open(output_file_path, 'w', encoding='utf-8') as sink:
            sink.write(plan_text)
        print(f"\n执行计划已保存到: {output_file_path}")
    except Exception as write_error:
        # A failed save is reported on stdout; the plan is still returned.
        print(f"保存文件时出错: {write_error}")
    return plan_text
# Command-line usage: <json file path> [output file path]
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        print("使用方法: python script.py <json文件路径> [输出文件路径]")
    elif len(cli_args) > 1 and cli_args[1]:
        # An output path was given: convert and save to that file.
        convert_json_file_to_plan_and_save(cli_args[0], cli_args[1])
    else:
        # No output path: print the converted plan to the console.
        print(convert_json_file_to_plan(cli_args[0]))
使用步骤
1.在DuckDB中生成json格式执行计划
sql
.mode list
.output plan.json
.explain (format json) select count(*) from (values(1),(2))t(a);
.output
.exit
然后手工编辑plan.json文件,去掉开头的非JSON格式部分。编辑后的文件内容例如:
json
[
{
"name": "UNGROUPED_AGGREGATE",
"children": [
{
"name": "PROJECTION",
"children": [
{
"name": "COLUMN_DATA_SCAN",
"children": [],
"extra_info": {
"Estimated Cardinality": "2"
}
}
],
"extra_info": {
"Projections": "42",
"Estimated Cardinality": "2"
}
}
],
"extra_info": {
"Aggregates": "count_star()"
}
}
]
2.使用python执行程序
python convplan.py plan.json planpg.txt
输出的planpg.txt如下
QUERY PLAN
--------------------------------------------------
UNGROUPED_AGGREGATE (Aggregates: count_star())
  PROJECTION (Projections: 4, 2, Estimated Rows: 2)
    COLUMN_DATA_SCAN (Estimated Rows: 2)
这样就容易阅读多了。左侧是操作名称,括号中是补充信息。而执行过程是从缩进较深的操作逐步到缩进较浅的操作。