一、项目背景
1.1 痛点分析
数据可视化是汇报的核心环节,但传统方式效率极低:
| 环节 | 手工方式 | 时间 |
|---|---|---|
| 选择图表类型 | 纠结对比 | 20分钟 |
| 调整配色 | 反复尝试 | 1小时 |
| 排版布局 | 手动对齐 | 1小时 |
| 数据标注 | 逐个添加 | 30分钟 |
| 总计 | - | 3小时 |
做12个图表就是36小时,将近一周。
1.2 技术需求
核心需求:
- AI根据数据特征自动推荐图表类型
- 自动配色方案(商务/科技/活力)
- 自动排版布局(多图表对齐)
- 支持静态图表(matplotlib)和交互式图表(pyecharts)
- 一键导出PNG/HTML/PDF
二、技术架构
Excel数据 → 数据分析 → AI推荐 → 图表生成 → 排版导出
↑ ↑ ↑ ↑ ↑
pandas 统计特征 Qwen3 matplotlib reportlab
pyecharts
技术栈:
- **pandas**:数据读取和分析
- **matplotlib**:静态图表生成
- **pyecharts**:交互式图表生成
- **DashScope/Qwen3**:AI智能推荐
- **reportlab**:PDF报告导出
---
## 三、环境准备
### 3.1 安装依赖
```bash
pip install pandas matplotlib pyecharts dashscope openpyxl reportlab
3.2 配置
# config.py
DASHSCOPE_API_KEY = "your-api-key-here"
# 配色方案
COLOR_SCHEMES = {
'business': ['#2C3E50', '#3498DB', '#2ECC71', '#E74C3C', '#F39C12', '#9B59B6'],
'tech': ['#0D1B2A', '#1B2838', '#2196F3', '#00BCD4', '#4CAF50', '#FF5722'],
'vibrant': ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96E6A1', '#DDA0DD', '#F7DC6F']
}
四、核心模块实现
4.1 数据分析模块
自动分析数据特征,为图表推荐提供依据:
import pandas as pd
import numpy as np
class DataAnalyzer:
def __init__(self):
self.features = {}
def analyze(self, df):
"""分析数据特征"""
self.features = {
'row_count': len(df),
'col_count': len(df.columns),
'numeric_cols': list(df.select_dtypes(include=[np.number]).columns),
'category_cols': list(df.select_dtypes(include=['object']).columns),
'datetime_cols': list(df.select_dtypes(include=['datetime64']).columns),
'has_time_series': self._detect_time_series(df),
'has_categories': len(df.select_dtypes(include=['object']).columns) > 0,
'category_counts': {},
'correlations': {}
}
# 分类列的唯一值数量
for col in self.features['category_cols']:
self.features['category_counts'][col] = df[col].nunique()
# 数值列相关性
numeric_df = df.select_dtypes(include=[np.number])
if len(numeric_df.columns) > 1:
self.features['correlations'] = numeric_df.corr().to_dict()
return self.features
def _detect_time_series(self, df):
"""检测是否为时间序列数据"""
for col in df.columns:
try:
pd.to_datetime(df[col])
return True
except (ValueError, TypeError):
continue
return False
def get_summary(self):
"""生成数据摘要(供AI分析)"""
summary = f"""
数据概况:
- 行数:{self.features['row_count']}
- 列数:{self.features['col_count']}
- 数值列:{', '.join(self.features['numeric_cols'])}
- 分类列:{', '.join(self.features['category_cols'])}
- 是否时间序列:{self.features['has_time_series']}
- 分类列唯一值:{self.features['category_counts']}
"""
return summary
4.2 AI图表推荐模块
用Qwen3根据数据特征推荐最佳图表类型:
import dashscope
import json
class ChartRecommender:
def __init__(self, api_key):
dashscope.api_key = api_key
def recommend(self, data_summary, purpose="汇报"):
"""AI推荐图表类型"""
prompt = f"""
你是一位数据可视化专家。根据以下数据特征,推荐最合适的图表组合。
数据摘要:
{data_summary}
用途:{purpose}
请返回JSON格式,包含推荐的图表列表:
{{
"charts": [
{{
"type": "图表类型(line/bar/pie/scatter/radar/heatmap)",
"title": "图表标题",
"x_col": "X轴数据列名",
"y_col": "Y轴数据列名",
"reason": "推荐理由"
}}
],
"color_scheme": "推荐配色方案(business/tech/vibrant)",
"layout": "布局建议(2x2/3x2/1x3)"
}}
规则:
1. 时间序列数据优先用折线图
2. 分类占比用饼图(分类≤6个)
3. 分类对比用柱状图
4. 两个数值变量的关系用散点图
5. 多维度评估用雷达图
6. 最多推荐6个图表
"""
response = dashscope.Generation.call(
model="qwen3-72b",
messages=[{"role": "user", "content": prompt}],
result_format="message"
)
content = response.output.choices[0].message.content
import re
json_match = re.search(r'\{.*\}', content, re.DOTALL)
if json_match:
return json.loads(json_match.group())
return None
4.3 静态图表生成模块(matplotlib)
根据AI推荐自动生成图表:
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['axes.unicode_minus'] = False
class StaticChartGenerator:
def __init__(self, color_scheme='business'):
self.colors = COLOR_SCHEMES.get(color_scheme, COLOR_SCHEMES['business'])
def generate(self, df, chart_config, output_path="dashboard.png"):
"""根据配置生成图表"""
charts = chart_config['charts']
layout = chart_config.get('layout', '2x2')
rows, cols = map(int, layout.split('x'))
fig, axes = plt.subplots(rows, cols, figsize=(7*cols, 5*rows))
fig.suptitle('数据分析报告', fontsize=18, fontweight='bold')
if rows * cols == 1:
axes = [[axes]]
elif rows == 1 or cols == 1:
axes = [axes] if rows == 1 else [[ax] for ax in axes]
for idx, chart in enumerate(charts):
if idx >= rows * cols:
break
r, c = divmod(idx, cols)
ax = axes[r][c] if isinstance(axes[r], list) else axes[r]
chart_type = chart['type']
title = chart['title']
if chart_type == 'line':
self._draw_line(ax, df, chart)
elif chart_type == 'bar':
self._draw_bar(ax, df, chart)
elif chart_type == 'pie':
self._draw_pie(ax, df, chart)
elif chart_type == 'scatter':
self._draw_scatter(ax, df, chart)
ax.set_title(title, fontsize=13, fontweight='bold', pad=10)
plt.tight_layout()
plt.savefig(output_path, dpi=150, bbox_inches='tight')
print(f"图表已生成:{output_path}")
def _draw_line(self, ax, df, config):
"""折线图"""
x = df[config['x_col']]
y = df[config['y_col']]
ax.plot(x, y, marker='o', color=self.colors[0], linewidth=2)
ax.fill_between(range(len(x)), y, alpha=0.15, color=self.colors[0])
ax.set_xlabel(config['x_col'])
ax.set_ylabel(config['y_col'])
ax.grid(True, alpha=0.3)
# 标注最大值
max_idx = y.idxmax()
ax.annotate(f'{y[max_idx]:.0f}', xy=(max_idx, y[max_idx]),
fontsize=10, color=self.colors[3], fontweight='bold')
def _draw_bar(self, ax, df, config):
"""柱状图"""
if config.get('x_col') in df.columns:
data = df.groupby(config['x_col'])[config['y_col']].sum()
else:
data = df[config['y_col']]
bars = ax.bar(range(len(data)), data.values, color=self.colors[:len(data)])
ax.set_xticks(range(len(data)))
ax.set_xticklabels(data.index, rotation=30, ha='right')
# 数据标签
for bar in bars:
height = bar.get_height()
ax.text(bar.get_x() + bar.get_width()/2., height,
f'{height:,.0f}', ha='center', va='bottom', fontsize=9)
def _draw_pie(self, ax, df, config):
"""饼图"""
data = df.groupby(config['x_col'])[config['y_col']].sum()
wedges, texts, autotexts = ax.pie(
data.values, labels=data.index, autopct='%1.1f%%',
colors=self.colors[:len(data)], startangle=90,
textprops={'fontsize': 10}
)
for autotext in autotexts:
autotext.set_fontweight('bold')
def _draw_scatter(self, ax, df, config):
"""散点图"""
x = df[config['x_col']]
y = df[config['y_col']]
ax.scatter(x, y, c=self.colors[0], alpha=0.6, s=50, edgecolors='white')
# 趋势线
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
ax.plot(x.sort_values(), p(x.sort_values()),
color=self.colors[3], linestyle='--', alpha=0.8)
ax.set_xlabel(config['x_col'])
ax.set_ylabel(config['y_col'])
ax.grid(True, alpha=0.3)
4.4 交互式图表生成模块(pyecharts)
生成可交互的HTML图表:
from pyecharts.charts import Line, Bar, Pie, Scatter, Grid, Page
from pyecharts import options as opts
class InteractiveChartGenerator:
def __init__(self, color_scheme='business'):
self.colors = COLOR_SCHEMES.get(color_scheme, COLOR_SCHEMES['business'])
def generate_line(self, df, x_col, y_col, title):
"""交互式折线图"""
chart = (
Line()
.add_xaxis(df[x_col].tolist())
.add_yaxis(
y_col, df[y_col].tolist(),
is_smooth=True,
areastyle_opts=opts.AreaStyleOpts(opacity=0.3),
linestyle_opts=opts.LineStyleOpts(width=3)
)
.set_global_opts(
title_opts=opts.TitleOpts(title=title),
tooltip_opts=opts.TooltipOpts(trigger="axis"),
toolbox_opts=opts.ToolboxOpts(is_show=True),
datazoom_opts=opts.DataZoomOpts()
)
.set_colors(self.colors)
)
return chart
def generate_bar(self, df, x_col, y_col, title):
"""交互式柱状图"""
data = df.groupby(x_col)[y_col].sum()
chart = (
Bar()
.add_xaxis(data.index.tolist())
.add_yaxis(
y_col, data.values.tolist(),
itemstyle_opts=opts.ItemStyleOpts(
color=self.colors[0]
)
)
.set_global_opts(
title_opts=opts.TitleOpts(title=title),
tooltip_opts=opts.TooltipOpts(trigger="axis"),
toolbox_opts=opts.ToolboxOpts(is_show=True)
)
)
return chart
def generate_dashboard(self, charts, output_path="dashboard.html"):
"""生成仪表盘"""
page = Page(layout=Page.SimplePageLayout)
for chart in charts:
page.add(chart)
page.render(output_path)
print(f"交互式图表已生成:{output_path}")
五、完整流程整合
def create_report(excel_path, api_key, output_format="static"):
"""生成数据分析报告"""
print("开始生成报告...")
# 1. 读取数据
df = pd.read_excel(excel_path)
print(f"✅ 数据读取完成,{len(df)}行 × {len(df.columns)}列")
# 2. 分析数据特征
analyzer = DataAnalyzer()
features = analyzer.analyze(df)
summary = analyzer.get_summary()
print("✅ 数据分析完成")
# 3. AI推荐图表
recommender = ChartRecommender(api_key)
chart_config = recommender.recommend(summary)
print(f"✅ AI推荐{len(chart_config['charts'])}个图表")
# 4. 生成图表
if output_format == "static":
generator = StaticChartGenerator(chart_config.get('color_scheme', 'business'))
generator.generate(df, chart_config, "report.png")
else:
generator = InteractiveChartGenerator(chart_config.get('color_scheme', 'business'))
charts = []
for config in chart_config['charts']:
if config['type'] == 'line':
charts.append(generator.generate_line(df, config['x_col'], config['y_col'], config['title']))
elif config['type'] == 'bar':
...(truncated)...