01-编程基础与数学基石:Matplotlib & Seaborn

Matplotlib & Seaborn：AI开发者的可视化利器

一、为什么需要数据可视化？

1.1 可视化 vs 纯数字

python 复制代码

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Font setup so the Chinese titles and labels below render as text.
# Matplotlib walks this list in order and uses the first family it can find,
# so SimHei stays the first choice and the remaining entries act as fallbacks
# on machines where SimHei is not installed.
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'PingFang SC',
    'Heiti TC',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    'Arial Unicode MS',
    'DejaVu Sans',
]
# Draw minus signs with the ASCII hyphen instead of U+2212, which the CJK
# font selected above may not contain.
plt.rcParams['axes.unicode_minus'] = False

# Classic example: Anscombe's quartet.
# Four datasets with almost identical summary statistics but very different
# shapes once plotted.
anscombe_data = sns.load_dataset('anscombe')
dataset_ids = ['I', 'II', 'III', 'IV']

print("安斯库姆四重奏统计指标:")
for dataset in dataset_ids:
    subset = anscombe_data[anscombe_data['dataset'] == dataset]
    print(f"\n数据集 {dataset}:")
    print(f"  x均值: {subset['x'].mean():.2f}, x方差: {subset['x'].var():.2f}")
    print(f"  y均值: {subset['y'].mean():.2f}, y方差: {subset['y'].var():.2f}")
    print(f"  相关系数: {subset['x'].corr(subset['y']):.3f}")

# Only the plots reveal how different the four datasets are.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
colors = ['blue', 'red', 'green', 'purple']

# axes.flat walks the 2x2 grid row by row: I, II on top and III, IV below.
for ax, dataset, color in zip(axes.flat, dataset_ids, colors):
    subset = anscombe_data[anscombe_data['dataset'] == dataset]
    ax.scatter(subset['x'], subset['y'], color=color, s=80, alpha=0.7)
    ax.set_title(f'数据集 {dataset}', fontsize=14)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.grid(True, alpha=0.3)

plt.suptitle('安斯库姆四重奏：统计相同，分布完全不同！', fontsize=16)
plt.tight_layout()
plt.savefig('anscombe_quartet.png', dpi=100, bbox_inches='tight')
plt.show()

print("\n结论：仅靠统计指标无法完全理解数据！可视化揭示真相！")

1.2 可视化的核心价值

复制代码

可视化的作用：
1. 探索性分析 → 发现数据模式、异常值、分布特征
2. 诊断分析 → 检查模型假设、残差分析
3. 沟通展示 → 向他人传达洞察和结果
4. 监控系统 → 实时监控模型性能指标

二、Matplotlib基础：构建块的原理

2.1 Figure和Axes：画布和画纸

python 复制代码

import matplotlib.pyplot as plt
import numpy as np

# Core concepts:
#   Figure - the whole canvas (it may hold several subplots)
#   Axes   - one coordinate system (a single sheet on the canvas)
#   Axis   - the x axis / y axis belonging to an Axes

# --- Ways to create a Figure and its Axes ---

# Option 1: quick pyplot-style plotting; the Axes is created implicitly.
plt.figure(figsize=(10, 6))
plt.plot([1, 2, 3, 4], [1, 4, 2, 3])
plt.title('快速绘图')
plt.show()

# Option 2: explicit creation (more flexible, recommended).
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
ax.set(title='显式创建', xlabel='X轴', ylabel='Y轴')
plt.show()

# How a Figure relates to the Axes it contains.
fig = plt.figure(figsize=(12, 8))
print(f"Figure类型: {type(fig)}")
print(f"Figure大小: {fig.get_size_inches()}")

# Fill the 2x2 grid slot by slot (slots are numbered 1..4, row by row).
ax1, ax2, ax3, ax4 = (fig.add_subplot(2, 2, slot) for slot in range(1, 5))

print(f"Axes类型: {type(ax1)}")
print(f"所有Axes: {fig.axes}")

# Draw the same diagonal on every subplot, each with its own line format,
# and title the panels.
panel_formats = ('r-', 'b--', 'g-.', 'm:')
panel_titles = ('子图1', '子图2', '子图3', '子图4')
for panel, fmt, caption in zip((ax1, ax2, ax3, ax4), panel_formats, panel_titles):
    panel.plot([0, 1], [0, 1], fmt)
    panel.set_title(caption)

plt.tight_layout()  # adjust subplot spacing automatically
plt.show()

2.2 折线图：追踪变化趋势

python 复制代码

# Sample curves evaluated on a common grid.
x = np.linspace(0, 10, 100)
y1 = np.sin(x)
y2 = np.cos(x)
y3 = np.sin(x) * np.exp(-x/5)  # damped oscillation
y4 = x ** 2 / 20

fig, ax = plt.subplots(figsize=(12, 6))

# --- Basic line plot: one (values, label, colour, line style) entry per curve ---
curve_specs = (
    (y1, 'sin(x)', 'blue', '-'),
    (y2, 'cos(x)', 'red', '--'),
    (y3, '阻尼振荡', 'green', '-.'),
    (y4, '二次函数', 'orange', ':'),
)
for values, name, shade, dash in curve_specs:
    ax.plot(x, values, label=name, color=shade, linewidth=2, linestyle=dash)

# --- Decorations ---
ax.set_title('多种函数曲线对比', fontsize=16, fontweight='bold')
ax.set_xlabel('x 值', fontsize=12)
ax.set_ylabel('y 值', fontsize=12)
ax.legend(loc='upper right', fontsize=10, framealpha=0.9)
ax.grid(True, alpha=0.3, linestyle='--')

# Arrow annotation pointing at (pi/2, 1), the first maximum of sin(x).
ax.annotate(
    '峰值点',
    xy=(np.pi/2, 1),
    xytext=(3, 1.2),
    arrowprops=dict(arrowstyle='->', color='red'),
    fontsize=10,
)

# Fixed axis limits and a light grey plot background.
ax.set(xlim=(0, 10), ylim=(-1.5, 2))
ax.set_facecolor('#f5f5f5')

plt.tight_layout()
plt.savefig('line_plot_example.png', dpi=150, bbox_inches='tight')
plt.show()

# === AI in practice: training loss and accuracy curves ===
np.random.seed(42)
epochs = np.arange(1, 101)
n_epochs = len(epochs)

# Simulated run: losses decay like 1/sqrt(epoch), accuracies saturate
# exponentially, and Gaussian noise is added to each series.
train_loss = 2.5 / np.sqrt(epochs) + np.random.normal(0, 0.05, n_epochs)
val_loss = 2.8 / np.sqrt(epochs) + np.random.normal(0, 0.08, n_epochs)
train_acc = 0.6 + 0.3 * (1 - np.exp(-epochs/30)) + np.random.normal(0, 0.02, n_epochs)
val_acc = 0.55 + 0.35 * (1 - np.exp(-epochs/25)) + np.random.normal(0, 0.03, n_epochs)

# Two side-by-side panels (separate Axes, not a twin-axis plot).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Loss curves
ax1.plot(epochs, train_loss, 'b-', label='训练损失', linewidth=2)
ax1.plot(epochs, val_loss, 'r-', label='验证损失', linewidth=2)
ax1.set_xlabel('Epoch', fontsize=12)
ax1.set_ylabel('Loss', fontsize=12)
ax1.set_title('训练过程损失曲线', fontsize=14)
ax1.legend()
ax1.grid(True, alpha=0.3)

# Mark the epoch with the lowest validation loss.
# np.argmin returns a 0-based array index while `epochs` starts at 1, so the
# index is mapped back through `epochs` before it is plotted or printed.
best_idx = int(np.argmin(val_loss))
best_epoch = int(epochs[best_idx])
best_loss = val_loss[best_idx]
ax1.plot(best_epoch, best_loss, 'ro', markersize=10, markeredgecolor='black')
ax1.annotate(f'最佳模型\nEpoch {best_epoch}',
             xy=(best_epoch, best_loss),
             xytext=(best_epoch+10, best_loss+0.1),
             arrowprops=dict(arrowstyle='->'))

# Accuracy curves
ax2.plot(epochs, train_acc, 'g-', label='训练准确率', linewidth=2)
ax2.plot(epochs, val_acc, 'orange', label='验证准确率', linewidth=2)
ax2.set_xlabel('Epoch', fontsize=12)
ax2.set_ylabel('Accuracy', fontsize=12)
ax2.set_title('训练过程准确率曲线', fontsize=14)
ax2.grid(True, alpha=0.3)

# Overfitting indicator: shade the last 20 epochs when mean validation
# accuracy trails mean training accuracy by more than 0.05.
if val_acc[-20:].mean() < train_acc[-20:].mean() - 0.05:
    ax2.fill_between(epochs[-20:], train_acc[-20:], val_acc[-20:],
                     alpha=0.3, color='red', label='过拟合区域')

# Build the legend last so the shaded region (when drawn) is listed too.
ax2.legend()

plt.tight_layout()
plt.savefig('training_curves.png', dpi=150, bbox_inches='tight')
plt.show()

2.3 散点图：揭示变量关系

python 复制代码

# === Scatter plot basics ===
np.random.seed(42)
n_points = 200

# Linearly related data: y = 2x + noise.
x_corr = np.random.randn(n_points)
y_corr = 2 * x_corr + np.random.randn(n_points) * 0.5

# Four clusters of 50 points, one centred in each quadrant.
cluster1 = np.random.randn(50, 2) + [2, 2]
cluster2 = np.random.randn(50, 2) + [-2, -2]
cluster3 = np.random.randn(50, 2) + [2, -2]
cluster4 = np.random.randn(50, 2) + [-2, 2]

fig, axes = plt.subplots(2, 2, figsize=(14, 12))

# 1. Plain scatter plot
axes[0, 0].scatter(x_corr, y_corr, alpha=0.6, c='blue', s=50)
axes[0, 0].set_title('正相关关系', fontsize=14)
axes[0, 0].set_xlabel('特征 X')
axes[0, 0].set_ylabel('特征 Y')
axes[0, 0].grid(True, alpha=0.3)

# Least-squares regression line.
z = np.polyfit(x_corr, y_corr, 1)
p = np.poly1d(z)
# Evaluate the fit at the two x extremes only: drawing a dashed line through
# the unsorted sample points retraces the same segment back and forth and
# smears the dash pattern.
fit_x = np.array([x_corr.min(), x_corr.max()])
# `:+.2f` prints the intercept with its own sign, so a negative value is
# shown as "x-0.03" rather than "x+-0.03".
axes[0, 0].plot(fit_x, p(fit_x), "r--", alpha=0.8,
                label=f'拟合线: y={z[0]:.2f}x{z[1]:+.2f}')
axes[0, 0].legend()

# 2. Scatter coloured by category
colors = ['red', 'blue', 'green', 'purple']
clusters = [cluster1, cluster2, cluster3, cluster4]
labels = ['Cluster A', 'Cluster B', 'Cluster C', 'Cluster D']

for cluster, label, color in zip(clusters, labels, colors):
    axes[0, 1].scatter(cluster[:, 0], cluster[:, 1],
                       c=color, label=label, alpha=0.7, s=60)
axes[0, 1].set_title('聚类分布', fontsize=14)
axes[0, 1].set_xlabel('特征 1')
axes[0, 1].set_ylabel('特征 2')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# 3. Bubble chart: marker size and colour both encode a third variable.
size = np.random.rand(n_points) * 100
scatter = axes[1, 0].scatter(x_corr, y_corr, s=size, c=size,
                             cmap='viridis', alpha=0.6)
axes[1, 0].set_title('气泡图：点大小表示数值', fontsize=14)
axes[1, 0].set_xlabel('特征 X')
axes[1, 0].set_ylabel('特征 Y')
plt.colorbar(scatter, ax=axes[1, 0], label='数值大小')

# 4. Small-marker scatter of the same data. No marginal histograms are drawn
#    here; they would need additional axes.
axes[1, 1].scatter(x_corr, y_corr, alpha=0.5, s=30)
axes[1, 1].set_title('散点图', fontsize=14)
axes[1, 1].set_xlabel('特征 X')
axes[1, 1].set_ylabel('特征 Y')

plt.tight_layout()
plt.savefig('scatter_plots.png', dpi=150, bbox_inches='tight')
plt.show()

# === AI in practice: feature correlation analysis ===
# Build a synthetic feature table.
np.random.seed(42)
n_samples = 500

data = pd.DataFrame({
    'feature_1': np.random.randn(n_samples),
    'feature_2': np.random.randn(n_samples),
    'feature_3': np.random.randn(n_samples),
    'feature_4': np.random.randn(n_samples),
    'target': np.random.randn(n_samples)
})

# Inject correlations: feature_2 follows feature_1, feature_3 opposes it, and
# target is a linear mix of all four features.
data['feature_2'] = 0.7 * data['feature_1'] + 0.3 * data['feature_2']
data['feature_3'] = -0.5 * data['feature_1'] + 0.5 * data['feature_3']
data['target'] = 0.8 * data['feature_1'] + 0.3 * data['feature_2'] - 0.2 * data['feature_3'] + 0.1 * data['feature_4']

# Scatter-plot matrix; the grid size follows the feature list instead of a
# hard-coded 4.
features = ['feature_1', 'feature_2', 'feature_3', 'target']
n_feats = len(features)

fig, axes = plt.subplots(n_feats, n_feats, figsize=(14, 12))

for row, feat_row in enumerate(features):
    for col, feat_col in enumerate(features):
        ax = axes[row, col]
        if row == col:
            # Diagonal: histogram of the feature itself.
            ax.hist(data[feat_row], bins=30, alpha=0.7, color='blue')
            ax.set_title(feat_row, fontsize=10)
        else:
            # Off-diagonal: the column feature goes on x and the row feature
            # on y, so the data agrees with the outer axis labels set below.
            ax.scatter(data[feat_col], data[feat_row], alpha=0.3, s=10)

            # Pearson correlation shown in the corner of the panel.
            corr = data[feat_col].corr(data[feat_row])
            ax.text(0.05, 0.95, f'r={corr:.2f}',
                    transform=ax.transAxes, fontsize=9,
                    bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        # Label only the outer edge: x labels on the bottom row, y labels on
        # the first column.
        if row == n_feats - 1:
            ax.set_xlabel(feat_col, fontsize=8)
        if col == 0:
            ax.set_ylabel(feat_row, fontsize=8)

plt.suptitle('特征相关性矩阵散点图', fontsize=16)
plt.tight_layout()
plt.savefig('correlation_scatter_matrix.png', dpi=150, bbox_inches='tight')
plt.show()

2.4 直方图：理解数据分布

python 复制代码

# === Histogram basics ===
np.random.seed(42)

# Samples from four differently shaped distributions.
normal_data = np.random.randn(10000)
uniform_data = np.random.rand(10000)
exponential_data = np.random.exponential(2, 10000)
bimodal_data = np.concatenate([np.random.randn(5000) - 2, np.random.randn(5000) + 2])

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# 1. Normal distribution
# NumPy arrays have a .mean() method but no .median() method, so the median
# has to come from the np.median function.
normal_mean = normal_data.mean()
normal_median = np.median(normal_data)
axes[0, 0].hist(normal_data, bins=50, color='blue', alpha=0.7, edgecolor='black')
axes[0, 0].set_title('正态分布', fontsize=14)
axes[0, 0].set_xlabel('值')
axes[0, 0].set_ylabel('频数')
axes[0, 0].axvline(normal_mean, color='red', linestyle='--', label=f'均值: {normal_mean:.2f}')
axes[0, 0].axvline(normal_median, color='green', linestyle='--', label=f'中位数: {normal_median:.2f}')
axes[0, 0].legend()

# 2. Uniform distribution
axes[0, 1].hist(uniform_data, bins=30, color='green', alpha=0.7, edgecolor='black')
axes[0, 1].set_title('均匀分布', fontsize=14)
axes[0, 1].set_xlabel('值')
axes[0, 1].set_ylabel('频数')

# 3. Exponential distribution (long right tail)
exponential_mean = exponential_data.mean()
axes[1, 0].hist(exponential_data, bins=50, color='orange', alpha=0.7, edgecolor='black')
axes[1, 0].set_title('指数分布（长尾）', fontsize=14)
axes[1, 0].set_xlabel('值')
axes[1, 0].set_ylabel('频数')
axes[1, 0].axvline(exponential_mean, color='red', linestyle='--',
                   label=f'均值: {exponential_mean:.2f}')
axes[1, 0].legend()

# 4. Bimodal distribution (two normal clusters centred at -2 and +2)
axes[1, 1].hist(bimodal_data, bins=50, color='purple', alpha=0.7, edgecolor='black')
axes[1, 1].set_title('双峰分布（两个聚类）', fontsize=14)
axes[1, 1].set_xlabel('值')
axes[1, 1].set_ylabel('频数')

plt.tight_layout()
plt.savefig('histograms.png', dpi=150, bbox_inches='tight')
plt.show()

# === Kernel density estimation (KDE) ===
from scipy import stats

fig, ax = plt.subplots(figsize=(12, 6))

# Both curves below are evaluated over the observed data range.
data_lo, data_hi = normal_data.min(), normal_data.max()

# Density-normalised histogram, so it shares a y scale with the curves.
ax.hist(normal_data, bins=50, density=True, alpha=0.5, color='blue', label='直方图')

# Gaussian KDE evaluated on a fine 1000-point grid.
density_estimate = stats.gaussian_kde(normal_data)
fine_grid = np.linspace(data_lo, data_hi, 1000)
ax.plot(fine_grid, density_estimate(fine_grid), 'r-', linewidth=2, label='KDE曲线')

# Normal pdf using the parameters fitted to the sample.
mu, std = stats.norm.fit(normal_data)
coarse_grid = np.linspace(data_lo, data_hi, 100)
fitted_pdf = stats.norm.pdf(coarse_grid, mu, std)
ax.plot(coarse_grid, fitted_pdf, 'g--',
        linewidth=2, label=f'理论正态分布 N({mu:.2f}, {std:.2f})')

ax.set_title('直方图 + 核密度估计', fontsize=14)
ax.set(xlabel='值', ylabel='密度')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('histogram_with_kde.png', dpi=150, bbox_inches='tight')
plt.show()

# === AI in practice: pixel intensity distribution of an image ===
# NOTE: the original `from scipy import misc` import was dropped here. Nothing
# in this script uses `misc`, and the module has been deprecated by SciPy.
from sklearn.datasets import load_digits

# Load the handwritten digits dataset and take its first image.
digits = load_digits()
sample_image = digits.images[0]

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Original image
axes[0].imshow(sample_image, cmap='gray')
axes[0].set_title('原始图像', fontsize=14)
axes[0].axis('off')

# Histogram of pixel values
pixels = sample_image.flatten()
axes[1].hist(pixels, bins=16, color='gray', alpha=0.7, edgecolor='black')
axes[1].set_title('像素值分布', fontsize=14)
axes[1].set_xlabel('像素强度')
axes[1].set_ylabel('频数')

# Empirical cumulative distribution function: for each sorted intensity, the
# fraction of pixels at or below it. Dividing a running sum of the
# intensities by their total would instead give the cumulative share of
# brightness, which is not the CDF this panel is titled as.
sorted_pixels = np.sort(pixels)
cumulative = np.arange(1, sorted_pixels.size + 1) / sorted_pixels.size
# An ECDF is piecewise constant, so draw it as a step curve.
axes[2].step(sorted_pixels, cumulative, 'b-', where='post', linewidth=2)
axes[2].set_title('累积分布函数', fontsize=14)
axes[2].set_xlabel('像素强度')
axes[2].set_ylabel('累积比例')
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('image_pixel_distribution.png', dpi=150, bbox_inches='tight')
plt.show()

2.5 箱线图：识别异常值

python 复制代码

# === Box plot basics ===
np.random.seed(42)

# Several groups of samples; the last one has ten hand-picked extreme values
# appended to 190 normal draws.
data_groups = {
    '正常组': np.random.normal(100, 10, 200),
    '偏高组': np.random.normal(120, 15, 200),
    '偏低组': np.random.normal(80, 12, 200),
    '有异常组': np.concatenate([np.random.normal(100, 10, 190), [50, 150, 160, 170, 180, 190, 200, 210, 220, 230]])
}

fig, axes = plt.subplots(1, 2, figsize=(14, 6))

group_names = list(data_groups)
data_list = [data_groups[name] for name in group_names]

# 1. Basic (vertical) box plot.
# The tick labels are set in a separate call rather than through boxplot's
# `labels=` keyword: Matplotlib 3.9 renamed that keyword to `tick_labels`,
# and set_xticklabels behaves the same on either side of the rename.
bp = axes[0].boxplot(data_list, patch_artist=True)
axes[0].set_xticklabels(group_names)

# Fill each box with its own colour (requires patch_artist=True above).
colors = ['lightblue', 'lightgreen', 'lightcoral', 'lightyellow']
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)

axes[0].set_title('箱线图：比较各组分布', fontsize=14)
axes[0].set_ylabel('数值')
axes[0].grid(True, alpha=0.3, axis='y')

# Explanatory text box in the upper-left corner (axes coordinates).
axes[0].text(0.02, 0.98, '箱线图解读:\n- 中位数（中间线）\n- 四分位距（箱子）\n- 须（1.5*IQR）\n- 异常值（圆圈）',
             transform=axes[0].transAxes, fontsize=10,
             verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

# 2. Horizontal box plot: the groups sit on the y axis, so that is where the
#    tick labels go.
bp2 = axes[1].boxplot(data_list, vert=False, patch_artist=True)
axes[1].set_yticklabels(group_names)
for patch, color in zip(bp2['boxes'], colors):
    patch.set_facecolor(color)

axes[1].set_title('水平箱线图', fontsize=14)
axes[1].set_xlabel('数值')
axes[1].grid(True, alpha=0.3, axis='x')

plt.tight_layout()
plt.savefig('boxplots.png', dpi=150, bbox_inches='tight')
plt.show()

# === AI in practice: comparing model performance ===
np.random.seed(42)

# Simulated 10-fold cross-validation scores for five models.
models = ['线性回归', '决策树', '随机森林', 'XGBoost', '神经网络']
cv_scores = {
    '线性回归': np.random.normal(0.75, 0.05, 10),
    '决策树': np.random.normal(0.72, 0.08, 10),
    '随机森林': np.random.normal(0.82, 0.04, 10),
    'XGBoost': np.random.normal(0.85, 0.03, 10),
    '神经网络': np.random.normal(0.83, 0.06, 10)
}

# Overwrite two scores with extreme values.
cv_scores['决策树'][-1] = 0.55
cv_scores['神经网络'][-2] = 0.92

# The per-model means are needed for the markers and for the "best model"
# box, so compute them once.
mean_scores = [np.mean(cv_scores[model]) for model in models]

fig, ax = plt.subplots(figsize=(12, 6))

# Box plot, one box per model. Tick labels are set separately because
# boxplot's `labels=` keyword was renamed to `tick_labels` in Matplotlib 3.9.
bp = ax.boxplot([cv_scores[model] for model in models], patch_artist=True)
ax.set_xticklabels(models)

# One Set3 colour per box, semi-transparent.
colors = plt.cm.Set3(np.linspace(0, 1, len(models)))
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)

# Mean markers: boxes sit at x = 1..n, and a single plot call yields a single
# legend entry.
ax.plot(range(1, len(models) + 1), mean_scores, 'ro', markersize=8, label='均值')

ax.set_title('模型性能比较（10折交叉验证）', fontsize=16, fontweight='bold')
ax.set_ylabel('准确率', fontsize=12)
ax.set_ylim(0.5, 1.0)
ax.grid(True, alpha=0.3, axis='y')
ax.legend()

# Call out the model with the highest mean score.
best_idx = int(np.argmax(mean_scores))
best_model = models[best_idx]
best_mean = mean_scores[best_idx]
ax.text(0.98, 0.98, f'最佳模型: {best_model}\n平均准确率: {best_mean:.3f}',
        transform=ax.transAxes, fontsize=11,
        verticalalignment='top', horizontalalignment='right',
        bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.8))

plt.tight_layout()
plt.savefig('model_comparison_boxplot.png', dpi=150, bbox_inches='tight')
plt.show()

三、Seaborn：统计可视化利器

3.1 Heatmap：相关性矩阵可视化

python 复制代码

import seaborn as sns

# === Heatmap basics ===
# Build a correlation matrix from synthetic data.
np.random.seed(42)
n_features = 10
n_samples = 500

# Start from independent features...
data = np.random.randn(n_samples, n_features)
# ...then, for roughly 30% of the (i, j) pairs with i < j, add a scaled copy
# of column i to column j.
for i in range(n_features):
    for j in range(i+1, n_features):
        if np.random.rand() > 0.7:
            correlation = np.random.uniform(0.5, 0.9)
            data[:, j] += correlation * data[:, i]

# Correlation coefficients between columns (np.corrcoef expects one variable
# per row, hence the transpose).
corr_matrix = np.corrcoef(data.T)

# Wrap it in a labelled DataFrame.
feature_names = [f'特征_{i+1}' for i in range(n_features)]
df_corr = pd.DataFrame(corr_matrix, index=feature_names, columns=feature_names)

fig, axes = plt.subplots(2, 2, figsize=(16, 14))

# 1. Basic annotated heatmap
sns.heatmap(df_corr, annot=True, fmt='.2f', cmap='coolwarm',
            center=0, square=True, ax=axes[0, 0],
            cbar_kws={'shrink': 0.8})
axes[0, 0].set_title('特征相关性热力图', fontsize=14, fontweight='bold')

# 2. Lower triangle only: the mask hides the upper triangle and the diagonal.
mask = np.triu(np.ones_like(df_corr, dtype=bool))
sns.heatmap(df_corr, mask=mask, annot=True, fmt='.2f',
            cmap='RdBu_r', center=0, square=True, ax=axes[0, 1],
            cbar_kws={'shrink': 0.8})
axes[0, 1].set_title('下三角相关性矩阵', fontsize=14, fontweight='bold')

# 3. Heatmap without cell annotations
sns.heatmap(df_corr, annot=False, cmap='coolwarm', center=0,
            square=True, ax=axes[1, 0],
            cbar_kws={'shrink': 0.8})
axes[1, 0].set_title('无注释热力图', fontsize=14, fontweight='bold')

# 4. Customised colour bar (label and explicit ticks)
sns.heatmap(df_corr, annot=False, cmap='Spectral', center=0,
            square=True, ax=axes[1, 1],
            cbar_kws={'label': '相关系数', 'shrink': 0.8,
                      'ticks': [-1, -0.5, 0, 0.5, 1]})
axes[1, 1].set_title('自定义颜色映射', fontsize=14, fontweight='bold')

# Lay out and save the 2x2 grid through its own Figure object. This has to be
# explicit because the clustermap below opens a second figure, which would
# otherwise become the target of plt.tight_layout() / plt.savefig().
fig.tight_layout()
fig.savefig('heatmaps.png', dpi=150, bbox_inches='tight')

# 5. Clustered heatmap (rows and columns reordered by hierarchical clustering).
#    clustermap is a figure-level function: it creates and returns its own
#    grid, so the title is set on that grid's figure.
cluster_grid = sns.clustermap(df_corr, annot=True, fmt='.2f', cmap='viridis',
                              figsize=(10, 10), dendrogram_ratio=0.2)
cluster_grid.figure.suptitle('聚类热力图（自动分组）', fontsize=14, fontweight='bold', y=1.02)

plt.show()

# === AI in practice: correlation of stock returns ===
# Simulate daily returns for several stocks.
np.random.seed(42)
# 252 periods on a business-day index, so no weekend dates appear.
dates = pd.date_range('2024-01-01', periods=252, freq='B')
stock_names = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'META', 'TSLA', 'NVDA', 'JPM']
n_stocks = len(stock_names)

# Independent base returns.
returns = np.random.randn(len(dates), n_stocks)

# Make the first five stocks (the tech names) strongly correlated: each one
# receives tech_corr times the sum of the other four tech stocks' base
# returns. The mix is computed from an untouched copy so the result does not
# depend on loop order; adding in place would feed already-mixed columns back
# into later additions.
tech_corr = 0.7
n_tech = 5
tech_base = returns[:, :n_tech].copy()
others_sum = tech_base.sum(axis=1, keepdims=True) - tech_base
returns[:, :n_tech] = tech_base + tech_corr * others_sum

returns_df = pd.DataFrame(returns, index=dates, columns=stock_names)

# Pairwise correlation of the return series.
corr_returns = returns_df.corr()

fig, ax = plt.subplots(figsize=(12, 10))

# Hide the upper triangle and the diagonal (the matrix is symmetric).
mask = np.triu(np.ones_like(corr_returns, dtype=bool))

sns.heatmap(corr_returns, mask=mask, annot=True, fmt='.2f',
            cmap='RdYlBu_r', center=0, square=True,
            linewidths=0.5, cbar_kws={'shrink': 0.8,
                                      'label': '相关系数'},
            ax=ax)

ax.set_title('股票收益率相关性矩阵', fontsize=16, fontweight='bold', pad=20)

plt.tight_layout()
plt.savefig('stock_correlation_heatmap.png', dpi=150, bbox_inches='tight')
plt.show()

# Collect every unordered pair whose |r| exceeds 0.7.
high_corr = []
for i in range(n_stocks):
    for j in range(i+1, n_stocks):
        corr_value = corr_returns.iloc[i, j]
        if abs(corr_value) > 0.7:
            high_corr.append((stock_names[i], stock_names[j], corr_value))

# Print the pairs, strongest correlation first.
print("\n高度相关的股票对 (|r| > 0.7):")
for s1, s2, corr in sorted(high_corr, key=lambda x: -abs(x[2])):
    print(f"  {s1} - {s2}: {corr:.3f}")

3.2 Pairplot：多维特征探索

python 复制代码

# === Pairplot basics ===
# Load two classic example datasets.
iris = sns.load_dataset('iris')
tips = sns.load_dataset('tips')

# 1. Pairplot of the iris dataset.
# pairplot is a figure-level function that builds its own figure, so no
# plt.figure() call precedes it; one would only leave an empty extra window.
pairplot1 = sns.pairplot(iris, hue='species', palette='Set2',
                         diag_kind='hist', markers=['o', 's', 'D'],
                         plot_kws={'alpha': 0.6})
pairplot1.figure.suptitle('Iris数据集特征分布与关系', fontsize=16, y=1.02)
plt.savefig('pairplot_iris.png', dpi=150, bbox_inches='tight')
plt.show()

# 2. Customised pairplot
g = sns.pairplot(tips, hue='sex', palette='Set1',
                 diag_kind='kde',  # kernel density estimate on the diagonal
                 kind='reg',       # scatter with a regression line elsewhere
                 plot_kws={'scatter_kws': {'alpha': 0.5},
                           'line_kws': {'color': 'red'}},
                 # `fill` replaces kdeplot's deprecated `shade` keyword.
                 diag_kws={'fill': True})

g.figure.suptitle('Tips数据集：消费行为分析', fontsize=16, y=1.02)
plt.savefig('pairplot_tips.png', dpi=150, bbox_inches='tight')
plt.show()

# === AI in practice: customer segmentation ===
np.random.seed(42)
n_customers = 300

# Synthetic customer table.
customer_data = pd.DataFrame({
    '年龄': np.random.normal(35, 12, n_customers),
    '年收入': np.random.normal(80000, 30000, n_customers),
    '消费频率': np.random.exponential(10, n_customers),
    '客单价': np.random.normal(500, 200, n_customers),
    '会员年限': np.random.exponential(3, n_customers)
})

# Assign one of three segments to each customer. np.select evaluates the
# conditions column-wise and takes the first one that matches, which replaces
# a Python loop doing one .loc lookup per row and applies the same rule
# order: high value first, then price sensitive, otherwise regular.
income = customer_data['年收入']
frequency = customer_data['消费频率']
customer_data['客户类型'] = np.select(
    [(income > 100000) & (frequency > 15), income < 50000],
    ['高价值客户', '价格敏感型'],
    default='普通客户',
)

# Clamp age and income to fixed ranges (done after segmentation, so the
# segments are based on the raw draws).
customer_data['年龄'] = customer_data['年龄'].clip(18, 80)
customer_data['年收入'] = customer_data['年收入'].clip(20000, 200000)

# Pairplot of four numeric features, coloured by segment.
g = sns.pairplot(customer_data, hue='客户类型',
                 vars=['年龄', '年收入', '消费频率', '客单价'],
                 palette={'高价值客户': 'gold',
                          '价格敏感型': 'blue',
                          '普通客户': 'gray'},
                 diag_kind='hist',
                 plot_kws={'alpha': 0.6, 's': 30},
                 diag_kws={'bins': 20})

g.figure.suptitle('客户细分分析：特征分布与关系', fontsize=16, y=1.02)
plt.savefig('customer_segmentation_pairplot.png', dpi=150, bbox_inches='tight')
plt.show()

# Per-segment mean and standard deviation of each feature.
print("\n客户细分统计摘要:")
print(customer_data.groupby('客户类型').agg({
    '年龄': ['mean', 'std'],
    '年收入': ['mean', 'std'],
    '消费频率': ['mean', 'std'],
    '客单价': ['mean', 'std']
}).round(2))

四、子图（Subplot）高级技巧

4.1 创建复杂布局

python 复制代码

# === Several ways to lay out subplots ===

# Option 1: regular grid via plt.subplots()
fig, axes = plt.subplots(2, 3, figsize=(15, 8))
axes = axes.flatten()  # 1-D view so the panels can be walked in order

for number, panel in enumerate(axes, start=1):
    random_walk = np.random.randn(100).cumsum()
    panel.plot(random_walk)
    panel.set_title(f'子图 {number}')
    panel.grid(True, alpha=0.3)

plt.suptitle('2x3网格布局', fontsize=16)
plt.tight_layout()
plt.show()

# Option 2: custom layout via GridSpec (a 3x3 grid whose cells can be merged)
fig = plt.figure(figsize=(12, 8))
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

# Large panel covering the top-left 2x2 cells.
ax1 = fig.add_subplot(gs[0:2, 0:2])
main_walk = np.random.randn(100).cumsum()
ax1.plot(main_walk, 'b-', linewidth=2)
ax1.set_title('主图区域')
ax1.grid(True)

# Two small panels stacked in the right-hand column.
ax2 = fig.add_subplot(gs[0, 2])
hist_sample = np.random.randn(1000)
ax2.hist(hist_sample, bins=30, color='green', alpha=0.7)
ax2.set_title('分布图')

ax3 = fig.add_subplot(gs[1, 2])
scatter_x = np.random.randn(100)
scatter_y = np.random.randn(100)
ax3.scatter(scatter_x, scatter_y, alpha=0.5)
ax3.set_title('散点图')

# Wide panel spanning the whole bottom row.
ax4 = fig.add_subplot(gs[2, :])
bottom_walk = np.random.randn(100).cumsum()
ax4.plot(bottom_walk, 'r-', linewidth=2)
ax4.set_title('底部趋势图')
ax4.grid(True)

plt.suptitle('GridSpec自定义布局', fontsize=16)
plt.show()

# === Shared axes ===
fig, axes = plt.subplots(3, 1, figsize=(10, 10), sharex=True, sharey=True)

x = np.linspace(0, 10, 100)
# One damped sine per panel, with frequencies 1, 2 and 3.
for freq, panel in enumerate(axes, start=1):
    y = np.sin(freq * x) * np.exp(-x/5)
    panel.plot(x, y, linewidth=2)
    panel.set_ylabel(f'频率 {freq}')
    panel.grid(True, alpha=0.3)

# Only the bottom panel carries the x label; only the top one the title.
axes[-1].set_xlabel('时间')
axes[0].set_title('共享X轴和Y轴的子图', fontsize=14)
plt.tight_layout()
plt.show()

4.2 实战：训练过程监控面板

python 复制代码

# === AI in practice: a complete training-monitoring dashboard ===
np.random.seed(42)

# Simulated training metrics for 100 epochs.
epochs = np.arange(1, 101)
n_epochs = len(epochs)
train_loss = 2.5 / np.sqrt(epochs) + np.random.normal(0, 0.03, n_epochs)
val_loss = 2.8 / np.sqrt(epochs) + np.random.normal(0, 0.05, n_epochs)
train_acc = 0.5 + 0.4 * (1 - np.exp(-epochs/30)) + np.random.normal(0, 0.02, n_epochs)
val_acc = 0.48 + 0.42 * (1 - np.exp(-epochs/25)) + np.random.normal(0, 0.03, n_epochs)
learning_rate = 0.01 * (0.95 ** epochs)  # exponential decay
gradient_norm = np.exp(-epochs/50) + np.random.normal(0, 0.05, n_epochs)

# Dashboard layout: a 3x3 GridSpec whose cells are merged as needed.
fig = plt.figure(figsize=(16, 10))
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

# 1. Loss curves (top-left, spanning 2x2 cells)
ax_loss = fig.add_subplot(gs[0:2, 0:2])
ax_loss.plot(epochs, train_loss, 'b-', label='训练损失', linewidth=2)
ax_loss.plot(epochs, val_loss, 'r-', label='验证损失', linewidth=2)
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('训练损失曲线', fontsize=14, fontweight='bold')
ax_loss.legend(loc='upper right')
ax_loss.grid(True, alpha=0.3)

# Mark the best validation loss. np.argmin returns a 0-based index while
# `epochs` starts at 1, so the x position is looked up in `epochs` rather
# than using the index itself.
best_idx = int(np.argmin(val_loss))
best_epoch = int(epochs[best_idx])
best_loss = val_loss[best_idx]
ax_loss.plot(best_epoch, best_loss, 'ro', markersize=10)
ax_loss.annotate(f'最佳: {best_loss:.4f}',
                 xy=(best_epoch, best_loss),
                 xytext=(best_epoch+10, best_loss+0.1),
                 arrowprops=dict(arrowstyle='->', color='red'))

# 2. Accuracy curves (top-right)
ax_acc = fig.add_subplot(gs[0, 2])
ax_acc.plot(epochs, train_acc, 'g-', label='训练', linewidth=2)
ax_acc.plot(epochs, val_acc, 'orange', label='验证', linewidth=2)
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('准确率')
ax_acc.legend(fontsize=8)
ax_acc.grid(True, alpha=0.3)

# 3. Learning-rate schedule (middle-right), on a log y scale
ax_lr = fig.add_subplot(gs[1, 2])
ax_lr.plot(epochs, learning_rate, 'purple', linewidth=2)
ax_lr.set_xlabel('Epoch')
ax_lr.set_ylabel('Learning Rate')
ax_lr.set_title('学习率衰减')
ax_lr.set_yscale('log')
ax_lr.grid(True, alpha=0.3)

# 4. Gradient norm (bottom-left)
ax_grad = fig.add_subplot(gs[2, 0])
ax_grad.plot(epochs, gradient_norm, 'brown', linewidth=2)
ax_grad.set_xlabel('Epoch')
ax_grad.set_ylabel('Gradient Norm')
ax_grad.set_title('梯度范数')
ax_grad.grid(True, alpha=0.3)

# 5. Histogram of the last 20 validation losses (bottom-centre)
ax_hist = fig.add_subplot(gs[2, 1])
final_losses = val_loss[-20:]
final_mean = np.mean(final_losses)
ax_hist.hist(final_losses, bins=15, color='skyblue', edgecolor='black', alpha=0.7)
ax_hist.axvline(final_mean, color='red', linestyle='--',
                label=f'均值: {final_mean:.4f}')
ax_hist.set_xlabel('Loss')
ax_hist.set_ylabel('频数')
ax_hist.set_title('最终20轮损失分布')
ax_hist.legend()

# 6. Text summary (bottom-right); the axes frame is hidden so that only the
#    text box shows.
ax_text = fig.add_subplot(gs[2, 2])
ax_text.axis('off')
summary_text = f"""
训练完成摘要
{'='*30}
最佳验证损失: {best_loss:.4f}
最佳验证准确率: {val_acc.max():.4f}
最终训练准确率: {train_acc[-1]:.4f}
最终验证准确率: {val_acc[-1]:.4f}
训练轮数: {len(epochs)}
学习率衰减: {learning_rate[0]:.5f} → {learning_rate[-1]:.5f}
"""
ax_text.text(0.1, 0.5, summary_text, transform=ax_text.transAxes,
             fontsize=12, verticalalignment='center',
             bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.8))

plt.suptitle('深度学习训练监控仪表板', fontsize=18, fontweight='bold', y=0.98)
plt.savefig('training_dashboard.png', dpi=150, bbox_inches='tight')
plt.show()

五、样式与美化

5.1 内置样式

python 复制代码

# === Matplotlib built-in styles ===
print("可用的样式:", plt.style.available[:10])

# Demo data
x = np.linspace(0, 10, 100)
y1 = np.sin(x)
y2 = np.cos(x)
y3 = np.sin(x) * np.exp(-x/5)

# Compare several styles side by side.
styles = ['default', 'seaborn-v0_8-darkgrid', 'ggplot', 'fivethirtyeight']
fig = plt.figure(figsize=(14, 10))

for slot, style in enumerate(styles, start=1):
    with plt.style.context(style):
        # The Axes must be created inside the style context. Style settings
        # (background, colour cycle, ticks, ...) are read when an Axes is
        # built, so entering the context around axes that already exist
        # leaves every panel looking almost the same.
        ax = fig.add_subplot(2, 2, slot)
        ax.plot(x, y1, label='sin(x)', linewidth=2)
        ax.plot(x, y2, label='cos(x)', linewidth=2)
        ax.plot(x, y3, label='阻尼', linewidth=2)
        ax.set_title(f'样式: {style}', fontsize=14)
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        ax.legend()
        ax.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig('style_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

# === 自定义样式 ===
def set_custom_style():
    """Apply this tutorial's custom defaults to Matplotlib's global rcParams.

    Takes no arguments and returns nothing. The settings are written into
    ``plt.rcParams``, so they affect figures created after the call.
    """
    custom_defaults = {
        # Figure geometry
        'figure.figsize': (12, 6),
        'figure.dpi': 100,
        # Fonts and text sizes
        'font.size': 12,
        'font.family': 'sans-serif',
        'axes.titlesize': 14,
        'axes.labelsize': 12,
        'legend.fontsize': 10,
        # Grid on by default, drawn faintly
        'axes.grid': True,
        'grid.alpha': 0.3,
        # Line weight
        'lines.linewidth': 2,
    }
    plt.rcParams.update(custom_defaults)

# Activate the custom defaults, then draw with them.
set_custom_style()

# No figsize / linewidth / grid arguments below: they come from rcParams.
fig, ax = plt.subplots()
x = np.linspace(0, 10, 100)
for phase in range(5):
    damped_wave = np.sin(x + phase) * np.exp(-x/5)
    ax.plot(x, damped_wave, label=f'曲线 {phase+1}')
ax.set(title='自定义样式示例', xlabel='X轴', ylabel='Y轴')
ax.legend()
plt.tight_layout()
plt.show()

5.2 颜色与调色板

python 复制代码

# === Seaborn colour palettes ===
# The six named qualitative palettes compared below.
palettes = ['deep', 'muted', 'pastel', 'bright', 'dark', 'colorblind']

fig, axes = plt.subplots(2, 3, figsize=(15, 8))

for panel, palette in zip(axes.ravel(), palettes):
    # Fresh random sample: 50 observations for each of 6 boxes.
    data = np.random.randn(50, 6)

    # Six colours taken from the current palette.
    colors = sns.color_palette(palette, 6)

    # Box plot with one palette colour per box.
    bp = panel.boxplot(data, patch_artist=True)
    for box, fill in zip(bp['boxes'], colors):
        box.set_facecolor(fill)
        box.set_alpha(0.7)

    panel.set_title(f'调色板: {palette}')
    panel.set(xlabel='类别', ylabel='数值')

plt.suptitle('Seaborn调色板对比', fontsize=16)
plt.tight_layout()
plt.savefig('palette_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

# === Sequential, diverging and qualitative palettes ===
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Ten equal-height bars per panel; only the colours differ.
bar_labels = [str(k) for k in range(10)]
bar_heights = [1] * 10

palette_panels = [
    ("Blues", '顺序色板（Sequential）'),    # 1. sequential
    ("RdBu_r", '发散色板（Diverging）'),    # 2. diverging
    ("Set2", '分类色板（Qualitative）'),    # 3. qualitative
]

for ax, (palette_name, panel_title) in zip(axes, palette_panels):
    swatches = sns.color_palette(palette_name, 10)
    # seaborn 0.13 deprecates passing `palette` without `hue`, so the x
    # variable is also assigned to `hue`. dodge=False keeps each bar centred
    # on its tick.
    sns.barplot(x=bar_labels, y=bar_heights, hue=bar_labels,
                palette=swatches, dodge=False, ax=ax)
    # The hue legend only repeats the x tick labels; remove it if one was
    # drawn.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.set_title(panel_title)

plt.tight_layout()
plt.show()

六、实战案例：完整的数据分析报告

python 复制代码

# === 综合实战：房价数据分析与可视化 ===
from sklearn.datasets import make_regression

# Generate a synthetic housing dataset.
np.random.seed(42)
n_houses = 1000

housing_data = pd.DataFrame({
    '面积': np.random.normal(120, 40, n_houses).clip(40, 300),
    '卧室数': np.random.randint(1, 6, n_houses),
    '房龄': np.random.exponential(15, n_houses).clip(0, 50),
    '距地铁站': np.random.exponential(1, n_houses).clip(0, 5),
    '楼层': np.random.randint(1, 30, n_houses),
    '评分': np.random.normal(4, 0.5, n_houses).clip(1, 5)
})

# Price model: a linear combination of the features plus Gaussian noise.
# The coefficients yield amounts in yuan (clipped to 100,000 - 1,000,000).
price_yuan = (
    5000 * housing_data['面积'] +
    30000 * housing_data['卧室数'] -
    1000 * housing_data['房龄'] -
    20000 * housing_data['距地铁站'] +
    500 * housing_data['楼层'] +
    20000 * housing_data['评分'] +
    np.random.normal(0, 50000, n_houses)
).clip(100000, 1000000)

# Store the price in units of 万元 (10,000 yuan). Every axis label and the
# printed summary in the report below call this column "万元", so keeping raw
# yuan here would make all of those figures 10,000 times too large.
housing_data['房价'] = price_yuan / 10000

# Categorical helper columns.
# Bedrooms 1-2 / 3 / 4-5 map to small / medium / large layouts.
housing_data['户型'] = pd.cut(housing_data['卧室数'],
                            bins=[0, 2, 3, 5],
                            labels=['小户型', '中户型', '大户型'])
# Ages are clipped to [0, 50]; include_lowest=True keeps an age of exactly 0
# in the first bin rather than turning it into NaN.
housing_data['房龄段'] = pd.cut(housing_data['房龄'],
                             bins=[0, 5, 15, 50],
                             labels=['新房', '次新房', '老房'],
                             include_lowest=True)

print("房价数据预览:")
print(housing_data.head())
print(f"\n数据形状: {housing_data.shape}")

# Build the full analysis report: seven panels on a 4x4 GridSpec.
fig = plt.figure(figsize=(20, 16))
gs = fig.add_gridspec(4, 4, hspace=0.3, wspace=0.3)

# 1. Price distribution (top-left)
ax1 = fig.add_subplot(gs[0, 0:2])
ax1.hist(housing_data['房价'], bins=50, color='steelblue', edgecolor='black', alpha=0.7)
ax1.axvline(housing_data['房价'].mean(), color='red', linestyle='--',
            label=f'均值: {housing_data["房价"].mean():.0f}')
ax1.axvline(housing_data['房价'].median(), color='green', linestyle='--',
            label=f'中位数: {housing_data["房价"].median():.0f}')
ax1.set_xlabel('房价（万元）')
ax1.set_ylabel('频数')
ax1.set_title('房价分布', fontsize=14, fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)

# 2. Area vs price, coloured by rating (top-right)
ax2 = fig.add_subplot(gs[0, 2:4])
scatter = ax2.scatter(housing_data['面积'], housing_data['房价'],
                      c=housing_data['评分'], cmap='viridis',
                      alpha=0.6, s=30)
ax2.set_xlabel('面积（㎡）')
ax2.set_ylabel('房价（万元）')
ax2.set_title('面积与房价关系（颜色=评分）', fontsize=14, fontweight='bold')
plt.colorbar(scatter, ax=ax2, label='小区评分')
ax2.grid(True, alpha=0.3)

# 3. Price by building-age band (second row, left)
ax3 = fig.add_subplot(gs[1, 0:2])
# seaborn 0.13 deprecates `palette` without `hue`, so the x variable is also
# passed as hue. dodge=False keeps one full-width box per category.
sns.boxplot(data=housing_data, x='房龄段', y='房价', hue='房龄段',
            palette='Set2', dodge=False, ax=ax3)
# The hue legend would only repeat the x tick labels; drop it if present.
age_legend = ax3.get_legend()
if age_legend is not None:
    age_legend.remove()
ax3.set_xlabel('房龄')
ax3.set_ylabel('房价（万元）')
ax3.set_title('房龄对房价的影响', fontsize=14, fontweight='bold')

# 4. Price vs distance to the metro, with a regression line (second row, right)
ax4 = fig.add_subplot(gs[1, 2:4])
sns.regplot(data=housing_data, x='距地铁站', y='房价',
            scatter_kws={'alpha': 0.3, 's': 20},
            line_kws={'color': 'red', 'linewidth': 2},
            ax=ax4)
ax4.set_xlabel('距地铁站距离（km）')
ax4.set_ylabel('房价（万元）')
ax4.set_title('地铁距离对房价的影响', fontsize=14, fontweight='bold')
ax4.grid(True, alpha=0.3)

# 5. Correlation heatmap of the numeric columns (bottom-left, 2x2 cells)
ax5 = fig.add_subplot(gs[2:4, 0:2])
numeric_cols = ['面积', '卧室数', '房龄', '距地铁站', '楼层', '评分', '房价']
corr_matrix = housing_data[numeric_cols].corr()
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm',
            center=0, square=True, ax=ax5,
            cbar_kws={'shrink': 0.8})
ax5.set_title('特征相关性矩阵', fontsize=14, fontweight='bold')

# 6. Mean price per layout type (third row, right)
ax6 = fig.add_subplot(gs[2, 2:4])
# '户型' is categorical. observed=False states the grouping behaviour
# explicitly (every category is kept), which avoids pandas' warning about
# the changing default.
mean_price_by_layout = (
    housing_data.groupby('户型', observed=False)['房价'].mean().sort_values()
)
mean_price_by_layout.plot(kind='bar', ax=ax6, color=['gold', 'silver', 'coral'])
ax6.set_xlabel('户型')
ax6.set_ylabel('平均房价（万元）')
ax6.set_title('不同户型的平均房价', fontsize=14, fontweight='bold')
ax6.tick_params(axis='x', rotation=0)

# 7. Key statistics as text (bottom-right); the axes frame is hidden.
ax7 = fig.add_subplot(gs[3, 2:4])
ax7.axis('off')

# Assemble the summary from the statistics computed above.
stats_text = f"""
{'='*40}
房价数据分析摘要
{'='*40}

基本统计:
  样本数量: {len(housing_data)}
  平均房价: {housing_data['房价'].mean():,.0f} 万元
  房价中位数: {housing_data['房价'].median():,.0f} 万元
  房价标准差: {housing_data['房价'].std():,.0f} 万元

相关性分析:
  面积与房价: {corr_matrix.loc['面积', '房价']:.3f}
  评分与房价: {corr_matrix.loc['评分', '房价']:.3f}
  地铁距离与房价: {corr_matrix.loc['距地铁站', '房价']:.3f}

户型均价:
  大户型: {mean_price_by_layout.get('大户型', 0):,.0f} 万元
  中户型: {mean_price_by_layout.get('中户型', 0):,.0f} 万元
  小户型: {mean_price_by_layout.get('小户型', 0):,.0f} 万元

{'='*40}
"""
ax7.text(0.1, 0.5, stats_text, transform=ax7.transAxes,
         fontsize=10, verticalalignment='center',
         family='monospace',
         bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.5))

plt.suptitle('房价数据分析完整报告', fontsize=20, fontweight='bold', y=0.98)
plt.savefig('housing_analysis_report.png', dpi=150, bbox_inches='tight')
plt.show()

print("\n分析报告已生成！✅")

七、最佳实践与技巧

7.1 快速参考卡片

python 复制代码

# === Quick guide for choosing a chart type ===
# A plain multi-line string holding a decision tree that maps "what do you
# want to show" to a chart type; it is printed as-is below.
chart_guide = """
图表选择决策树:

想要展示什么？
│
├─ 趋势变化 → 折线图 (plot)
│  例：损失曲线、时间序列
│
├─ 数据分布 → 直方图 (hist) / 箱线图 (boxplot)
│  例：年龄分布、分数分布
│
├─ 变量关系 → 散点图 (scatter)
│  例：特征相关性
│
├─ 比例构成 → 饼图 (pie) / 条形图 (bar)
│  例：市场份额、类别占比
│
├─ 多维对比 → 热力图 (heatmap) / pairplot
│  例：多个特征相关性
│
└─ 比较多个组 → 箱线图 / 小提琴图 (violinplot)
   例：不同模型的性能对比
"""

print(chart_guide)

7.2 性能优化技巧

python 复制代码

# The tips below define their own data, so this snippet does not depend on
# whatever `x` and `y` happen to be left over from earlier sections.
x = np.linspace(0, 10, 100)

# 1. Avoid repeated plot calls
# `curves` holds 100 phase-shifted sine waves, one per column
# (shape: len(x) rows x 100 columns).
curves = np.sin(x[:, None] + np.linspace(0, np.pi, 100))

# Slow: one plot call per curve
# for i in range(curves.shape[1]):
#     plt.plot(x, curves[:, i])

# Fast: a 2-D y array is drawn column by column in a single call
plt.figure()
plt.plot(x, curves)

# 2. Use NumPy vectorisation
y = np.sin(x)

# Slow: one scatter call per point
# for i in range(len(x)):
#     plt.scatter(x[i], y[i])

# Fast: pass the whole arrays at once
plt.figure()
plt.scatter(x, y)

# 3. Reduce the number of points
# For a large dataset, plotting a random subsample is often enough.
n_points = 10000
x_large = np.random.randn(n_points)
y_large = np.random.randn(n_points)

# Randomly sample 10% of the points, without replacement.
sample_idx = np.random.choice(n_points, size=n_points//10, replace=False)
plt.figure()
plt.scatter(x_large[sample_idx], y_large[sample_idx], alpha=0.5)
plt.show()

7.3 保存高质量图片

python 复制代码

# Recommended ways to save a figure
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(x, y)

# Each entry is (file name, extra savefig options); bbox_inches='tight' is
# shared by all three exports.
export_specs = (
    ('high_quality.png', {'dpi': 300}),                    # high DPI, e.g. for papers
    ('vector_figure.pdf', {}),                             # vector format, scales freely
    ('transparent.png', {'dpi': 150, 'transparent': True}),  # transparent background
)
for file_name, extra_options in export_specs:
    plt.savefig(file_name, bbox_inches='tight', **extra_options)

八、学习检查清单

基础掌握（必须）

创建Figure和Axes（plt.subplots()）
绘制折线图（plot）
绘制散点图（scatter）
绘制直方图（hist）
绘制箱线图（boxplot）
添加标题、标签、图例
保存图片（savefig）

进阶掌握（重要）

创建子图（subplots, GridSpec）
使用Seaborn绘制热力图（heatmap）
使用Seaborn绘制pairplot（pairplot）
自定义颜色和样式
处理大数据集的可视化

扩展了解（按需）

绘制小提琴图（violinplot）
绘制3D图（mplot3d）
动画制作（animation）
交互式图表（plotly）

九、总结

Matplotlib & Seaborn的核心价值：

Matplotlib：底层绘图库，灵活强大
- 精细控制每个元素
- 适合定制化需求
Seaborn：统计可视化，美观便捷
- 一行代码完成复杂统计图
- 与Pandas无缝集成

学习建议：

先用Seaborn快速探索数据
需要精细控制时用Matplotlib定制
记住最常用的5-10个函数
多练习真实数据集

记住：一图胜千言，好的可视化是数据科学家的超能力！