数据可视化 - 技术栈

一.Jupyterlab

1.快捷键

ctrl + enter : 运行程序
esc + M : 将当前 cell 从代码转成 markdown 文本
esc + Y : 将当前 cell 从 markdown 文本转成代码
esc + A : 在当前 cell 上方插入新 cell
esc + B : 在当前 cell 下方插入新 cell

二.绘图

1.可视化

（1）使用 Matplotlib 绘制线图

正弦，余弦函数：

python 复制代码

import numpy as np
import matplotlib.pyplot as plt

# Sample 100 evenly spaced x values on [0, 2*pi] and evaluate both curves.
x_array = np.linspace(0, 2*np.pi, 100)
sin_y = np.sin(x_array)
cos_y = np.cos(x_array)

# Create a figure with figsize (8, 6).
# `fig` is the whole figure, `ax` is the single subplot inside it.
fig, ax = plt.subplots(figsize=(8, 6))

# Draw the sine and cosine curves (x/y data, legend label, colour, line width).
ax.plot(x_array, sin_y,
        label='sin', color='b', linewidth=2)
ax.plot(x_array, cos_y,
        label='cos', color='r', linewidth=2)

# Title and axis labels.
ax.set_title('Sine and cosine functions')
ax.set_xlabel('x')
ax.set_ylabel('f(x)')

# Legend identifying each curve by its label.
ax.legend()

# Axis ranges.
ax.set_xlim(0, 2*np.pi)
ax.set_ylim(-1.5, 1.5)

# Tick positions 0, pi/2, pi, 3pi/2, 2pi.
# linspace fixes the number of ticks at five, so it always matches the five
# labels below; arange with a floating-point step can yield one element
# more or fewer because of rounding.
x_ticks = np.linspace(0, 2*np.pi, 5)
ax.set_xticks(x_ticks)

# One LaTeX label per tick position.
x_ticklabels = [r'$0$', r'$\frac{\pi}{2}$',
                r'$\pi$', r'$\frac{3\pi}{2}$',
                r'$2\pi$']
ax.set_xticklabels(x_ticklabels)

# Same scale on both axes so the curves are not distorted.
ax.set_aspect('equal')

# Show grid lines on this subplot.
ax.grid()
# Save as SVG, a vector format that can be enlarged without loss of quality.
plt.savefig('正弦_余弦函数曲线.svg', format='svg')
# Display the figure.
plt.show()

构建单个子图：add_subplot

python 复制代码

import numpy as np
import matplotlib.pyplot as plt

# Start from an empty Figure, then attach one Axes to it.
# The shorthand 111 means a 1x1 grid, first (and only) cell.
fig = plt.figure()
ax = fig.add_subplot(111)

# One full period of the sine function, sampled at 100 points.
x = np.linspace(0, 2*np.pi, 100)
y = np.sin(x)
ax.plot(x, y)

plt.show()

构建多个子图：

python 复制代码

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 2*np.pi, 100)

# Create a new figure, i.e. the canvas that holds the subplots.
fig = plt.figure()

# Build a 2x2 grid and fill its four cells one by one.
# First subplot: sine (computed here; this snippet defines no separate `y`).
ax1 = fig.add_subplot(2, 2, 1)
ax1.plot(x, np.sin(x))

# Second subplot: cosine.
ax2 = fig.add_subplot(2, 2, 2)
ax2.plot(x, np.cos(x))

# Third subplot: tangent.
ax3 = fig.add_subplot(2, 2, 3)
ax3.plot(x, np.tan(x))

# Fourth subplot: exponential.
ax4 = fig.add_subplot(2, 2, 4)
ax4.plot(x, np.exp(x))

plt.show()

构建一行两列子图（ax1, ax2, ------）：

python 复制代码

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 2 * np.pi, 100)
y_sin = np.sin(x)
y_cos = np.cos(x)

# Tick positions and labels shared by both panels.
x_ticks = np.arange(0, 2*np.pi + np.pi/2, np.pi)
x_ticklabels = [r'$0$', r'$\pi$', r'$2\pi$']

# One row, two columns; sharey=True makes both panels share the y axis.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,4), sharey=True)

# Left panel: sine in blue. Right panel: cosine in red.
# Everything else is configured identically, so apply it in a loop.
panels = [
    (ax1, y_sin, 'blue', 'Sine function'),
    (ax2, y_cos, 'red', 'Cosine function'),
]
for axis, values, colour, title in panels:
    axis.plot(x, values, color=colour)
    axis.set_title(title)
    axis.set_xlabel('x')
    axis.set_ylabel('f(x)', rotation='horizontal', ha='right')
    axis.set_xlim(0, 2*np.pi)
    axis.set_ylim(-1.5, 1.5)
    axis.set_xticks(x_ticks)
    axis.set_xticklabels(x_ticklabels)
    axis.grid(True)
    axis.set_aspect('equal')

# Adjust the spacing between the subplots, then display.
plt.tight_layout()
plt.show()

（2）使用 Plotly 绘制线图

python 复制代码

# 导入包
import plotly.express as px
import numpy as np
import pandas as pd

# Horizontal-axis samples: 100 evenly spaced points on [0, 2*pi].
x = np.linspace(0, 2 * np.pi, 100)

# Sine and cosine values at those points.
y_sin = np.sin(x)
y_cos = np.cos(x)

# Collect everything in a pandas DataFrame, one column per series.
df = pd.DataFrame({'x': x, 'Sine': y_sin, 'Cosine': y_cos})

# Build the line chart with both series plotted against column 'x'.
# `labels` keys must match column names exactly (the column is 'x', not 'X');
# 'value' is the name used for the combined y columns.
fig = px.line(df, x='x', y=['Sine', 'Cosine'],
              labels={'value': 'f(x)', 'x': 'x'})

# Display the chart.
fig.show()

2.二维可视化

（1）平面散点

用 matplotlib.pyplot：

python 复制代码

# load_iris 函数的作用是加载鸢尾花数据集
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
import numpy as np

# Load the iris dataset.
iris = load_iris()

# Use sepal length and sepal width as the two plotted variables.
sepal_length = iris.data[:, 0]  # every row of the first column
sepal_width = iris.data[:, 1]   # every row of the second column

# Label array of the dataset.
# np.unique(iris.target) gives array([0, 1, 2]): one label per iris class.
target = iris.target

# Create the figure object `fig` and the axes object `ax`.
fig, ax = plt.subplots()

# Scatter plot drawn on `ax` explicitly, consistent with the calls below.
# c=target colours each point by its label; cmap='rainbow' maps the three
# label values (0, 1, 2) to three colours, one per class.
ax.scatter(sepal_length, sepal_width, c=target, cmap='rainbow')

# Title and axis labels.
ax.set_title('Iris sepal length vs width')
ax.set_xlabel('Sepal length (cm)')
ax.set_ylabel('Sepal width (cm)')

# Integer ticks on both axes.
ax.set_xticks(np.arange(4, 8 + 1, step=1))
ax.set_yticks(np.arange(1, 5 + 1, step=1))

# Use a 1:1 scale for the two axes.
ax.axis('scaled')

# Dashed, thin, light-grey grid at the tick positions.
ax.grid(linestyle='--', linewidth=0.25, color=[0.7,0.7,0.7])

# Axis bounds.
ax.set_xbound(lower = 4, upper = 8)
ax.set_ybound(lower = 1, upper = 5)

plt.show()

用 plotly.express：

python 复制代码

import numpy as np
import plotly.express as px

# Iris sample data bundled with Plotly.
iris_df = px.data.iris()

# Settings shared by both figures: axis titles, ranges and integer ticks.
axis_titles = {"sepal_length": "Sepal length (cm)",
               "sepal_width": "Sepal width (cm)"}
xticks = np.arange(4,8+1)
yticks = np.arange(1,5+1)
x_axis = dict(range=[4, 8], tickmode='array', tickvals=xticks)
y_axis = dict(range=[1, 5], tickmode='array', tickvals=yticks)

# First figure: plain scatter, markers not coloured by class.
fig = px.scatter(iris_df, x="sepal_length", y="sepal_width",
                 width=600, height=600,
                 labels=axis_titles)
# update_layout sets the range and tick positions of both axes.
fig.update_layout(xaxis=x_axis, yaxis=y_axis)
fig.show()

# Second figure: color="species" colours the markers by iris class.
fig = px.scatter(iris_df, x="sepal_length", y="sepal_width",
                 color="species",
                 width=600, height=600,
                 labels=axis_titles)
# Same axis styling, plus a legend anchored by its top-left corner at
# x=0.01, y=0.99.
fig.update_layout(xaxis=x_axis, yaxis=y_axis,
                  legend=dict(yanchor="top", y=0.99,
                              xanchor="left", x=0.01))

fig.show()

（2）平面等高线

用 matplotlib.pyplot：

python 复制代码

import matplotlib.pyplot as plt 
import numpy as np 

# 1-D coordinates: 100 samples on [-2, 2] along each direction.
x = np.linspace(-2,2,100)
y = np.linspace(-2,2,100)

# Expand into a 2-D grid and evaluate z = x^2 + y^2 at every grid point.
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2

# 17 contour levels evenly spaced from 0 to 8.
contour_levels = np.linspace(0, 8, 16+1)
plt.contour(X, Y, Z, levels=contour_levels, cmap='RdYlBu_r')

# Colour bar built from the colour mapping of the contour set just drawn.
plt.colorbar()

plt.show()

用 contourf() 方法可以在 ax 上绘制平面填充等高线（plt.contourf）：

可视化二元函数：

python 复制代码

import numpy as np
import matplotlib.pyplot as plt 

# Grid data: 121 samples on [-3, 3] in each direction.
x1_array = np.linspace(-3,3,121)
x2_array = np.linspace(-3,3,121)

xx1, xx2 = np.meshgrid(x1_array, x2_array)
ff = xx1 * np.exp(- xx1**2 - xx2**2)

# Draw the same function twice: first as contour lines (line width 1),
# then as filled contours. levels=20 is the requested number of levels.
for method_name, extra_options in (('contour', {'linewidths': 1}),
                                   ('contourf', {})):
    fig, ax = plt.subplots()
    CS = getattr(ax, method_name)(xx1, xx2, ff, levels=20,
                                  cmap='RdYlBu_r', **extra_options)

    fig.colorbar(CS)
    ax.set_xlabel(r'$\it{x_1}$')
    ax.set_ylabel(r'$\it{x_2}$')
    # No ticks and no grid.
    ax.set_xticks([])
    ax.set_yticks([])
    # Limit both axes to the extent of the grid.
    ax.set_xlim(xx1.min(), xx1.max())
    ax.set_ylim(xx2.min(), xx2.max())
    ax.grid(False)
    ax.set_aspect('equal', adjustable='box')

用 plotly.graph_objects.Contour()：

python 复制代码

import numpy as np
import matplotlib.pyplot as plt 
import plotly.graph_objects as go

# Grid data: 121 samples on [-3, 3] in each direction.
x1_array = np.linspace(-3,3,121)
x2_array = np.linspace(-3,3,121)

xx1, xx2 = np.meshgrid(x1_array, x2_array)
radius_sq = xx1**2 + xx2**2
ff = xx1 * np.exp(-radius_sq)

# Contour trace: levels from -0.5 to 0.5 in steps of 0.05,
# with the colour scale applied to the lines.
contour_trace = go.Contour(
    x=x1_array, y=x2_array, z=ff,
    contours_coloring='lines',
    line_width=2,
    colorscale='RdYlBu_r',
    contours={'start': -0.5, 'end': 0.5, 'size': 0.05},
)

# Build the figure from the trace, then set size and axis titles.
fig = go.Figure(data=contour_trace)
fig.update_layout(
    width=600,   # figure width
    height=600,  # figure height
    xaxis=dict(title=r'$x_1$'),
    yaxis=dict(title=r'$x_2$'),
)

fig.show()

（3）热图

用 Seaborn：

python 复制代码

import seaborn as sns 
import numpy as np 
 
# 10x10 matrix of random values.
data = np.random.rand(10,10)

# Heatmap options: colour range fixed to [0, 1], viridis colour map,
# cell values annotated, tick labels shown on both axes.
heatmap_options = dict(
    vmin=0, vmax=1,
    cmap='viridis',
    annot=True,
    xticklabels=True,
    yticklabels=True,
)
sns.heatmap(data, **heatmap_options)

鸢尾花数据集：

python 复制代码

import matplotlib.pyplot as plt 
import seaborn as sns

# Iris sample data shipped with seaborn.
iris_sns = sns.load_dataset("iris")

# Every column except the last one is used for the heatmap.
measurements = iris_sns.iloc[:, :-1]
column_names = ['Sepal length', 'Sepal width',
                'Petal length', 'Petal width']

fig, ax = plt.subplots()

# ax=ax draws the heatmap on the axes created above.
# Colour range is fixed to [0, 8]; row labels are hidden.
sns.heatmap(data=measurements,
            vmin=0, vmax=8,
            ax=ax,
            yticklabels=False,
            xticklabels=column_names,
            cmap='RdYlBu_r')

用 plotly：

python 复制代码

import plotly.express as px

# Iris sample data bundled with Plotly.
df = px.data.iris()

# Heatmap of every column except the last two, colour range fixed to [0, 8].
feature_table = df.iloc[:, :-2]
fig = px.imshow(feature_table, text_auto=False,
                width=600, height=600,
                x=None, zmin=0, zmax=8,
                color_continuous_scale='viridis')

# Hide the y-axis tick labels by supplying an empty tick list.
fig.update_yaxes(tickmode='array', tickvals=[])

# Replace the x-axis tick labels with readable names at positions 0..3.
x_labels = ['Sepal length', 'Sepal width',
            'Petal length', 'Petal width']
x_ticks = list(range(len(x_labels)))
fig.update_xaxes(tickmode='array', tickvals=x_ticks,
                 ticktext=x_labels)

fig.show()

3.三维可视化

（1）散点

三维视图视角：

python 复制代码

import matplotlib.pyplot as plt

# New figure with a single 3D axes.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')

# Name the three axes.
ax.set(xlabel='x', ylabel='y', zlabel='z')

# Orthographic projection.
ax.set_proj_type('ortho')

# Viewing angle: elevation 30 degrees, azimuth 30 degrees.
ax.view_init(elev=30, azim=30)

# Equal box proportions so all three directions are drawn at the same scale.
ax.set_box_aspect((1, 1, 1))

plt.show()

三维散点：

python 复制代码

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

# Load the iris dataset.
iris = datasets.load_iris()

# The first three feature columns become the x, y and z coordinates;
# the label array drives the point colour.
sepal_length, sepal_width, petal_length = iris.data[:, :3].T
labels = iris.target

# Figure with a single 3D axes.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# 3D scatter: xyz coordinates plus colour.
ax.scatter(sepal_length, sepal_width, petal_length, c=labels)

ax.set(xlabel='Sepal length', ylabel='Sepal width', zlabel='Petal length')

# Axis ranges.
ax.set_xlim(4, 8)
ax.set_ylim(1, 5)
ax.set_zlim(0, 8)

# Orthographic projection.
ax.set_proj_type('ortho')

plt.show()

python 复制代码

import plotly.express as px

# Iris sample data bundled with Plotly.
df = px.data.iris()

# 3D scatter: three measurements as coordinates, petal width as marker
# size, species as marker colour.
scatter_options = dict(x='sepal_length',
                       y='sepal_width',
                       z='petal_length',
                       size='petal_width',
                       color='species')
fig = px.scatter_3d(df, **scatter_options)

# Fixed 500x500 figure with an orthographic camera projection.
fig.update_layout(autosize=False, width=500, height=500,
                  scene_camera_projection_type="orthographic")

fig.show()

（2）线图

随机漫步：

python 复制代码

import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go

# Random-walk data: each coordinate is the cumulative sum of
# standard-normal steps, drawn in the order x, y, z.
num_steps = 300
t = np.arange(num_steps)
x, y, z = (np.cumsum(np.random.standard_normal(num_steps))
           for _ in range(3))

# --- Matplotlib ---
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Path as a dark-blue line, with points coloured by step index on top.
ax.plot(x, y, z, color='darkblue')
ax.scatter(x, y, z, c=t, cmap='viridis')

# Remove the ticks on all three axes.
for clear_ticks in (ax.set_xticks, ax.set_yticks, ax.set_zticks):
    clear_ticks([])

# Orthographic projection and camera angle.
ax.set_proj_type('ortho')
ax.view_init(elev=30, azim=120)

plt.show()

# --- Plotly ---
walk_trace = go.Scatter3d(
    x=x, y=y, z=z,
    marker=dict(size=4, color=t, colorscale='Viridis'),
    line=dict(color='darkblue', width=2),
)
fig = go.Figure(data=walk_trace)
# Orthographic projection and figure size.
fig.update_layout(scene_camera_projection_type="orthographic",
                  width=800, height=700)

fig.show()

（3）网格曲面

用fig.add_subplot：

python 复制代码

import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go

# Surface data on a 121x121 grid over [-3, 3] x [-3, 3].
x1_array = np.linspace(-3,3,121)
x2_array = np.linspace(-3,3,121)
xx1, xx2 = np.meshgrid(x1_array, x2_array)
radius_sq = xx1**2 + xx2**2
ff = xx1 * np.exp(-radius_sq)

# --- Matplotlib surface ---
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.plot_surface(xx1, xx2, ff, cmap='RdYlBu_r')

# Axis labels.
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('f(x1,x2)')

# Axis ranges.
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3)
ax.set_zlim(-0.5, 0.5)

# Orthographic projection and camera angle.
ax.set_proj_type('ortho')
ax.view_init(elev=30, azim=150)

plt.tight_layout()
plt.show()

# --- Plotly surface ---
surface_trace = go.Surface(z=ff, x=xx1, y=xx2,
                           colorscale='RdYlBu_r')
fig = go.Figure(data=[surface_trace])
fig.update_layout(scene_camera_projection_type="orthographic",
                  width=800, height=700)
fig.show()

（4）等高线

用plotly.graph_objects：

python 复制代码

import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go

# Surface data on a 121x121 grid over [-3, 3] x [-3, 3].
x1_array = np.linspace(-3,3,121)
x2_array = np.linspace(-3,3,121)
xx1, xx2 = np.meshgrid(x1_array, x2_array)
radius_sq = xx1**2 + xx2**2
ff = xx1 * np.exp(-radius_sq)

# --- Matplotlib: contour lines drawn in a 3D axes ---
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.contour(xx1, xx2, ff, cmap='RdYlBu_r', levels=20)

# Axis labels.
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('f(x1,x2)')

# Axis ranges.
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3)
ax.set_zlim(-0.5, 0.5)

# Orthographic projection and camera angle.
ax.set_proj_type('ortho')
ax.view_init(elev=30, azim=150)

plt.tight_layout()
plt.show()

# --- Plotly: surface with z contours from -0.5 to 0.5 in steps of 0.05 ---
z_contours = dict(show=True, start=-0.5, end=0.5, size=0.05)
surface_trace = go.Surface(x=xx1, y=xx2, z=ff,
                           colorscale='RdYlBu_r',
                           contours={"z": z_contours})
fig = go.Figure(data=[surface_trace])

fig.update_layout(scene_camera_projection_type="orthographic",
                  width=800, height=700)
fig.show()

（5）箭头图

用 matplotlib.pyplot.quiver：

python 复制代码

import matplotlib.pyplot as plt

# A 3x2 nested list; each row is used as a 2-D vector by the plotting code.
A = [[0,5],
     [3,4],
     [5,0]]


def draw_vector(vector,RBG):
    """Draw ``vector`` as an arrow starting at the origin.

    vector: indexable whose first two entries are the x and y components.
    RBG:    colour passed to ``plt.quiver`` (hex strings are used in this file).

    The arrow is drawn through ``plt.quiver``, i.e. on the current axes.
    """
    dx, dy = vector[0], vector[1]
    # angles='xy', scale_units='xy' and scale=1 put the arrow in data units;
    # the very large zorder keeps it above other artists.
    arrow_style = dict(angles='xy', scale_units='xy', scale=1,
                       color=RBG, zorder=1e5)
    plt.quiver(0, 0, dx, dy, **arrow_style)
 
fig, ax = plt.subplots()

# Draw each row of A as a vector, one colour per row (first, second, third).
row_colours = ('#FFC000', '#00CC00', '#33A8FF')
for row_vector, colour in zip(A, row_colours):
    draw_vector(row_vector, colour)

# Black reference lines through the origin.
ax.axvline(x=0, c='k')
ax.axhline(y=0, c='k')

ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.grid()

# Equal scaling, then fix the visible bounds of both axes.
ax.set_aspect('equal', adjustable='box')
ax.set_xbound(lower=-0.5, upper=5)
ax.set_ybound(lower=-0.5, upper=5)

三维箭头图：

python 复制代码

def draw_vector_3D(vector,RBG):
    """Draw ``vector`` as a 3-D segment starting at the origin.

    vector: indexable whose first three entries are the x, y, z components.
    RBG:    colour passed to ``plt.quiver``.

    Drawing goes through ``plt.quiver``, i.e. the current axes; the six
    positional arguments assume that axes is a 3D one.
    """
    dx, dy, dz = vector[0], vector[1], vector[2]
    # arrow_length_ratio=0 removes the arrow head, leaving a plain segment.
    segment_style = dict(arrow_length_ratio=0, color=RBG, zorder=1e5)
    plt.quiver(0, 0, 0, dx, dy, dz, **segment_style)
    
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(1, 1, 1, projection='3d', proj_type='ortho')

# Draw each column of A as a 3-D vector (first column, then second).
column_colours = ('#FF6600', '#FFBBFF')
for column_vector, colour in zip(zip(*A), column_colours):
    draw_vector_3D(column_vector, colour)

# Axis ranges.
ax.set_xlim(0, 5)
ax.set_ylim(0, 5)
ax.set_zlim(0, 5)

# Axis labels.
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('x3')

# Camera angle and equal box proportions.
ax.view_init(azim=30, elev=25)
ax.set_box_aspect((1, 1, 1))