R语言数值关系数据可视化 - 完整知识点
目录
- 散点图
- 三维散点图
- 线性拟合与置信区间
- 带标定区域的散点图
- viridis包绘制散点图
- 气泡图
- 等高线图
- 三元相图
- 瀑布图
- 火山图
一、散点图
知识点概述
散点图用于展示两个连续变量之间的关系,每个点代表一个观测值。
核心语法
plot(x, y):基础散点图ggplot2::geom_point():ggplot2系统散点图- 参数:
color,size,shape,alpha
完整案例代码
r
# 1. 准备数据
# 创建示例数据框,包含学生的学习和测试成绩
set.seed(123) # 设置随机种子,确保结果可重复
study_hours <- runif(100, min = 0, max = 10) # 生成100个0-10小时的学习时间
test_scores <- 50 + 5 * study_hours + rnorm(100, mean = 0, sd = 5) # 成绩=50+5*学习时间+随机误差
student_data <- data.frame(study_hours, test_scores) # 合并为数据框
# 2. 基础散点图(Base R)
# 使用plot()函数创建基础散点图
plot(x = student_data$study_hours, # x轴:学习时间
y = student_data$test_scores, # y轴:测试成绩
main = "学习时间与测试成绩的关系", # 图表标题
xlab = "每日学习时间(小时)", # x轴标签
ylab = "测试成绩(分)", # y轴标签
pch = 19, # 点的形状(19为实心圆)
col = "blue", # 点的颜色
cex = 1.2) # 点的大小缩放因子
# 3. 使用ggplot2创建高级散点图
# 加载必要的包
library(ggplot2) # 加载ggplot2绘图系统
# 创建基础图形对象
p <- ggplot(data = student_data, # 指定数据源
mapping = aes(x = study_hours, # 映射x轴:学习时间
y = test_scores)) # 映射y轴:测试成绩
# 添加散点图层
p <- p + geom_point( # 添加散点图层
color = "darkred", # 设置点颜色为暗红色
size = 3, # 设置点大小为3
alpha = 0.6, # 设置透明度0.6(0=透明,1=不透明)
shape = 16 # 设置点的形状(16为实心圆)
)
# 添加标题和标签
p <- p + labs(
title = "学习时间与测试成绩散点图", # 主标题
subtitle = "数据来源:模拟数据集", # 副标题
x = "每日学习时间(小时)", # x轴标签
y = "测试成绩(分)", # y轴标签
caption = "图1:散点图展示正相关关系" # 图注
)
# 应用主题美化
p <- p + theme_minimal() # 使用简洁主题
# 显示图形
print(p)
# 4. 分组散点图
# 添加分组变量
student_data$gender <- sample(c("男", "女"), 100, replace = TRUE) # 随机分配性别
# 创建按性别分组的散点图
p_grouped <- ggplot(student_data,
aes(x = study_hours,
y = test_scores,
color = gender)) + # 按性别着色
geom_point(size = 2.5, alpha = 0.7) + # 添加散点
scale_color_manual(values = c("男" = "blue", "女" = "red")) + # 自定义颜色
labs(title = "按性别分组的学习成绩散点图",
x = "学习时间(小时)",
y = "测试成绩(分)",
color = "性别") + # 图例标题
theme_bw() # 使用黑白主题
print(p_grouped)
二、三维散点图
知识点概述
三维散点图用于展示三个连续变量之间的关系,可以从立体角度观察数据分布。
核心语法
scatterplot3d::scatterplot3d():三维散点图plot3d():交互式三维图rgl包:提供交互式3D可视化
完整案例代码
r
# 1. 安装和加载必要的包
# install.packages("scatterplot3d") # 首次使用需安装
# install.packages("rgl") # 交互式3D包
library(scatterplot3d) # 加载三维散点图包
library(rgl) # 加载交互式3D图形包
# 2. 准备三维数据
# 创建包含三个变量的数据集
set.seed(456) # 设置随机种子
n <- 150 # 数据点数量
# 生成三个相关的变量
height <- rnorm(n, mean = 170, sd = 10) # 身高(cm)
weight <- 0.8 * (height - 170) + 70 + rnorm(n, 0, 5) # 体重(kg),与身高相关
age <- runif(n, min = 18, max = 65) # 年龄(岁)
body_data <- data.frame(height, weight, age) # 合并为数据框
# 3. 基础三维散点图(使用scatterplot3d)
# 创建三维散点图对象
s3d <- scatterplot3d(
x = body_data$height, # x轴:身高
y = body_data$weight, # y轴:体重
z = body_data$age, # z轴:年龄
main = "身高、体重与年龄的三维关系", # 主标题
xlab = "身高 (cm)", # x轴标签
ylab = "体重 (kg)", # y轴标签
zlab = "年龄 (岁)", # z轴标签
color = "blue", # 点的颜色
pch = 16, # 点的形状
highlight.3d = TRUE, # 高亮3D效果
angle = 45, # 视角角度(度)
type = "p", # 绘图类型(p=点,l=线,h=垂线)
grid = TRUE, # 显示网格
box = TRUE # 显示边框
)
# 4. 添加回归平面到三维图
# 拟合线性回归模型
model <- lm(weight ~ height + age, data = body_data) # 多元线性回归
# 创建网格点用于绘制平面
height_grid <- seq(min(height), max(height), length = 20) # 身高的网格序列
age_grid <- seq(min(age), max(age), length = 20) # 年龄的网格序列
grid_points <- expand.grid(height = height_grid, age = age_grid) # 扩展为网格点
grid_points$weight <- predict(model, newdata = grid_points) # 预测体重值
# 将预测值转换为矩阵格式(scatterplot3d需要)
weight_matrix <- matrix(grid_points$weight,
nrow = length(height_grid),
ncol = length(age_grid))
# 添加回归平面
s3d$plane3d(model, col = "red", alpha = 0.5) # 添加回归平面
# 5. 交互式三维散点图(使用rgl包)
# 打开新的3D绘图窗口
open3d() # 打开rgl设备
# 绘制交互式三维散点图
plot3d(
x = body_data$height, # x轴数据
y = body_data$weight, # y轴数据
z = body_data$age, # z轴数据
col = rainbow(n)[rank(body_data$age)], # 根据年龄着色
size = 5, # 点的大小
type = "s", # 类型(s=球体,p=点)
xlab = "身高 (cm)", # x轴标签
ylab = "体重 (kg)", # y轴标签
zlab = "年龄 (岁)" # z轴标签
)
# 添加标题
title3d("交互式三维散点图")
# 添加坐标轴标签
axes3d() # 添加坐标轴
# 使用鼠标可以旋转、缩放图形
# 添加颜色图例
legend3d("topright", legend = c("年轻", "年长"),
col = c("red", "purple"), pch = 16)
# 6. 保存三维图
# 保存为PNG(需要调整设备)
rgl.snapshot("3d_scatterplot.png") # 保存当前视图为PNG
# 保存为交互式HTML(需要rglwidget)
if(interactive()) {
rglwidget() # 创建交互式HTML控件
}
三、线性拟合与置信区间
知识点概述
在线性拟合中展示数据点、回归线和置信区间,帮助理解变量间的关系强度和不确定性。
核心语法
lm():线性回归模型predict():预测值及置信区间geom_smooth():ggplot2自动添加平滑线confint():计算参数置信区间
完整案例代码
r
# 1. 准备数据
set.seed(789) # 设置随机种子
x <- seq(1, 100, by = 1) # 创建1-100的序列
y <- 2 * x + 50 + rnorm(length(x), mean = 0, sd = 20) # 线性关系加噪声
data_fit <- data.frame(x = x, y = y) # 创建数据框
# 2. 线性回归模型拟合
# 拟合线性回归模型
lm_model <- lm(y ~ x, data = data_fit) # 公式:y = β0 + β1*x
# 查看模型摘要
summary(lm_model) # 显示回归系数、R-squared、p值等
# 提取回归系数
coefficients <- coef(lm_model) # 提取系数
intercept <- coefficients[1] # 截距β0
slope <- coefficients[2] # 斜率β1
cat(sprintf("回归方程: y = %.2f + %.2f * x\n", intercept, slope))
# 3. 计算预测值和置信区间
# 创建新数据用于预测
new_x <- data.frame(x = seq(min(x), max(x), length.out = 100)) # 预测点
# 预测均值及其置信区间
predictions <- predict(lm_model,
newdata = new_x,
interval = "confidence", # 置信区间类型
level = 0.95) # 置信水平95%
# 预测个体值及其预测区间
predictions_ind <- predict(lm_model,
newdata = new_x,
interval = "prediction", # 预测区间
level = 0.95)
# 将预测结果合并到数据框
new_x$fit <- predictions[, "fit"] # 拟合值
new_x$lwr <- predictions[, "lwr"] # 置信区间下限
new_x$upr <- predictions[, "upr"] # 置信区间上限
new_x$lwr_ind <- predictions_ind[, "lwr"] # 预测区间下限
new_x$upr_ind <- predictions_ind[, "upr"] # 预测区间上限
# 4. 使用Base R绘制带置信区间的散点图
# 设置图形参数
par(mfrow = c(1, 1), mar = c(4, 4, 3, 2)) # 设置图形边距
# 绘制散点图
plot(x, y,
main = "线性回归拟合及置信区间",
xlab = "X变量",
ylab = "Y变量",
pch = 19,
col = "gray50",
cex = 0.8)
# 添加回归线
lines(new_x$x, new_x$fit, col = "blue", lwd = 3) # 回归线
# 添加置信区间带
polygon(c(new_x$x, rev(new_x$x)), # x坐标(正向和反向)
c(new_x$lwr, rev(new_x$upr)), # y坐标(下限和上限)
col = rgb(0, 0, 1, alpha = 0.2), # 半透明蓝色填充
border = NA) # 无边框
# 添加预测区间带(可选)
polygon(c(new_x$x, rev(new_x$x)),
c(new_x$lwr_ind, rev(new_x$upr_ind)),
col = rgb(0, 1, 0, alpha = 0.1),
border = NA)
# 添加图例
legend("topleft",
legend = c("数据点", "回归线", "95%置信区间"),
col = c("gray50", "blue", "lightblue"),
pch = c(19, NA, 15),
lwd = c(NA, 3, NA),
pt.cex = c(0.8, NA, 2),
bg = "white")
# 5. 使用ggplot2绘制(更美观)
library(ggplot2)
# 方法1:使用geom_smooth自动添加
p_auto <- ggplot(data_fit, aes(x = x, y = y)) +
geom_point(color = "darkgray", alpha = 0.6, size = 2) + # 散点
geom_smooth(method = "lm", # 使用线性回归方法
formula = y ~ x, # 公式
level = 0.95, # 置信水平
se = TRUE, # 显示置信区间
color = "blue", # 线的颜色
fill = "lightblue", # 置信区间填充色
size = 1.2) + # 线宽
labs(title = "线性回归拟合与95%置信区间",
subtitle = "使用geom_smooth自动添加",
x = "自变量 X",
y = "因变量 Y") +
theme_minimal()
print(p_auto)
# 方法2:手动计算并绘制(更灵活)
# 计算回归模型
model_plot <- lm(y ~ x, data = data_fit)
# 创建预测数据框
pred_data <- data.frame(x = seq(min(x), max(x), length = 100))
pred_data$pred <- predict(model_plot, newdata = pred_data)
pred_data$conf_low <- predict(model_plot, newdata = pred_data,
interval = "confidence")[, "lwr"]
pred_data$conf_high <- predict(model_plot, newdata = pred_data,
interval = "confidence")[, "upr"]
# 手动绘制
p_manual <- ggplot() +
# 原始数据点
geom_point(data = data_fit, aes(x = x, y = y),
color = "gray50", alpha = 0.7, size = 2) +
# 回归线
geom_line(data = pred_data, aes(x = x, y = pred),
color = "red", size = 1.5) +
# 置信区间带
geom_ribbon(data = pred_data,
aes(x = x, ymin = conf_low, ymax = conf_high),
fill = "red", alpha = 0.2) +
# 标签和主题
labs(title = "手动计算的线性回归及置信区间",
x = "自变量 X",
y = "因变量 Y") +
theme_classic()
print(p_manual)
# 6. 残差分析图
# 计算残差
data_fit$fitted <- fitted(lm_model) # 拟合值
data_fit$residuals <- residuals(lm_model) # 残差
data_fit$std_residuals <- rstandard(lm_model) # 标准化残差
# 残差图
p_residual <- ggplot(data_fit, aes(x = fitted, y = residuals)) +
geom_point(color = "darkblue", size = 2, alpha = 0.6) +
geom_hline(yintercept = 0, color = "red", linetype = "dashed", size = 1) +
geom_smooth(method = "loess", se = FALSE, color = "gray") +
labs(title = "残差 vs 拟合值图",
x = "拟合值",
y = "残差") +
theme_minimal()
print(p_residual)
# Q-Q图检验正态性
p_qq <- ggplot(data_fit, aes(sample = residuals)) +
stat_qq(color = "blue", size = 2, alpha = 0.7) +
stat_qq_line(color = "red", size = 1) +
labs(title = "Q-Q图:残差正态性检验",
x = "理论分位数",
y = "样本分位数") +
theme_minimal()
print(p_qq)
四、带标定区域的散点图
知识点概述
在散点图中标记特定区域(如高密度区、异常值区、分类区域),便于识别数据子集。
核心语法
rect():添加矩形区域polygon():添加多边形区域annotate():ggplot2中添加注释区域geom_rect():ggplot2矩形图层
完整案例代码
r
# 1. 准备数据
set.seed(111)
# 生成三个不同簇的数据
n_per_cluster <- 100
cluster1 <- data.frame(x = rnorm(n_per_cluster, mean = 2, sd = 0.8),
y = rnorm(n_per_cluster, mean = 3, sd = 0.8),
cluster = "A")
cluster2 <- data.frame(x = rnorm(n_per_cluster, mean = 6, sd = 0.8),
y = rnorm(n_per_cluster, mean = 5, sd = 0.8),
cluster = "B")
cluster3 <- data.frame(x = rnorm(n_per_cluster, mean = 4, sd = 0.8),
y = rnorm(n_per_cluster, mean = 1.5, sd = 0.8),
cluster = "C")
# 合并数据
data_clusters <- rbind(cluster1, cluster2, cluster3)
# 2. Base R实现标定区域
# 设置图形参数
plot(data_clusters$x, data_clusters$y,
main = "带标定区域的散点图",
xlab = "X轴坐标",
ylab = "Y轴坐标",
pch = 19,
col = c("red", "green", "blue")[as.numeric(data_clusters$cluster)],
cex = 0.8)
# 添加矩形区域(标定区域1)
rect(xleft = 1, ybottom = 2, # 左下角坐标
xright = 3, ytop = 4, # 右上角坐标
col = rgb(1, 0, 0, alpha = 0.2), # 半透明红色
border = "red", # 边框颜色
lwd = 2, # 边框宽度
lty = 2) # 虚线边框
# 添加椭圆/圆形区域(标定区域2)
# 使用多边形近似椭圆
theta <- seq(0, 2 * pi, length = 100) # 角度序列
cx <- 6 # 中心x坐标
cy <- 5 # 中心y坐标
rx <- 1.5 # x轴半径
ry <- 1.2 # y轴半径
ellipse_x <- cx + rx * cos(theta) # 椭圆x坐标
ellipse_y <- cy + ry * sin(theta) # 椭圆y坐标
polygon(ellipse_x, ellipse_y,
col = rgb(0, 1, 0, alpha = 0.2),
border = "green",
lwd = 2)
# 添加多边形区域(标定区域3)
polygon_x <- c(3, 5, 4.5, 3.5) # 多边形x顶点
polygon_y <- c(0.5, 1, 2.5, 2) # 多边形y顶点
polygon(polygon_x, polygon_y,
col = rgb(0, 0, 1, alpha = 0.2),
border = "blue",
lwd = 2)
# 添加文本标签
text(x = 2, y = 3, labels = "区域A:低值区", cex = 0.8)
text(x = 6, y = 6, labels = "区域B:中值区", cex = 0.8)
text(x = 4, y = 1.5, labels = "区域C:高值区", cex = 0.8)
# 添加图例
legend("topright",
legend = c("簇A", "簇B", "簇C", "区域A", "区域B", "区域C"),
col = c("red", "green", "blue", "red", "green", "blue"),
pch = c(19, 19, 19, NA, NA, NA),
lwd = c(NA, NA, NA, 2, 2, 2),
fill = c(NA, NA, NA, rgb(1,0,0,0.2), rgb(0,1,0,0.2), rgb(0,0,1,0.2)),
border = c(NA, NA, NA, "red", "green", "blue"),
bg = "white")
# 3. ggplot2实现标定区域
library(ggplot2)
# 定义标定区域的数据框
rect_region <- data.frame(xmin = 1, xmax = 3, ymin = 2, ymax = 4,
region = "区域A")
ellipse_region <- data.frame(x = cx, y = cy, rx = rx, ry = ry, region = "区域B")
polygon_region <- data.frame(x = c(3, 5, 4.5, 3.5),
y = c(0.5, 1, 2.5, 2),
region = "区域C")
# 创建ggplot图形
p_regions <- ggplot(data_clusters, aes(x = x, y = y, color = cluster)) +
# 添加散点
geom_point(size = 2, alpha = 0.7) +
# 添加矩形区域
annotate("rect",
xmin = 1, xmax = 3, ymin = 2, ymax = 4,
alpha = 0.2, fill = "red", color = "red", linetype = "dashed") +
# 添加椭圆区域(使用geom_ellipse需要ggforce包)
# 如果没有ggforce,使用geom_polygon创建近似椭圆
annotate("polygon",
x = ellipse_x, y = ellipse_y,
alpha = 0.2, fill = "green", color = "green") +
# 添加多边形区域
annotate("polygon",
x = polygon_x, y = polygon_y,
alpha = 0.2, fill = "blue", color = "blue") +
# 添加文本标签
annotate("text", x = 2, y = 3, label = "区域A", fontface = "bold", size = 5) +
annotate("text", x = 6, y = 6, label = "区域B", fontface = "bold", size = 5) +
annotate("text", x = 4, y = 1.5, label = "区域C", fontface = "bold", size = 5) +
# 设置颜色
scale_color_manual(values = c("A" = "red", "B" = "green", "C" = "blue")) +
# 标签和主题
labs(title = "带标定区域的散点图(ggplot2)",
subtitle = "矩形、椭圆和多边形标定区域",
x = "X轴坐标",
y = "Y轴坐标",
color = "数据簇") +
theme_bw() +
theme(legend.position = "bottom")
print(p_regions)
# 4. 密度等高线区域标定
library(MASS) # 用于核密度估计
# 计算二维核密度
kde <- kde2d(data_clusters$x, data_clusters$y, n = 100)
# 创建密度数据框
density_df <- expand.grid(x = kde$x, y = kde$y)
density_df$z <- as.vector(kde$z)
# 绘制密度等高线区域
p_density <- ggplot(data_clusters, aes(x = x, y = y)) +
# 添加密度等高线填充
geom_contour_filled(data = density_df, aes(x = x, y = y, z = z, fill = after_stat(level)),
alpha = 0.5, bins = 10) +
# 添加原始数据点
geom_point(size = 1.5, alpha = 0.8, color = "black") +
# 高密度区域标定
geom_contour(data = density_df, aes(x = x, y = y, z = z),
breaks = quantile(kde$z, 0.9), # 10%最高密度区域
color = "red", size = 1.5, linetype = "dashed") +
labs(title = "密度等高线标定区域",
subtitle = "红色等高线表示10%最高密度区域",
x = "X轴",
y = "Y轴",
fill = "密度等级") +
theme_minimal()
print(p_density)
五、利用viridis包绘制散点图
知识点概述
viridis包提供色觉友好、感知均匀的颜色方案,特别适合连续数据的可视化。
核心语法
viridis():主要颜色映射函数scale_color_viridis_c():连续变量颜色标度scale_color_viridis_d():离散变量颜色标度scale_fill_viridis_c():填充颜色标度
完整案例代码
r
# 1. 安装和加载viridis包
# install.packages("viridis") # 首次使用需安装
# install.packages("viridisLite") # 轻量版
library(viridis)
library(ggplot2)
# 2. 准备数据
set.seed(222)
n <- 500
# 创建带第三维连续变量的数据
x <- rnorm(n, mean = 0, sd = 1)
y <- 0.5 * x + rnorm(n, mean = 0, sd = 0.5)
z <- exp(x) * 10 + rnorm(n, mean = 0, sd = 5) # 第三个连续变量(颜色映射用)
data_viridis <- data.frame(x = x, y = y, z = z)
# 3. 基础viridis颜色使用
# 查看viridis调色板
viridis_pal <- viridis(10) # 生成10个颜色的调色板
print(viridis_pal)
# 显示调色板
# 创建调色板可视化函数
show_colors <- function(colors, labels = NULL) {
n <- length(colors)
plot(0, 0, type = "n", xlim = c(0, n), ylim = c(0, 1),
xlab = "", ylab = "", axes = FALSE)
for (i in 1:n) {
rect(i-1, 0, i, 1, col = colors[i], border = NA)
if (!is.null(labels)) {
text(i-0.5, 0.5, labels[i], cex = 0.8)
}
}
}
show_colors(viridis_pal, 1:10)
# 4. Base R中使用viridis颜色
# 将连续变量z映射到viridis颜色
# 首先将z标准化到[0,1]区间
z_norm <- (data_viridis$z - min(data_viridis$z)) /
(max(data_viridis$z) - min(data_viridis$z))
# 获取颜色
colors_viridis <- viridis(100)[as.numeric(cut(z_norm, breaks = 100))]
# 绘制散点图
plot(data_viridis$x, data_viridis$y,
main = "使用viridis颜色映射的散点图",
xlab = "X变量",
ylab = "Y变量",
col = colors_viridis,
pch = 19,
cex = 1.2,
cex.main = 1.5)
# 添加颜色条
# 简化方法:使用图例表示颜色范围
legend("topright",
legend = c("低z值", "中z值", "高z值"),
col = viridis(3),
pch = 19,
title = "z变量",
cex = 0.8)
# 5. ggplot2中使用viridis(推荐方式)
# 连续变量颜色映射
p_continuous <- ggplot(data_viridis, aes(x = x, y = y, color = z)) +
geom_point(size = 2.5, alpha = 0.8) +
# 使用viridis颜色方案
scale_color_viridis_c(
option = "viridis", # 可选: "viridis", "magma", "plasma", "inferno", "cividis"
direction = 1, # 1=正向,-1=反向
begin = 0, # 颜色起始点(0-1)
end = 1, # 颜色结束点(0-1)
name = "Z值", # 图例标题
breaks = c(min(z), median(z), max(z)), # 图例断点
labels = c("低", "中", "高") # 图例标签
) +
labs(title = "使用viridis颜色方案的散点图",
subtitle = "viridis方案(色觉友好、感知均匀)",
x = "X变量",
y = "Y变量") +
theme_minimal() +
theme(legend.position = "right",
legend.key.height = unit(1.5, "cm"))
print(p_continuous)
# 6. 尝试不同的viridis方案
# 创建多面板图形比较不同方案
options_list <- c("viridis", "magma", "plasma", "inferno", "cividis")
plots_list <- list()
for (i in seq_along(options_list)) {
opt <- options_list[i]
p <- ggplot(data_viridis, aes(x = x, y = y, color = z)) +
geom_point(size = 2, alpha = 0.7) +
scale_color_viridis_c(option = opt, name = "Z值") +
labs(title = paste(opt, "方案"),
x = "X", y = "Y") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 12),
legend.position = "bottom")
plots_list[[i]] <- p
}
# 组合图形(需要patchwork包)
# install.packages("patchwork")
library(patchwork)
combined_plots <- (plots_list[[1]] + plots_list[[2]]) /
(plots_list[[3]] + plots_list[[4]]) /
(plots_list[[5]] + plot_spacer())
print(combined_plots)
# 7. 离散变量的viridis应用
# 将连续变量z转换为分类变量
data_viridis$z_category <- cut(data_viridis$z,
breaks = 3,
labels = c("低", "中", "高"))
p_discrete <- ggplot(data_viridis, aes(x = x, y = y, color = z_category)) +
geom_point(size = 2.5, alpha = 0.8) +
scale_color_viridis_d(
option = "plasma",
name = "Z等级",
labels = c("低 (<25%)", "中 (25%-75%)", "高 (>75%)")
) +
labs(title = "离散变量使用viridis颜色",
subtitle = "将连续变量分层后使用viridis离散方案",
x = "X变量",
y = "Y变量") +
theme_bw()
print(p_discrete)
# 8. 结合透明度和大小的多维度映射
p_multidim <- ggplot(data_viridis, aes(x = x, y = y, color = z, size = abs(x))) +
geom_point(alpha = 0.7) +
scale_color_viridis_c(option = "inferno", name = "Z值") +
scale_size_continuous(name = "|X|值", range = c(1, 6)) +
labs(title = "多维度数据映射",
subtitle = "颜色:Z值,大小:|X|值",
x = "X变量",
y = "Y变量") +
theme_dark() +
theme(legend.position = "right")
print(p_multidim)
# 9. 在三维散点图中使用viridis
library(scatterplot3d)
# 创建三维散点图
s3d_viridis <- scatterplot3d(
x = data_viridis$x,
y = data_viridis$y,
z = data_viridis$z,
color = viridis(n)[rank(data_viridis$z)], # 根据z值排序着色
pch = 16,
main = "三维散点图使用viridis颜色",
xlab = "X",
ylab = "Y",
zlab = "Z"
)
六、气泡图
知识点概述
气泡图是散点图的扩展,通过点的大小表示第三个数值变量,可用于多维度数据展示。
核心语法
symbols():Base R气泡图geom_point(aes(size = variable)):ggplot2气泡图scale_size_continuous():控制气泡大小范围
完整案例代码
r
# 1. 准备数据
# 创建国家发展指标数据集
countries <- c("中国", "印度", "美国", "印尼", "巴西", "巴基斯坦",
"尼日利亚", "孟加拉国", "俄罗斯", "墨西哥")
population <- c(1441, 1380, 331, 273, 213, 220, 211, 166, 146, 129) # 百万人
gdp_per_capita <- c(10500, 2200, 63500, 4200, 6800, 1500, 2200, 1900, 11500, 9200) # 美元
life_expectancy <- c(77, 69, 79, 72, 76, 67, 55, 73, 72, 75) # 岁
co2_emissions <- c(10.1, 1.9, 15.5, 2.3, 2.2, 0.8, 0.7, 0.5, 11.2, 3.8) # 吨/人
# 创建数据框
bubble_data <- data.frame(
country = countries,
population = population,
gdp_per_capita = gdp_per_capita,
life_expectancy = life_expectancy,
co2_emissions = co2_emissions
)
# 2. Base R气泡图
# 使用symbols()函数创建气泡图
# 设置图形参数
par(mar = c(5, 5, 4, 2)) # 设置边距
# 计算气泡大小(缩放因子)
bubble_size <- sqrt(bubble_data$population / pi) / 15 # 半径缩放
# 绘制气泡图
symbols(x = bubble_data$gdp_per_capita, # x轴:人均GDP
y = bubble_data$life_expectancy, # y轴:预期寿命
circles = bubble_size, # 气泡半径
inches = FALSE, # 不使用英寸缩放
bg = rgb(0.2, 0.5, 0.8, alpha = 0.5), # 填充色(半透明蓝)
fg = "blue", # 边框色
xlab = "人均GDP(美元)", # x轴标签
ylab = "预期寿命(岁)", # y轴标签
main = "国家发展指标气泡图", # 标题
xlim = c(0, 70000), # x轴范围
ylim = c(50, 85)) # y轴范围
# 添加国家标签
text(x = bubble_data$gdp_per_capita,
y = bubble_data$life_expectancy,
labels = bubble_data$country,
cex = 0.7,
pos = 3) # pos=3表示标签在上方
# 添加图例
legend("bottomright",
legend = c("人口100M", "人口500M", "人口1000M", "人口1500M"),
pt.cex = c(0.5, 1, 1.5, 2),
pch = 21,
pt.bg = rgb(0.2, 0.5, 0.8, alpha = 0.5),
title = "人口规模",
bg = "white")
# 3. 使用ggplot2创建高级气泡图
library(ggplot2)
# 基础气泡图
p_bubble_base <- ggplot(bubble_data,
aes(x = gdp_per_capita,
y = life_expectancy,
size = population,
color = co2_emissions)) +
# 添加气泡点
geom_point(alpha = 0.7) +
# 控制气泡大小范围
scale_size_continuous(
name = "人口(百万人)",
range = c(3, 20), # 气泡大小范围(最小到最大)
breaks = c(100, 500, 1000, 1500),
labels = c("100M", "500M", "1000M", "1500M")
) +
# 控制颜色(CO2排放)
scale_color_viridis_c(
name = "CO2排放\n(吨/人)",
option = "plasma"
) +
# 添加国家标签
geom_text(aes(label = country),
size = 3,
vjust = -0.5, # 垂直偏移
hjust = 0.5, # 水平偏移
check_overlap = TRUE) + # 避免标签重叠
# 标签和主题
labs(title = "国家发展指标气泡图",
subtitle = "气泡大小=人口,颜色=CO2排放",
x = "人均GDP(美元)",
y = "预期寿命(岁)") +
theme_bw() +
theme(legend.position = "right",
legend.box = "vertical")
print(p_bubble_base)
# 4. 美化气泡图
library(RColorBrewer)
# 添加更多美化元素
p_bubble_beautiful <- ggplot(bubble_data,
aes(x = gdp_per_capita,
y = life_expectancy,
size = population,
fill = co2_emissions,
label = country)) +
# 使用带边框的气泡
geom_point(shape = 21, alpha = 0.8, color = "black", stroke = 0.5) +
# 自定义颜色填充
scale_fill_gradient2(
name = "CO2排放\n(吨/人)",
low = "green", # 低值颜色
mid = "yellow", # 中值颜色
high = "red", # 高值颜色
midpoint = 5, # 中点值
space = "Lab"
) +
# 气泡大小
scale_size_continuous(
name = "人口(百万人)",
range = c(5, 25),
guide = guide_legend(override.aes = list(fill = "gray50"))
) +
# 添加国家标签(带背景)
geom_label(aes(fill = NULL),
size = 3,
alpha = 0.7,
label.padding = unit(0.2, "lines"),
nudge_y = 1) + # 向上偏移
# 添加参考线(平均值线)
geom_hline(yintercept = mean(bubble_data$life_expectancy),
linetype = "dashed", color = "gray", alpha = 0.5) +
geom_vline(xintercept = mean(bubble_data$gdp_per_capita),
linetype = "dashed", color = "gray", alpha = 0.5) +
# 添加回归线
geom_smooth(method = "lm", se = FALSE, color = "darkblue",
linetype = "solid", size = 0.8, alpha = 0.5) +
# 坐标轴转换(使用对数刻度)
scale_x_log10(labels = scales::comma) +
# 标签
labs(title = "国家发展指标综合分析",
subtitle = "气泡大小=人口 | 颜色=CO2排放 | 虚线=平均值 | 实线=回归线",
x = "人均GDP(美元,对数刻度)",
y = "预期寿命(岁)",
caption = "数据来源:世界银行(示例数据)") +
# 主题美化
theme_minimal() +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 10, hjust = 0.5, color = "gray50"),
legend.position = "right",
legend.key.width = unit(1, "cm"),
panel.grid.minor = element_blank(),
panel.border = element_rect(fill = NA, color = "gray80", size = 0.5)
)
print(p_bubble_beautiful)
# 5. 分组气泡图(添加分类变量)
# 添加区域分类
bubble_data$region <- c("亚洲", "亚洲", "美洲", "亚洲", "美洲",
"亚洲", "非洲", "亚洲", "欧洲", "美洲")
# 创建分组气泡图
p_bubble_grouped <- ggplot(bubble_data,
aes(x = gdp_per_capita,
y = life_expectancy,
size = population,
color = region)) +
# 添加气泡
geom_point(alpha = 0.8) +
# 按区域着色
scale_color_manual(
name = "区域",
values = c("亚洲" = "red", "美洲" = "blue", "非洲" = "green", "欧洲" = "purple")
) +
# 大小控制
scale_size_continuous(
name = "人口(百万人)",
range = c(3, 20)
) +
# 添加文本标签
geom_text_repel(
aes(label = country),
size = 3.5,
force = 10,
box.padding = 0.5,
point.padding = 0.3
) +
# 标签和主题
labs(title = "按区域分组的气泡图",
subtitle = "不同颜色代表不同区域",
x = "人均GDP(美元)",
y = "预期寿命(岁)") +
theme_classic()
print(p_bubble_grouped)
# 6. 动态气泡图(使用plotly实现交互)
library(plotly)
# 创建交互式气泡图
p_interactive <- plot_ly(
data = bubble_data,
x = ~gdp_per_capita,
y = ~life_expectancy,
size = ~population,
color = ~co2_emissions,
text = ~paste("国家:", country,
"<br>人均GDP:", gdp_per_capita, "美元",
"<br>预期寿命:", life_expectancy, "岁",
"<br>人口:", population, "百万人",
"<br>CO2排放:", co2_emissions, "吨/人"),
hoverinfo = "text",
type = "scatter",
mode = "markers",
marker = list(sizeref = 2.5, sizemode = "area")
) %>%
layout(
title = "交互式国家发展指标气泡图",
xaxis = list(title = "人均GDP(美元)", type = "log"),
yaxis = list(title = "预期寿命(岁)"),
showlegend = TRUE
)
# 显示交互式图形
p_interactive
七、等高线图
知识点概述
等高线图通过等值线展示三维数据的分布,常用于展示密度、地形或响应面。
核心语法
contour():Base R等高线图filled.contour():填充等高线图geom_contour():ggplot2等高线图层geom_contour_filled():填充等高线图stat_density2d():二维密度等高线
完整案例代码
r
# 1. 准备数据
# 创建网格数据
x <- seq(-3, 3, length.out = 50) # x轴网格
y <- seq(-3, 3, length.out = 50) # y轴网格
# 创建网格点
grid_data <- expand.grid(x = x, y = y)
# 计算z值(使用二元正态分布)
# 公式:z = exp(-(x^2 + y^2)/2) / (2*pi)
z_matrix <- matrix(0, nrow = length(x), ncol = length(y))
for (i in 1:length(x)) {
for (j in 1:length(y)) {
z_matrix[i, j] <- exp(-(x[i]^2 + y[j]^2) / 2) / (2 * pi)
}
}
# 另一种数据:Rosenbrock函数(测试用)
rosenbrock <- function(x, y) {
(1 - x)^2 + 100 * (y - x^2)^2
}
z_rosenbrock <- outer(x, y, rosenbrock)
# 2. Base R等高线图
# 设置图形参数
par(mfrow = c(2, 2), mar = c(4, 4, 3, 2))
# 基础等高线图
contour(x = x, y = y, z = z_matrix,
main = "基础等高线图",
xlab = "X轴",
ylab = "Y轴",
col = "blue",
lwd = 1.5,
levels = seq(0.01, 0.15, by = 0.02)) # 指定等高线层级
# 添加颜色和标签
contour(x = x, y = y, z = z_matrix,
main = "带标签的等高线图",
xlab = "X轴",
ylab = "Y轴",
col = "darkred",
lwd = 2,
labcex = 0.8, # 标签大小
drawlabels = TRUE) # 显示标签
# 填充等高线图
filled.contour(x = x, y = y, z = z_matrix,
main = "填充等高线图",
xlab = "X轴",
ylab = "Y轴",
color.palette = heat.colors, # 颜色方案
levels = seq(0, 0.15, length.out = 20))
# 自定义颜色填充
filled.contour(x = x, y = y, z = z_rosenbrock,
main = "Rosenbrock函数等高线图",
xlab = "X轴",
ylab = "Y轴",
color.palette = terrain.colors,
nlevels = 30)
# 3. 使用ggplot2创建等高线图
library(ggplot2)
library(viridis)
# 将矩阵转换为数据框格式(用于ggplot2)
contour_df <- expand.grid(x = x, y = y)
contour_df$z <- as.vector(z_matrix)
# 基础等高线图
p_contour_base <- ggplot(contour_df, aes(x = x, y = y, z = z)) +
geom_contour(
color = "blue",
size = 0.8,
bins = 20 # 等高线数量
) +
labs(title = "ggplot2等高线图",
x = "X轴",
y = "Y轴") +
theme_minimal()
print(p_contour_base)
# 带填充的等高线图
p_contour_filled <- ggplot(contour_df, aes(x = x, y = y, z = z)) +
# 填充等高线
geom_contour_filled(
aes(fill = after_stat(level)),
bins = 15,
alpha = 0.8
) +
# 添加等高线
geom_contour(
color = "black",
size = 0.3,
bins = 15,
alpha = 0.5
) +
# 颜色方案
scale_fill_viridis_d(name = "Z值", direction = -1) +
# 标签和主题
labs(title = "填充等高线图",
subtitle = "使用viridis颜色方案",
x = "X轴",
y = "Y轴") +
theme_bw()
print(p_contour_filled)
# 4. 二维密度等高线图(散点图密度估计)
# 生成散点数据
set.seed(333)
n_points <- 1000
scatter_x <- c(rnorm(n_points/2, mean = -1, sd = 1),
rnorm(n_points/2, mean = 2, sd = 0.8))
scatter_y <- c(rnorm(n_points/2, mean = 0, sd = 1),
rnorm(n_points/2, mean = 1, sd = 0.7))
scatter_data <- data.frame(x = scatter_x, y = scatter_y)
# 二维密度等高线
p_density_contour <- ggplot(scatter_data, aes(x = x, y = y)) +
# 散点图
geom_point(alpha = 0.3, size = 0.8, color = "gray50") +
# 密度等高线
geom_density2d(
aes(color = after_stat(level)),
size = 0.8,
bins = 10
) +
# 颜色方案
scale_color_viridis_c(name = "密度等级", option = "plasma") +
# 标签和主题
labs(title = "二维密度等高线图",
subtitle = "展示数据点的密度分布",
x = "X轴",
y = "Y轴") +
theme_classic()
print(p_density_contour)
# 带填充的二维密度图
p_density_filled <- ggplot(scatter_data, aes(x = x, y = y)) +
# 二维密度填充
stat_density2d(
aes(fill = after_stat(density)),
geom = "raster",
contour = FALSE,
alpha = 0.8
) +
# 等高线叠加
geom_density2d(color = "white", size = 0.3, alpha = 0.5) +
# 颜色方案
scale_fill_viridis_c(name = "密度", option = "magma") +
# 标签和主题
labs(title = "二维密度填充图",
subtitle = "热力图形式展示密度分布",
x = "X轴",
y = "Y轴") +
theme_dark()
print(p_density_filled)
# 5. 高级等高线图(添加点、标签等)
# 创建带标记的等高线图
p_advanced_contour <- ggplot(contour_df, aes(x = x, y = y, z = z)) +
# 填充等高线
geom_contour_filled(bins = 20, alpha = 0.7) +
# 添加等高线
geom_contour(color = "white", size = 0.2, bins = 20, alpha = 0.3) +
# 添加特定等高线(如0.05和0.1)
geom_contour(
breaks = c(0.05, 0.1),
color = "red",
size = 1.2,
linetype = "dashed"
) +
# 添加中心点
annotate("point", x = 0, y = 0, color = "red", size = 3, shape = 23, fill = "yellow") +
annotate("text", x = 0, y = 0.2, label = "峰值点", color = "red", size = 4) +
# 颜色方案
scale_fill_viridis_d(name = "Z值范围", direction = -1) +
# 坐标轴
scale_x_continuous(breaks = seq(-3, 3, by = 1)) +
scale_y_continuous(breaks = seq(-3, 3, by = 1)) +
# 标签和主题
labs(title = "高级等高线图",
subtitle = "红色虚线:特定等高线(0.05, 0.1) | 黄点:峰值位置",
x = "X轴",
y = "Y轴",
caption = "基于二元正态分布") +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "right"
)
print(p_advanced_contour)
# 6. 在地理空间数据中使用等高线
# 模拟地形数据
lat <- seq(20, 50, length.out = 100) # 纬度
lon <- seq(80, 130, length.out = 100) # 经度
terrain_data <- expand.grid(lon = lon, lat = lat)
# 创建模拟地形高度(多个山峰)
terrain_data$elevation <-
# 山峰1
2000 * exp(-((terrain_data$lon - 100)^2 + (terrain_data$lat - 35)^2) / 200) +
# 山峰2
1500 * exp(-((terrain_data$lon - 110)^2 + (terrain_data$lat - 40)^2) / 150) +
# 山峰3
1000 * exp(-((terrain_data$lon - 90)^2 + (terrain_data$lat - 30)^2) / 100) +
# 基础海拔
500
# 地形等高线图
p_terrain <- ggplot(terrain_data, aes(x = lon, y = lat, z = elevation)) +
# 填充地形
geom_contour_filled(bins = 20, alpha = 0.9) +
# 等高线
geom_contour(color = "black", size = 0.2, alpha = 0.3, bins = 20) +
# 颜色方案(地形颜色)
scale_fill_manual(
values = colorRampPalette(c("darkgreen", "lightgreen", "yellow", "orange", "brown", "white"))(20),
name = "海拔(m)"
) +
# 标签和主题
labs(title = "地形等高线图",
subtitle = "模拟中国西部地区地形",
x = "经度",
y = "纬度",
caption = "颜色越浅海拔越高") +
theme_minimal() +
theme(
panel.grid = element_line(color = "gray80", size = 0.2),
panel.background = element_rect(fill = "lightblue", color = NA)
)
print(p_terrain)
八、三元相图
知识点概述
三元相图用于展示三个变量之和为常数的数据(如百分比数据),适合成分数据分析。
核心语法
TernaryPlot():三元相图基础函数TernaryPoints():添加点AddToTernary():添加图形元素ggtern包:ggplot2的三元相图扩展
完整案例代码
r
# 1. 安装和加载必要的包
# install.packages("Ternary") # 三元相图包
# install.packages("ggtern") # ggplot2三元相图扩展
library(Ternary)
library(ggtern)
library(ggplot2)
# 2. 准备数据
# 创建成分数据(三个变量和为100%)
set.seed(444)
n_samples <- 50
# 生成随机成分数据(使用Dirichlet分布)
# 方法:生成三个随机数并归一化
composition_data <- matrix(0, nrow = n_samples, ncol = 3)
for (i in 1:n_samples) {
raw <- runif(3)
composition_data[i, ] <- raw / sum(raw)
}
colnames(composition_data) <- c("成分A", "成分B", "成分C")
# 添加一些结构化的数据(特定区域)
structured_data <- rbind(
c(0.8, 0.1, 0.1), # A占主导
c(0.1, 0.8, 0.1), # B占主导
c(0.1, 0.1, 0.8), # C占主导
c(0.33, 0.33, 0.34) # 均衡
)
# 合并数据
all_compositions <- rbind(composition_data, structured_data)
sample_ids <- c(rep("随机", n_samples), rep("特殊", nrow(structured_data)))
# 创建响应变量(如产品性能)
performance <- 50 +
30 * all_compositions[,1] +
20 * all_compositions[,2] +
10 * all_compositions[,3] +
rnorm(nrow(all_compositions), sd = 5)
composition_df <- data.frame(
A = all_compositions[,1],
B = all_compositions[,2],
C = all_compositions[,3],
type = sample_ids,
performance = performance
)
# 3. 使用Ternary包绘制三元相图
# 设置图形参数
par(mar = c(2, 2, 2, 2))
# 创建三元相图
TernaryPlot(
main = "基础三元相图",
point = "UP", # 三角形顶点方向
xlab = "成分A", # x轴标签
ylab = "成分B", # y轴标签
alab = "A", # 顶点A标签
blab = "B", # 顶点B标签
clab = "C", # 顶点C标签
grid.lines = 10, # 网格线数量
grid.col = "lightgray", # 网格线颜色
grid.minor.lines = 5 # 次网格线数量
)
# 添加点
TernaryPoints(all_compositions,
col = c("blue", "red")[as.factor(sample_ids)],
pch = 19,
cex = 1.2)
# 添加图例
legend("topright",
legend = c("随机样本", "特殊样本"),
col = c("blue", "red"),
pch = 19,
bg = "white")
# 4. 高级Ternary图(添加等高线和颜色映射)
# 创建新的绘图区域
TernaryPlot(
main = "性能等高线三元相图",
alab = "成分A",
blab = "成分B",
clab = "成分C",
grid.lines = 20,
grid.col = "gray90",
grid.minor.lines = 0
)
# 创建性能预测模型
# 使用回归模型预测整个空间的性能
library(mgcv) # 广义加性模型
gam_model <- gam(performance ~ s(A, B, C), data = composition_df)
# 创建网格预测
# 生成三元网格点
ternary_grid <- createTernaryGrid(seq(0, 1, length.out = 30))
grid_points <- data.frame(A = ternary_grid$X,
B = ternary_grid$Y,
C = ternary_grid$Z)
grid_points <- grid_points[rowSums(grid_points) == 1, ]
# 预测性能值
grid_points$pred_performance <- predict(gam_model, newdata = grid_points)
# 绘制性能等高线
AddToTernary(contour,
TernaryX = grid_points$A,
TernaryY = grid_points$B,
TernaryZ = grid_points$pred_performance,
levels = seq(min(grid_points$pred_performance),
max(grid_points$pred_performance),
length.out = 10),
col = "red")
# 添加原始数据点(根据性能着色)
AddToTernary(points,
composition_df$A, composition_df$B, composition_df$C,
col = viridis(100)[cut(composition_df$performance, 100)],
pch = 16,
cex = 1)
# 添加颜色条
ColorLegend(viridis(10),
seq(min(composition_df$performance),
max(composition_df$performance),
length.out = 11),
title = "性能")
# 5. 使用ggtern包(更美观的ggplot2风格)
library(ggtern)
# 基础三元相图(ggtern)
p_tern_base <- ggtern(data = composition_df, aes(x = A, y = B, z = C)) +
# 添加点
geom_point(aes(color = type, size = performance), alpha = 0.7) +
# 颜色方案
scale_color_manual(values = c("随机" = "blue", "特殊" = "red")) +
# 大小范围
scale_size_continuous(range = c(2, 6)) +
# 标签和主题
labs(title = "三元相图(ggtern)",
color = "样本类型",
size = "性能值") +
theme_tern_bw() +
theme_showarrows() # 显示箭头
print(p_tern_base)
# 6. 高级ggtern图形
p_tern_advanced <- ggtern(data = composition_df, aes(x = A, y = B, z = C)) +
# 添加密度等高线
stat_density_tern(
geom = 'polygon',
aes(fill = after_stat(level)),
bins = 10,
alpha = 0.5,
color = "white"
) +
# 添加原始点
geom_point(aes(color = performance), size = 3, alpha = 0.8) +
# 颜色方案
scale_fill_viridis_c(name = "密度", option = "plasma") +
scale_color_viridis_c(name = "性能", option = "magma") +
# 添加均值点
geom_point(data = data.frame(A = mean(composition_df$A),
B = mean(composition_df$B),
C = mean(composition_df$C)),
aes(x = A, y = B, z = C),
color = "red", size = 5, shape = 8, stroke = 2) +
# 标签
labs(title = "高级三元相图",
subtitle = "填充:数据密度 | 颜色:性能值 | 星号:均值点") +
theme_tern_bw() +
theme(
tern.axis.title = element_text(size = 12, face = "bold"),
plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
)
print(p_tern_advanced)
# 7. 带置信区域的三元相图
# 计算各组数据的置信椭圆
library(car) # 用于数据椭圆
# 分别计算随机样本和特殊样本的均值和协方差
random_data <- composition_df[composition_df$type == "随机", 1:3]
special_data <- composition_df[composition_df$type == "特殊", 1:3]
# 转换为 ilr 坐标(等距对数比转换)
ilr_transform <- function(comp) {
# 使用中心化对数比变换
log_comp <- log(comp)
clr <- log_comp - rowMeans(log_comp)
return(clr[, 1:2])
}
# 创建三元相图
p_tern_confint <- ggtern(data = composition_df, aes(x = A, y = B, z = C)) +
# 基础点
geom_point(aes(color = type), alpha = 0.6, size = 2) +
# 添加置信椭圆(需要手动计算)
stat_ellipse_tern(aes(color = type), level = 0.95, type = "norm") +
# 颜色
scale_color_manual(values = c("随机" = "blue", "特殊" = "red")) +
# 标签
labs(title = "带95%置信区域的三元相图",
subtitle = "椭圆表示各组数据的95%置信区域",
color = "样本类型") +
theme_tern_bw() +
theme_showarrows()
print(p_tern_confint)
# 8. 成分变化轨迹图
# 创建时间序列成分数据
time_points <- 1:20
composition_trajectory <- data.frame(
time = time_points,
A = 0.3 + 0.2 * sin(time_points * pi / 10),
B = 0.3 + 0.1 * cos(time_points * pi / 8),
C = 0.4 + 0.15 * sin(time_points * pi / 12)
)
# 确保和为1
composition_trajectory$C <- 1 - composition_trajectory$A - composition_trajectory$B
# 绘制轨迹图
p_trajectory <- ggtern(data = composition_trajectory, aes(x = A, y = B, z = C)) +
# 轨迹线
geom_path(aes(color = time), size = 1.5, alpha = 0.8) +
# 起点和终点标记
geom_point(data = composition_trajectory[c(1, nrow(composition_trajectory)), ],
aes(color = time), size = 4, shape = c(24, 25), stroke = 1.5) +
# 添加箭头指示方向
geom_segment_tern(data = composition_trajectory[1:(nrow(composition_trajectory)-1), ],
aes(xend = A, yend = B, zend = C,
x = A, y = B, z = C,
color = time),
arrow = arrow(length = unit(0.02, "npc")),
alpha = 0.5) +
# 颜色方案
scale_color_gradient(low = "blue", high = "red", name = "时间") +
# 标签
labs(title = "成分变化轨迹图",
subtitle = "箭头表示时间方向 | 三角形:起点 | 倒三角:终点") +
theme_tern_bw()
print(p_trajectory)
九、瀑布图
知识点概述
瀑布图用于展示数值的累积变化过程,常用于财务分析(如收入到净利润的变化)。
核心语法
- 基础条形图模拟
geom_rect():绘制矩形块geom_segment():连接线段waterfall包:专用瀑布图包
完整案例代码
r
# 1. 准备数据
# 财务分析示例:收入到净利润的变化过程
financial_data <- data.frame(
category = c("总收入", "销售成本", "毛利润",
"运营费用", "研发费用", "营销费用",
"营业利润", "税费", "净利润"),
amount = c(1000, -350, NA, -150, -100, -80, NA, -70, NA),
start = c(0, 1000, 650, 500, 350, 250, 170, 170, 100),
end = c(1000, 650, 500, 350, 250, 170, 170, 100, 100),
type = c("起始", "减少", "小计", "减少", "减少", "减少", "小计", "减少", "总计")
)
# 自动计算瀑布图数据
calculate_waterfall <- function(values, labels) {
n <- length(values)
start <- rep(0, n)
end <- rep(0, n)
start[1] <- 0
end[1] <- values[1]
for (i in 2:n) {
start[i] <- end[i-1]
end[i] <- start[i] + values[i]
}
return(data.frame(
category = labels,
amount = values,
start = start,
end = end,
type = c("start", rep("change", n-2), "end")
))
}
# 示例:销售转化瀑布图
conversion_values <- c(10000, -2000, -1500, -800, -500, -200, 5000)
conversion_labels <- c("访问量", "跳出", "未注册", "注册失败", "放弃支付", "退款", "成功订单")
waterfall_df <- calculate_waterfall(conversion_values, conversion_labels)
# 2. 使用ggplot2创建基础瀑布图
library(ggplot2)
p_waterfall_base <- ggplot(waterfall_df, aes(x = category, fill = type)) +
# 绘制矩形条
geom_rect(aes(xmin = as.numeric(factor(category)) - 0.4,
xmax = as.numeric(factor(category)) + 0.4,
ymin = start,
ymax = end)) +
# 添加连接线段
geom_segment(aes(x = as.numeric(factor(category)),
xend = as.numeric(factor(category)) + 1,
y = end,
yend = start),
data = waterfall_df[-nrow(waterfall_df), ],
color = "gray50",
size = 0.8,
linetype = "dashed") +
# 添加数值标签
geom_text(aes(x = category,
y = end,
label = paste0(ifelse(amount >= 0, "+", ""), amount),
vjust = ifelse(amount >= 0, -0.5, 1.5)),
size = 3.5) +
# 添加起始和结束值标签
geom_text(aes(x = category,
y = end,
label = end,
vjust = ifelse(amount >= 0, 1.5, -0.5)),
size = 3, color = "blue") +
# 颜色方案
scale_fill_manual(values = c("start" = "green",
"change" = "steelblue",
"end" = "darkgreen")) +
# 标签和主题
labs(title = "销售转化瀑布图",
subtitle = "从访问量到成功订单的转化过程",
x = "转化阶段",
y = "数量",
fill = "类型") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom")
print(p_waterfall_base)
# 3. 美化瀑布图
# 创建更复杂的财务瀑布图
financial_waterfall <- data.frame(
category = c("营业收入", "营业成本", "毛利",
"销售费用", "管理费用", "财务费用",
"营业利润", "投资收益", "营业外收支",
"利润总额", "所得税", "净利润"),
amount = c(5000, -2800, NA, -500, -300, -100,
NA, 200, 50, NA, -400, NA),
stringsAsFactors = FALSE
)
# 计算累计值
financial_waterfall$cumulative <- cumsum(ifelse(is.na(financial_waterfall$amount),
0, financial_waterfall$amount))
financial_waterfall$start <- c(0, head(financial_waterfall$cumulative, -1))
financial_waterfall$end <- financial_waterfall$cumulative
# 识别增减和总计
financial_waterfall$type <- ifelse(financial_waterfall$amount > 0, "增加",
ifelse(financial_waterfall$amount < 0, "减少", "总计"))
financial_waterfall$type[is.na(financial_waterfall$amount)] <- "小计"
# 美化瀑布图
p_financial_waterfall <- ggplot(financial_waterfall,
aes(x = reorder(category, -cumulative),
fill = type)) +
# 绘制条形
geom_col(aes(y = amount),
data = financial_waterfall[!is.na(financial_waterfall$amount), ],
width = 0.7,
alpha = 0.8) +
# 添加连接线
geom_segment(aes(x = as.numeric(factor(category)) + 0.5,
xend = as.numeric(factor(category)) + 1.5,
y = end,
yend = start),
data = financial_waterfall[-nrow(financial_waterfall), ],
color = "gray40",
size = 1,
alpha = 0.6) +
# 添加数值标签
geom_text(aes(y = end,
label = paste0(ifelse(amount > 0, "+", ""), amount),
vjust = ifelse(amount > 0, -0.5, 1.5)),
data = financial_waterfall[!is.na(financial_waterfall$amount), ],
size = 4,
fontface = "bold") +
# 添加累计值标签
geom_text(aes(y = end,
label = paste0("¥", round(end)),
vjust = ifelse(amount > 0, 1.5, -0.5)),
data = financial_waterfall,
size = 3.5,
color = "darkblue") +
# 颜色方案
scale_fill_manual(values = c("增加" = "#2ca02c", # 绿色
"减少" = "#d62728", # 红色
"小计" = "#ff7f0e", # 橙色
"总计" = "#1f77b4")) + # 蓝色
# 坐标轴转换
scale_y_continuous(labels = scales::dollar_format(prefix = "¥")) +
# 标签和主题
labs(title = "公司财务损益瀑布图",
subtitle = "从营业收入到净利润的变化过程(单位:万元)",
x = "财务项目",
y = "金额",
fill = "变化类型",
caption = "绿色:增加项 | 红色:减少项 | 橙色:小计 | 蓝色:总计") +
theme_minimal() +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 11, hjust = 0.5, color = "gray50"),
axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
legend.position = "bottom",
panel.grid.major.x = element_blank()
)
print(p_financial_waterfall)
# 4. 使用专用包创建瀑布图
# install.packages("waterfalls")
library(waterfalls)
# 准备数据
profits <- c(5000, -2800, -500, -300, -100, 200, 50, -400)
labels_waterfall <- c("营业收入", "营业成本", "销售费用",
"管理费用", "财务费用", "投资收益",
"营业外收支", "所得税")
# 创建瀑布图
p_waterfall_pkg <- waterfall(values = profits,
labels = labels_waterfall,
calc_total = TRUE,
total_axis_text = "净利润",
fill_colours = c("#2ca02c", rep("#d62728", 7)),
rect_text_size = 3.5,
rect_width = 0.7,
draw_lines = TRUE,
lines_color = "gray50") +
labs(title = "瀑布图(专用包)",
subtitle = "使用waterfalls包创建",
y = "金额(万元)") +
theme_minimal()
print(p_waterfall_pkg)
# 5. 水平瀑布图
# 转置数据用于水平显示
p_horizontal_waterfall <- ggplot(financial_waterfall,
aes(x = reorder(category, -cumulative),
y = amount, fill = type)) +
# 水平条形
geom_col(width = 0.6, alpha = 0.8) +
# 坐标轴翻转
coord_flip() +
# 添加标签
geom_text(aes(label = ifelse(amount > 0, paste0("+", amount), amount),
hjust = ifelse(amount > 0, -0.2, 1.2)),
size = 3.5) +
# 颜色
scale_fill_manual(values = c("增加" = "#2ca02c",
"减少" = "#d62728",
"小计" = "#ff7f0e",
"总计" = "#1f77b4")) +
# 标签和主题
labs(title = "水平瀑布图",
x = "财务项目",
y = "金额(万元)",
fill = "变化类型") +
theme_bw() +
theme(legend.position = "bottom")
print(p_horizontal_waterfall)
# 6. 分组瀑布图(比较不同时期)
# 创建两年对比数据
year1_data <- c(5000, -2800, -500, -300, -100, 200, 50, -400)
year2_data <- c(5500, -3000, -550, -320, -120, 250, 60, -450)
categories <- c("营业收入", "营业成本", "销售费用", "管理费用",
"财务费用", "投资收益", "营业外收支", "所得税")
# 创建数据框
comparison_df <- data.frame(
category = rep(categories, 2),
year = rep(c("2022年", "2023年"), each = length(categories)),
amount = c(year1_data, year2_data)
)
# 计算累计值
library(dplyr)
comparison_df <- comparison_df %>%
group_by(year) %>%
mutate(cumulative = cumsum(amount),
start = lag(cumulative, default = 0),
end = cumulative,
type = ifelse(amount > 0, "增加", "减少"))
# 分组瀑布图
p_comparison_waterfall <- ggplot(comparison_df,
aes(x = category, fill = type)) +
# 分面显示不同年份
facet_wrap(~year, ncol = 1) +
# 绘制条形
geom_col(aes(y = amount), width = 0.7, alpha = 0.8) +
# 添加连接线
geom_segment(aes(x = as.numeric(factor(category)) + 0.5,
xend = as.numeric(factor(category)) + 1.5,
y = end,
yend = start),
color = "gray40",
size = 0.8,
alpha = 0.6) +
# 数值标签
geom_text(aes(y = end,
label = ifelse(amount > 0, paste0("+", amount), amount),
vjust = ifelse(amount > 0, -0.5, 1.5)),
size = 3) +
# 颜色
scale_fill_manual(values = c("增加" = "#2ca02c", "减少" = "#d62728")) +
# 标签和主题
labs(title = "年度财务对比瀑布图",
subtitle = "2022年 vs 2023年",
x = "财务项目",
y = "金额(万元)",
fill = "变化类型") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
strip.text = element_text(size = 12, face = "bold"))
print(p_comparison_waterfall)
十、火山图
知识点概述
火山图用于展示差异表达分析结果,同时显示差异倍数(Fold Change)和统计显著性(p值)。
核心语法
ggplot2基础图形geom_point():散点图层geom_hline()/geom_vline():阈值线- 颜色标记显著差异基因
完整案例代码
r
# 1. 准备数据(模拟基因表达差异分析结果)
set.seed(555)
n_genes <- 1000
# 生成模拟数据
volcano_data <- data.frame(
gene = paste0("Gene_", 1:n_genes),
log2FoldChange = rnorm(n_genes, mean = 0, sd = 1.5), # log2差异倍数
pvalue = runif(n_genes, min = 0, max = 1) # p值
)
# 添加一些显著差异的基因
n_sig_up <- 50 # 显著上调基因数
n_sig_down <- 50 # 显著下调基因数
volcano_data$log2FoldChange[1:n_sig_up] <- runif(n_sig_up, min = 2, max = 5)
volcano_data$pvalue[1:n_sig_up] <- runif(n_sig_up, min = 0, max = 0.01)
volcano_data$log2FoldChange[(n_sig_up+1):(n_sig_up+n_sig_down)] <- runif(n_sig_down, min = -5, max = -2)
volcano_data$pvalue[(n_sig_up+1):(n_sig_up+n_sig_down)] <- runif(n_sig_down, min = 0, max = 0.01)
# 计算 -log10(pvalue)
volcano_data$neg_log10_pvalue <- -log10(volcano_data$pvalue)
# 定义显著性阈值
pvalue_threshold <- 0.05
log2FC_threshold <- 1
# 添加分组标签
volcano_data$regulation <- "Not Significant"
volcano_data$regulation[volcano_data$pvalue < pvalue_threshold &
volcano_data$log2FoldChange > log2FC_threshold] <- "Up-regulated"
volcano_data$regulation[volcano_data$pvalue < pvalue_threshold &
volcano_data$log2FoldChange < -log2FC_threshold] <- "Down-regulated"
# 2. 基础火山图
library(ggplot2)
p_volcano_base <- ggplot(volcano_data,
aes(x = log2FoldChange,
y = neg_log10_pvalue,
color = regulation)) +
# 添加散点
geom_point(alpha = 0.7, size = 1.5) +
# 设置颜色
scale_color_manual(values = c("Up-regulated" = "red",
"Down-regulated" = "blue",
"Not Significant" = "gray50")) +
# 添加阈值线
geom_vline(xintercept = c(-log2FC_threshold, log2FC_threshold),
linetype = "dashed", color = "black", alpha = 0.5) +
geom_hline(yintercept = -log10(pvalue_threshold),
linetype = "dashed", color = "black", alpha = 0.5) +
# 标签和主题
labs(title = "火山图:差异表达基因分析",
subtitle = paste0("阈值: |log2FC| > ", log2FC_threshold,
", p-value < ", pvalue_threshold),
x = "log2(差异倍数)",
y = "-log10(p值)",
color = "调控类型") +
theme_minimal() +
theme(legend.position = "bottom")
print(p_volcano_base)
# 3. 美化火山图
# 添加基因标签(仅标注最显著的基因)
# 计算显著得分
volcano_data$significance_score <- volcano_data$neg_log10_pvalue * abs(volcano_data$log2FoldChange)
# 选择top显著基因
top_genes <- volcano_data[order(volcano_data$significance_score, decreasing = TRUE), ][1:20, ]
p_volcano_beautiful <- ggplot(volcano_data,
aes(x = log2FoldChange,
y = neg_log10_pvalue,
color = regulation,
size = significance_score)) +
# 散点
geom_point(alpha = 0.8) +
# 添加显著基因标签
geom_text_repel(data = top_genes,
aes(label = gene),
size = 3,
max.overlaps = 20,
box.padding = 0.5,
point.padding = 0.3,
segment.color = "gray50") +
# 颜色方案
scale_color_manual(values = c("Up-regulated" = "#e41a1c",
"Down-regulated" = "#377eb8",
"Not Significant" = "#999999")) +
# 大小范围
scale_size_continuous(range = c(1, 4), guide = "none") +
# 阈值线
geom_vline(xintercept = c(-log2FC_threshold, log2FC_threshold),
linetype = "dashed", color = "black", size = 0.8, alpha = 0.6) +
geom_hline(yintercept = -log10(pvalue_threshold),
linetype = "dashed", color = "black", size = 0.8, alpha = 0.6) +
# 添加区域标注
annotate("rect",
xmin = log2FC_threshold, xmax = Inf,
ymin = -log10(pvalue_threshold), ymax = Inf,
fill = "red", alpha = 0.1) +
annotate("rect",
xmin = -Inf, xmax = -log2FC_threshold,
ymin = -log10(pvalue_threshold), ymax = Inf,
fill = "blue", alpha = 0.1) +
# 统计信息
annotate("text",
x = max(volcano_data$log2FoldChange) - 1,
y = max(volcano_data$neg_log10_pvalue) * 0.95,
label = paste("上调:", sum(volcano_data$regulation == "Up-regulated")),
color = "red", size = 4, fontface = "bold") +
annotate("text",
x = min(volcano_data$log2FoldChange) + 1,
y = max(volcano_data$neg_log10_pvalue) * 0.95,
label = paste("下调:", sum(volcano_data$regulation == "Down-regulated")),
color = "blue", size = 4, fontface = "bold") +
# 标签和主题
labs(title = "差异表达基因火山图",
subtitle = paste0("阈值: |log2FC| > ", log2FC_threshold,
", p < ", pvalue_threshold),
x = expression(log[2]("差异倍数")),
y = expression(-log[10]("p值")),
color = "表达调控",
caption = paste("总计显著差异基因:",
sum(volcano_data$regulation != "Not Significant"))) +
theme_classic() +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 11, hjust = 0.5, color = "gray50"),
legend.position = "bottom",
legend.title = element_text(size = 10, face = "bold"),
panel.grid.major = element_line(color = "gray90", size = 0.3),
panel.grid.minor = element_blank()
)
print(p_volcano_beautiful)
# 4. 增强型火山图(添加更多统计信息)
# 添加更多列:均值表达量、AUC等
volcano_data$baseMean <- exp(rnorm(n_genes, mean = 5, sd = 2)) # 模拟均值表达量
volcano_data$AUC <- runif(n_genes, min = 0.5, max = 1) # 模拟AUC值
# 创建增强火山图
p_volcano_enhanced <- ggplot(volcano_data,
aes(x = log2FoldChange,
y = neg_log10_pvalue,
color = regulation,
size = baseMean,
shape = ifelse(AUC > 0.8, "high_auc", "low_auc"))) +
geom_point(alpha = 0.7) +
# 颜色
scale_color_manual(values = c("Up-regulated" = "#d73027",
"Down-regulated" = "#4575b4",
"Not Significant" = "#fee090")) +
# 大小和形状
scale_size_continuous(name = "平均表达量",
range = c(1, 6),
trans = "log10") +
scale_shape_manual(values = c("high_auc" = 17, "low_auc" = 16),
labels = c("high_auc" = "高AUC (>0.8)", "low_auc" = "低AUC (≤0.8)"),
name = "AUC分类") +
# 阈值线
geom_vline(xintercept = c(-log2FC_threshold, log2FC_threshold),
linetype = "longdash", color = "gray30", size = 0.5) +
geom_hline(yintercept = -log10(pvalue_threshold),
linetype = "longdash", color = "gray30", size = 0.5) +
# 添加密度分布
geom_density2d(aes(color = NULL), alpha = 0.3, size = 0.3, color = "gray50") +
# 标签和主题
labs(title = "增强型火山图",
subtitle = "大小=平均表达量 | 形状=AUC值",
x = expression(log[2]("差异倍数")),
y = expression(-log[10]("p值"))) +
theme_bw() +
theme(legend.position = "right",
legend.box = "vertical",
panel.grid.minor = element_blank())
print(p_volcano_enhanced)
# 5. 交互式火山图(plotly)
library(plotly)
# 准备交互式数据
volcano_data$hover_text <- with(volcano_data,
paste(gene,
"<br>log2FC:", round(log2FoldChange, 3),
"<br>p-value:", format(pvalue, scientific = TRUE, digits = 3),
"<br>-log10(p):", round(neg_log10_pvalue, 3),
"<br>调控:", regulation))
# 创建交互式火山图
p_interactive_volcano <- plot_ly(
data = volcano_data,
x = ~log2FoldChange,
y = ~neg_log10_pvalue,
color = ~regulation,
colors = c("Up-regulated" = "#e41a1c",
"Down-regulated" = "#377eb8",
"Not Significant" = "#999999"),
text = ~hover_text,
hoverinfo = "text",
type = "scatter",
mode = "markers",
marker = list(size = 8, opacity = 0.7)
) %>%
layout(
title = "交互式火山图",
xaxis = list(title = "log2(差异倍数)",
zeroline = TRUE,
zerolinecolor = "gray",
zerolinewidth = 1),
yaxis = list(title = "-log10(p值)",
zeroline = TRUE),
shapes = list(
list(type = "line",
x0 = -log2FC_threshold, x1 = -log2FC_threshold,
y0 = 0, y1 = max(volcano_data$neg_log10_pvalue),
line = list(dash = "dash", color = "gray", width = 1)),
list(type = "line",
x0 = log2FC_threshold, x1 = log2FC_threshold,
y0 = 0, y1 = max(volcano_data$neg_log10_pvalue),
line = list(dash = "dash", color = "gray", width = 1)),
list(type = "line",
x0 = min(volcano_data$log2FoldChange),
x1 = max(volcano_data$log2FoldChange),
y0 = -log10(pvalue_threshold),
y1 = -log10(pvalue_threshold),
line = list(dash = "dash", color = "gray", width = 1))
),
legend = list(title = list(text = "表达调控"),
orientation = "h",
yanchor = "bottom",
y = 1.02,
xanchor = "center",
x = 0.5)
)
# 显示交互式图形
p_interactive_volcano
# 6. 分组火山图(比较不同条件)
# 创建两个条件下的差异分析结果
set.seed(666)
volcano_group1 <- volcano_data[sample(1:n_genes, 500), ]
volcano_group2 <- volcano_data[sample(1:n_genes, 500), ]
# 添加组别标签
volcano_group1$group <- "治疗组 vs 对照组"
volcano_group2$group <- "疾病组 vs 健康组"
# 合并数据
volcano_combined <- rbind(volcano_group1, volcano_group2)
# 重新计算调控状态(使用相同的阈值)
volcano_combined$regulation <- "Not Significant"
volcano_combined$regulation[volcano_combined$pvalue < pvalue_threshold &
volcano_combined$log2FoldChange > log2FC_threshold] <- "Up-regulated"
volcano_combined$regulation[volcano_combined$pvalue < pvalue_threshold &
volcano_combined$log2FoldChange < -log2FC_threshold] <- "Down-regulated"
# 分组火山图
p_volcano_facet <- ggplot(volcano_combined,
aes(x = log2FoldChange,
y = neg_log10_pvalue,
color = regulation)) +
# 分面
facet_wrap(~group, ncol = 1, scales = "free") +
# 散点
geom_point(alpha = 0.6, size = 1.2) +
# 颜色
scale_color_manual(values = c("Up-regulated" = "#d73027",
"Down-regulated" = "#4575b4",
"Not Significant" = "#bababa")) +
# 阈值线
geom_vline(xintercept = c(-log2FC_threshold, log2FC_threshold),
linetype = "dashed", color = "black", alpha = 0.5) +
geom_hline(yintercept = -log10(pvalue_threshold),
linetype = "dashed", color = "black", alpha = 0.5) +
# 标签
labs(title = "多组比较火山图",
subtitle = "不同实验条件下的差异表达分析",
x = expression(log[2]("差异倍数")),
y = expression(-log[10]("p值")),
color = "表达调控") +
theme_bw() +
theme(strip.background = element_rect(fill = "lightblue"),
strip.text = element_text(size = 11, face = "bold"),
legend.position = "bottom")
print(p_volcano_facet)
# 7. 火山图与热图结合
# 提取显著差异基因的表达矩阵
sig_genes <- volcano_data[volcano_data$regulation != "Not Significant", ]
sig_genes <- sig_genes[order(sig_genes$log2FoldChange, decreasing = TRUE), ]
top_genes_list <- head(sig_genes$gene, 30)
# 模拟表达矩阵(30个基因 × 10个样本)
expression_matrix <- matrix(rnorm(30 * 10, mean = 0, sd = 1),
nrow = 30, ncol = 10)
colnames(expression_matrix) <- paste0("Sample", 1:10)
rownames(expression_matrix) <- top_genes_list
# 标准化表达矩阵
expression_matrix_scaled <- t(scale(t(expression_matrix)))
# 绘制热图
library(pheatmap)
annotation_row <- data.frame(
Regulation = sig_genes$regulation[match(top_genes_list, sig_genes$gene)]
)
rownames(annotation_row) <- top_genes_list
# 保存热图
pheatmap(expression_matrix_scaled,
main = "显著差异基因表达热图",
annotation_row = annotation_row,
annotation_colors = list(Regulation = c("Up-regulated" = "red",
"Down-regulated" = "blue")),
cluster_rows = TRUE,
cluster_cols = TRUE,
show_rownames = TRUE,
show_colnames = TRUE,
fontsize_row = 8,
filename = "volcano_heatmap.png")
本章小结
核心知识点总结
- 散点图:最基础的数值关系可视化,适用于展示两个连续变量的相关关系
- 三维散点图:扩展至三个维度,可从立体角度观察数据分布
- 线性拟合与置信区间:量化变量间关系强度,展示预测不确定性
- 带标定区域的散点图:突出显示特定数据子集或区域
- viridis包:提供色觉友好、感知均匀的颜色方案
- 气泡图:通过点大小展示第三个变量,实现多维度数据展示
- 等高线图:展示三维数据的二维投影,适合密度和地形数据
- 三元相图:专门用于成分数据分析(和为常数)
- 瀑布图:展示数值的累积变化过程,财务分析常用
- 火山图:生物信息学专用,同时展示差异倍数和显著性
最佳实践建议
- 根据数据类型选择合适的图形
- 注意颜色方案的可访问性(推荐viridis)
- 合理使用透明度避免过度绘制
- 添加必要的标签、图例和统计信息
- 考虑交互式可视化提升探索性分析