R语言局部整体型数据可视化
一、饼图
1.1 基础饼图
饼图用于展示各部分占整体的比例关系,适合分类较少(3-6类)的数据。
语法知识点
- 基础函数: pie(x, labels, col, main, radius, clockwise)
- 参数说明:
  - x: 数值向量(各部分的值)
  - labels: 各部分的标签
  - col: 填充颜色
  - main: 主标题
  - radius: 饼图半径(0-1之间)
  - clockwise: 是否顺时针绘制
案例代码
r
# Basic pie chart (base graphics): share of quarterly sales per product.
# pie() and legend() ship with base R, so only the palette needs a package.

# Sample data: sales per product line.
sales <- c(320, 280, 180, 150, 70)
products <- c("手机", "笔记本电脑", "平板", "耳机", "配件")

# Percentage share of each product (one decimal), embedded in the labels.
percent <- round(sales / sum(sales) * 100, 1)
labels <- paste0(products, "\n", percent, "%")

# Qualitative palette from RColorBrewer.
# If the package is missing, run install.packages("RColorBrewer") first.
library(RColorBrewer)
colors <- brewer.pal(length(sales), "Set3")

# Draw the pie.
pie(sales,
    labels = labels,                    # slice labels (name + percentage)
    col = colors,                       # fill colours
    main = "2024年Q1产品销售占比",      # main title
    radius = 0.8,                       # pie radius
    clockwise = TRUE,                   # draw slices clockwise
    border = "white",                   # slice border colour
    cex = 0.8)                          # label text size

# Legend mapping colours to product names.
legend("topright",
       legend = products,
       fill = colors,
       title = "产品类别",
       cex = 0.8)

# Print summary figures; seq_along() stays safe if `products` is ever empty.
cat("总销售额:", sum(sales), "万元\n")
for (i in seq_along(products)) {
  cat(products[i], "占比:", percent[i], "%\n")
}
1.2 3D饼图(使用plotrix包)
r
# 3D pie chart of monthly spending, drawn with plotrix::pie3D().
# Run install.packages("plotrix") once if the package is missing.
library(plotrix)
library(RColorBrewer)

# Monthly spending per category.
categories <- c("餐饮", "房租", "交通", "娱乐", "购物", "储蓄")
expenses <- c(3500, 2200, 1800, 1500, 1000, 800)

# Label text: category, percentage share and a percent sign, space-separated.
percent <- round(expenses / sum(expenses) * 100, 1)
labels <- paste(categories, percent, "%")

# One pastel colour per category.
colors <- brewer.pal(6, "Pastel1")

# Draw the 3D pie.
pie3D(
  expenses,
  main = "月度支出分布(3D效果)",
  labels = labels,     # custom slice labels
  labelcex = 0.8,      # label text size
  col = colors,
  radius = 0.9,
  explode = 0.1,       # how far the slices are pulled apart
  theta = 0.8          # viewing angle
)

# Legend mapping colours to categories.
legend(
  "bottomright",
  legend = categories,
  fill = colors,
  cex = 0.8
)
1.3 环形图(甜甜圈图)
r
# Donut chart built with ggplot2: rectangles bent into a ring by polar
# coordinates. Run install.packages("ggplot2") once if needed.
library(ggplot2)

# Population by city tier.
data <- data.frame(
  category = c("一线城市", "二线城市", "三线城市", "四线及以下"),
  population = c(3800, 5200, 3400, 2800)
)

# Each tier occupies the interval [ymin, ymax] on a 0-100 percentage axis;
# the label sits at the midpoint of that interval.
data$percentage <- data$population / sum(data$population) * 100
data$ymax <- cumsum(data$percentage)
data$ymin <- c(0, data$ymax[-nrow(data)])
data$label_position <- (data$ymax + data$ymin) / 2

# Draw the ring.
ggplot(
  data,
  aes(xmin = 3, xmax = 4, ymin = ymin, ymax = ymax, fill = category)
) +
  geom_rect() +                               # one rectangle per tier
  geom_label(
    aes(x = 3.5, y = label_position,
        label = sprintf("%s%%", round(percentage, 1))),
    size = 4
  ) +
  xlim(2, 4) +                                # x from 2 to 3 stays empty: the hole
  coord_polar(theta = "y") +                  # wrap the y axis into a circle
  scale_fill_brewer(palette = "Set2", name = "城市等级") +
  labs(title = "城市人口分布(环形图)") +
  theme_void() +                              # drop axes and background
  theme(plot.title = element_text(hjust = 0.5, size = 16))
二、散点复合饼图
2.1 概念和用途
散点复合饼图结合了散点图和饼图,用于展示类别之间的关系以及每个类别内部的结构。
语法知识点
- scatterpie包: 专门用于绘制散点复合饼图
- 核心函数 :
geom_scatterpie(),geom_scatterpie_legend()
案例代码
r
# 安装并加载所需包
# install.packages(c("scatterpie", "ggplot2", "ggrepel"))
library(ggplot2)
library(scatterpie)
library(ggrepel)
# Sample data: GDP composition (primary / secondary / tertiary industry, in %)
# of six cities, positioned by longitude and latitude.
set.seed(123) # fixed seed so reruns are reproducible
cities <- data.frame(
  city = c("北京", "上海", "广州", "深圳", "成都", "武汉"),
  longitude = c(116.4, 121.5, 113.3, 114.1, 104.1, 114.3),
  latitude = c(39.9, 31.2, 23.1, 22.5, 30.7, 30.6),
  第一产业 = c(5, 4, 6, 3, 8, 7),
  第二产业 = c(25, 30, 35, 40, 38, 42),
  第三产业 = c(70, 66, 59, 57, 54, 51)
)

# Inspect the data.
head(cities)

# Scatter-pie plot: one pie per city, all drawn with the same radius (0.3).
ggplot() +
  geom_scatterpie(aes(x = longitude, y = latitude, r = 0.3),
                  data = cities,
                  cols = c("第一产业", "第二产业", "第三产业"),
                  color = "white", # slice border colour
                  alpha = 0.8) +   # transparency
  # City name labels, pushed away from the pies.
  geom_text_repel(aes(x = longitude, y = latitude, label = city),
                  data = cities,
                  size = 4,
                  fontface = "bold",
                  nudge_x = 0.2) +
  theme_minimal() +
  # Radius legend. The original passed `cities$r + 0.2`, but no `r` column
  # exists at this point, so the legend received an empty vector. Use the
  # radius that is actually drawn.
  geom_scatterpie_legend(0.3, x = 112, y = 22) +
  # Equal axis scaling keeps the pies circular instead of elliptical.
  coord_equal() +
  # Every pie has the same radius here, so the subtitle must not claim that
  # size encodes total GDP.
  labs(x = "经度", y = "纬度",
       title = "主要城市GDP产业结构分布",
       subtitle = "各城市饼图半径相同,扇区表示三次产业占比",
       caption = "数据来源:2023年统计年鉴") +
  scale_fill_manual(values = c("#FFB6C1", "#87CEEB", "#98FB98")) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 12),
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 10)
  )
# Advanced example: let the pie radius encode total GDP.
cities$gdp_total <- c(420, 450, 310, 350, 220, 250) # unit: billions

# Normalise so that the largest city gets radius 0.4.
cities$r <- cities$gdp_total / max(cities$gdp_total) * 0.4

# Redraw with a per-city radius.
ggplot() +
  geom_scatterpie(aes(x = longitude, y = latitude, r = r),
                  data = cities,
                  cols = c("第一产业", "第二产业", "第三产业"),
                  color = "black",
                  alpha = 0.85) +
  geom_text_repel(aes(x = longitude, y = latitude, label = city),
                  data = cities, size = 5, force = 2) +
  # Size legend: the labeller converts a radius back into the GDP total it
  # stands for, inverting the normalisation above.
  geom_scatterpie_legend(
    cities$r, x = 106, y = 23, n = 3,
    labeller = function(r) round(r / 0.4 * max(cities$gdp_total))
  ) +
  # Equal axis scaling keeps the pies circular, so radii stay comparable.
  coord_equal() +
  theme_bw() +
  labs(title = "中国主要城市经济结构对比",
       x = "经度", y = "纬度") +
  scale_fill_brewer(palette = "Pastel1",
                    name = "产业结构",
                    labels = c("第一产业", "第二产业", "第三产业")) +
  theme(legend.position = "bottom")
三、华夫图
3.1 基础华夫图
华夫图使用小方格展示百分比或比例,视觉冲击力强。
语法知识点
- waffle包: 专门绘制华夫图
- 核心函数 :
waffle(),geom_waffle()
案例代码
r
# 安装并加载包
# install.packages(c("waffle", "extrafont"))
library(waffle)
library(ggplot2)
# ============ Example 1: basic waffle chart ============
# Voting-intention survey: raw respondent counts (1000 people in total).
votes <- c(`民主党` = 458, `共和党` = 421, `独立党派` = 121)

# waffle() draws one square per unit, so the raw counts would produce 1000
# squares while the axis label promises "1 square = 1%". Convert to whole
# percentages first (46 + 42 + 12 = 100 squares, a 10 x 10 grid).
# Note: for other data, rounded shares may sum to 99 or 101.
vote_pct <- round(votes / sum(votes) * 100)

# Draw the waffle chart.
waffle(vote_pct,
       rows = 10,    # number of rows in the grid
       size = 0.5,   # width of the white border between squares
       colors = c("#4B8BBE", "#E32219", "#F7D44A"),
       title = "2024年总统大选投票意向调查",
       xlab = "1个方格 = 1%的选民",
       legend_pos = "bottom") +
  # Centre and embolden the title.
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
        plot.subtitle = element_text(hjust = 0.5),
        legend.position = "bottom")
# ============ Example 2: percentage data ============
# Weekly time budget in hours (168 hours per week).
time_use <- c(
  睡眠 = 56, # 8 h/day
  工作 = 40, # 5.7 h/day
  学习 = 21, # 3 h/day
  娱乐 = 28, # 4 h/day
  通勤 = 10, # 1.4 h/day
  其他 = 13
)

# Convert to whole percentages. waffle() needs integer square counts;
# fractional values are truncated, which left only 96 squares in the grid.
time_pct <- round(time_use / sum(time_use) * 100)

# waffle() has no `subtitle` argument, so the subtitle is added with labs()
# on the returned ggplot object.
waffle(time_pct,
       rows = 10,
       size = 0.8,
       colors = c("#264653", "#2A9D8F", "#E9C46A",
                  "#F4A261", "#E76F51", "#8ECAE6"),
       title = "一周时间分配图",
       legend_pos = "bottom") +
  labs(subtitle = "每个方格代表1%的时间(基于168小时/周)") +
  theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5))
# ============ Example 3: hand-built waffle chart with ggplot2 tiles ============
# Group shares; the values sum to 100, i.e. one tile per percentage point.
df <- data.frame(
  group = c("A组", "B组", "C组"),
  value = c(35, 45, 20)
)

# Lay out a 10 x 10 grid (x varies fastest) and assign each cell to a group,
# repeating every group name as many times as its share.
grid_cells <- expand.grid(x = 1:10, y = 1:10)
waffle_data <- data.frame(
  group = rep(df$group, times = df$value),
  x = grid_cells$x,
  y = grid_cells$y
)

# Draw one tile per grid cell.
ggplot(waffle_data, aes(x = x, y = y, fill = group)) +
  geom_tile(color = "white", size = 0.5) +
  scale_fill_manual(values = c("#FF6B6B", "#4ECDC4", "#45B7D1")) +
  coord_equal() +
  labs(
    title = "团队贡献度华夫图",
    subtitle = "每个方格代表1%的贡献度",
    caption = "总计100个方格 = 100%"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.title = element_blank(),
    legend.position = "bottom"
  )
3.2 对比华夫图(高级应用)
r
# 绘制对比华夫图:两期数据对比
library(tidyverse)
# Opinion shares (in %) before and after the policy.
before <- c(支持 = 55, 反对 = 30, 中立 = 15)
after <- c(支持 = 68, 反对 = 20, 中立 = 12)

# Same colour per stance in both panels.
stance_colors <- c("#2E8B57", "#CD5C5C", "#FFD700")

# waffle() returns ggplot objects. par(mfrow = ...) only affects base
# graphics, so it cannot place them side by side; build both plots and
# arrange them on one page with gridExtra instead.
waffle_before <- waffle(before,
                        rows = 10,
                        colors = stance_colors,
                        title = "政策实施前",
                        legend_pos = "bottom")
waffle_after <- waffle(after,
                       rows = 10,
                       colors = stance_colors,
                       title = "政策实施后",
                       legend_pos = "bottom")
gridExtra::grid.arrange(waffle_before, waffle_after, ncol = 2) # one row, two columns

# Print the change in support and opposition (percentage points).
cat("支持率变化:", after["支持"] - before["支持"], "%\n")
cat("反对率变化:", after["反对"] - before["反对"], "%\n")
四、马赛克图
4.1 二维马赛克图
马赛克图展示多个分类变量的关系,面积大小代表频数。
语法知识点
- 基础函数: mosaicplot()(graphics 包),mosaic()(vcd 包)
- 核心包: vcd,ggplot2(配合 ggmosaic 扩展)
案例代码
r
# 安装并加载包
# install.packages(c("vcd", "MASS"))
library(vcd)
library(MASS)
# ============ Example 1: basic mosaic plot ============
# Counts of preferred product type by gender (rows: gender, columns: product).
preference_counts <- matrix(c(
  120, 80,  # male   - electronics, clothing
  90, 110,  # female - electronics, clothing
  60, 40    # other  - electronics, clothing
), nrow = 3, byrow = TRUE)
rownames(preference_counts) <- c("男性", "女性", "其他")
colnames(preference_counts) <- c("电子产品", "服装")

# Draw the mosaic plot.
# shade = TRUE colours each tile by its residual from the independence
# model. mosaicplot() only uses `color` when shade is FALSE, so the fixed
# two-colour vector passed originally had no effect and is dropped.
mosaicplot(preference_counts,
           main = "性别与产品偏好的关系",
           xlab = "性别",
           ylab = "产品类型",
           shade = TRUE,     # residual-based shading
           las = 1,          # horizontal axis labels
           border = "white")
# ============ Example 2: extended mosaic plot with vcd ============
# Load the built-in HairEyeColor table and list its dimensions.
data("HairEyeColor")
dimnames(HairEyeColor)

# Two-way view: hair colour against eye colour.
mosaic(
  ~ Hair + Eye,
  data = HairEyeColor,
  shade = TRUE,                                          # colour tiles by residual
  legend = TRUE,                                         # show the residual legend
  labeling_args = list(rot_labels = c(90, 0, 0, 0)),
  main = "头发颜色与眼睛颜色的关系"
)

# ============ Example 3: three-way mosaic plot ============
# Add sex as a third variable; `direction` sets the split direction
# (vertical / horizontal) used for each variable in turn.
mosaic(
  ~ Hair + Eye + Sex,
  data = HairEyeColor,
  direction = c("v", "h", "v"),
  shade = TRUE,
  legend = TRUE,
  main = "头发颜色、眼睛颜色与性别的关系"
)
# ============ 示例4:使用ggplot2的ggmosaic包 ============
# install.packages("ggmosaic")
library(ggplot2)
library(ggmosaic)
# Simulated demo data: 200 students with a random grade, activity flag and
# year. The three columns are sampled in this order so the seed reproduces
# the same data.
set.seed(456)
n_students <- 200
students <- data.frame(
  成绩 = sample(c("优秀", "良好", "及格", "不及格"), n_students, replace = TRUE),
  课外活动 = sample(c("参加", "不参加"), n_students, replace = TRUE),
  年级 = sample(c("大一", "大二", "大三", "大四"), n_students, replace = TRUE)
)

# Mosaic plot: grade on the x axis, tiles filled by activity participation.
ggplot(data = students) +
  geom_mosaic(aes(x = product(成绩), fill = 课外活动)) +
  scale_fill_manual(values = c("#FFB347", "#4CAF50")) +
  labs(
    title = "学生成绩与课外活动参与关系",
    x = "成绩等级",
    y = "比例"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  )
4.2 多维马赛克图进阶
r
# 针对大型数据表的马赛克图
library(vcd)
# Simulated survey data: four categorical answers for n respondents.
set.seed(789)
n <- 500
survey_data <- data.frame(
  年龄组 = sample(c("18-30", "31-45", "46-60", "60+"), n, replace = TRUE),
  收入水平 = sample(c("低", "中", "高"), n, replace = TRUE, prob = c(0.3, 0.5, 0.2)),
  网购频率 = sample(c("经常", "有时", "很少"), n, replace = TRUE),
  满意度 = sample(c("满意", "一般", "不满意"), n, replace = TRUE)
)

# Three-way contingency table. Naming the dimensions lets the plot label
# its axes with the variable names.
contingency_table <- table(
  年龄组 = survey_data$年龄组,
  满意度 = survey_data$满意度,
  收入水平 = survey_data$收入水平
)

# Three-way mosaic plot.
mosaic(contingency_table,
       main = "年龄、收入与满意度的复杂关系",
       shade = TRUE,
       labeling = labeling_border(rot_labels = c(45, 0, 0, 0)),
       gp = gpar(fill = c("lightblue", "lightgreen", "lightpink")))

# Chi-square test of age group vs. satisfaction within each income level.
income_labels <- c(低 = "低收入群体", 中 = "中等收入群体", 高 = "高收入群体")
income_tests <- lapply(names(income_labels), function(level) {
  chisq.test(contingency_table[, , level])
})
names(income_tests) <- names(income_labels)
for (level in names(income_tests)) {
  print(income_tests[[level]])
}

# Report conclusions derived from the computed p-values. The original
# printed fixed sentences that did not depend on the test results.
cat("\n=== 卡方检验结果 ===\n")
for (level in names(income_tests)) {
  p_value <- income_tests[[level]]$p.value
  verdict <- if (p_value < 0.05) {
    "p < 0.05, 年龄与满意度存在显著关联"
  } else {
    "p >= 0.05, 无明显关联"
  }
  cat(sprintf("%s: p = %.4f (%s)\n", income_labels[[level]], p_value, verdict))
}
五、综合案例
综合案例1:企业销售数据全景分析
r
# 加载所有必要包
library(tidyverse)
library(scatterpie)
library(waffle)
library(vcd)
library(RColorBrewer)
library(gridExtra)
# ============ Data preparation ============
# Sales data for the four regions of a retail chain. The four product
# columns are sampled in this order so the seed reproduces the same data.
set.seed(2024)
region_data <- data.frame(
  区域 = c("华东", "华南", "华北", "西部"),
  销售额 = c(1250, 980, 760, 520),
  经度 = c(121.48, 113.27, 116.41, 104.07),
  纬度 = c(31.22, 23.13, 39.90, 30.67),
  产品A = sample(20:40, 4),
  产品B = sample(25:45, 4),
  产品C = sample(15:35, 4),
  产品D = sample(10:30, 4)
)

# Product share within each region. The sampled product values do not sum
# to 100 per region, so dividing by 100 (as before) did not yield shares;
# divide by the region's own total instead so each row sums to 1.
product_cols <- c("产品A", "产品B", "产品C", "产品D")
region_totals <- rowSums(region_data[, product_cols])
region_data[paste0(product_cols, "_pct")] <-
  region_data[, product_cols] / region_totals

# Pie radius proportional to revenue; the largest region gets 0.5.
region_data$radius <- region_data$销售额 / max(region_data$销售额) * 0.5
# ============ 1. Scatter-pie plot: regional position and product mix ============
p1 <- ggplot() +
  geom_scatterpie(aes(x = 经度, y = 纬度, r = radius),
                  data = region_data,
                  cols = c("产品A", "产品B", "产品C", "产品D"),
                  color = "white",
                  alpha = 0.9) +
  # ggrepel is not among the packages loaded for this case study, so the
  # function is called with its namespace to keep the section runnable
  # on its own.
  ggrepel::geom_text_repel(aes(x = 经度, y = 纬度, label = 区域),
                           data = region_data,
                           size = 6,
                           fontface = "bold") +
  # Equal axis scaling keeps the pies circular, so radii stay comparable.
  coord_equal() +
  theme_minimal() +
  labs(title = "各区域销售额与产品结构分布",
       x = "经度", y = "纬度",
       subtitle = "饼图大小代表销售额,颜色代表产品类型") +
  scale_fill_manual(values = brewer.pal(4, "Set2"),
                    labels = c("产品A", "产品B", "产品C", "产品D")) +
  theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"))
print(p1)
# ============ 2. Pie chart: overall product mix ============
# Total per product across all regions and its percentage share.
total_products <- colSums(region_data[, c("产品A", "产品B", "产品C", "产品D")])
total_percent <- round(total_products / sum(total_products) * 100, 1)
# paste0() avoids the stray spaces that paste()'s default separator put
# around the line break and before the percent sign.
product_labels <- paste0(names(total_products), "\n", total_percent, "%")

pie_colors <- brewer.pal(4, "Pastel1")

# Write the pie chart to a PDF file.
# The default PDF font cannot render Chinese text; "GB1" is the Simplified
# Chinese CJK font family provided by grDevices.
pdf("整体产品销售饼图.pdf", width = 8, height = 6, family = "GB1")
pie(total_products,
    labels = product_labels,
    col = pie_colors,
    main = "整体产品销售结构",
    radius = 0.8,
    cex = 1.2,
    border = "white")
legend("topright", legend = names(total_products),
       fill = pie_colors, cex = 0.8)
dev.off()
# ============ 3. Waffle chart: customer satisfaction ============
# Survey result: number of responses per satisfaction level (100 in total).
satisfaction <- c(非常满意 = 42, 满意 = 35, 一般 = 15, 不满意 = 8)

# One square per response, arranged in 10 rows.
waffle(
  satisfaction,
  rows = 10,
  title = "客户满意度调查(共100份)",
  xlab = "1个方格 = 1%客户",
  colors = c("#4CAF50", "#8BC34A", "#FFC107", "#F44336"),
  legend_pos = "bottom"
) +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5)
  )
# ============ 4. Mosaic plot: region x quarter x product type ============
# Simulated seasonal sales records: 160 rows, 40 per region, with a random
# product type and a Poisson-distributed sales volume.
set.seed(456)
season_data <- data.frame(
  区域 = rep(c("华东", "华南", "华北", "西部"), each = 40),
  季度 = rep(c("Q1", "Q2", "Q3", "Q4"), times = 40),
  产品类型 = sample(c("A类", "B类", "C类"), 160, replace = TRUE),
  销量 = rpois(160, lambda = 30)
)

# Three-way table of sales volume. The original tabulated row counts and
# then never used the table, so the 销量 column was ignored and the plot
# only showed the balanced design (10 rows per region and quarter).
# xtabs() with 销量 on the left-hand side sums the volume per cell.
mosaic_table <- xtabs(销量 ~ 区域 + 季度 + 产品类型, data = season_data)

# Draw the mosaic plot from the volume-weighted table.
mosaic(mosaic_table,
       main = "区域、季度与产品类型的销售关联",
       shade = TRUE,
       labeling = labeling_border(rot_labels = c(45, 0, 0, 0)),
       gp_args = list(interpolate = 1))
# ============ 5. Text report ============
# Prints a console summary built from region_data, total_products,
# total_percent and satisfaction, which are created earlier in this script.
cat("\n========== 企业销售数据分析报告 ==========\n\n")

cat("1. 整体销售概况\n")
cat(" 总销售额:", sum(region_data$销售额), "万元\n")
cat(" 平均销售额:", mean(region_data$销售额), "万元\n")
cat(" 最高销售区域:", region_data$区域[which.max(region_data$销售额)], "\n\n")

# Loop bounds follow the data instead of a hard-coded 1:4, so the report
# stays correct if products, regions or answer levels are added or removed.
cat("2. 产品结构分析\n")
for (i in seq_along(total_products)) {
  cat(" ", names(total_products)[i], ":",
      total_products[i], "万元 (", total_percent[i], "%)\n")
}

cat("\n3. 区域销售排名\n")
ranked <- region_data[order(region_data$销售额, decreasing = TRUE), ]
for (i in seq_len(nrow(ranked))) {
  cat(" 第", i, "名:", ranked$区域[i], "-",
      ranked$销售额[i], "万元\n")
}

cat("\n4. 客户满意度\n")
for (i in seq_along(satisfaction)) {
  cat(" ", names(satisfaction)[i], ":",
      satisfaction[i], "%\n")
}
cat("\n==========================================\n")
综合案例2:教育数据分析面板
r
# 学生学业表现分析
library(ggplot2)
library(gridExtra)
library(scales)
# Simulated student records: class, sex, three subject scores and background
# variables. Columns are sampled in this order so the seed reproduces the
# same data.
set.seed(888)
students <- data.frame(
  学号 = 1:200,
  班级 = sample(c("1班", "2班", "3班", "4班"), 200, replace = TRUE),
  性别 = sample(c("男", "女"), 200, replace = TRUE),
  数学 = round(rnorm(200, 75, 15)),
  语文 = round(rnorm(200, 72, 12)),
  英语 = round(rnorm(200, 70, 13)),
  课外辅导 = sample(c("参加", "未参加"), 200, replace = TRUE, prob = c(0.4, 0.6)),
  家长学历 = sample(c("初中", "高中", "本科", "研究生"), 200, replace = TRUE)
)

# Clamp every subject score to [0, 100]. The original only clamped 数学,
# leaving 语文 and 英语 free to exceed 100.
subject_cols <- c("数学", "语文", "英语")
students[subject_cols] <- lapply(
  students[subject_cols],
  function(score) pmin(pmax(score, 0), 100)
)

students$总分 <- students$数学 + students$语文 + students$英语

# Grade bands: [0,180) fail, [180,210) pass, [210,240) good, [240,300]
# excellent. right = FALSE puts a total of exactly 180 into "及格";
# include.lowest = TRUE keeps the top score of 300 inside the last band.
# With the default right-closed intervals, 180 counted as a fail and a
# total of 0 became NA.
students$等级 <- cut(students$总分,
                     breaks = c(0, 180, 210, 240, 300),
                     labels = c("不及格", "及格", "良好", "优秀"),
                     right = FALSE,
                     include.lowest = TRUE)
# ============ 1. Mosaic plot: class x grade band ============
# Cross-tabulate class against grade band.
class_grade <- table(students$班级, students$等级)

# shade = TRUE colours tiles by their residual from the independence model.
# mosaicplot() only uses `color` when shade is FALSE, so the fixed colour
# vector passed originally had no effect and is dropped.
mosaicplot(class_grade,
           main = "各班成绩等级分布",
           xlab = "班级", ylab = "成绩等级",
           shade = TRUE,
           las = 2) # axis labels perpendicular to the axis
# ============ 2. Waffle chart: overall grade distribution ============
# Number of students per grade band, as a two-column data frame.
grade_dist <- table(students$等级)
grade_df <- data.frame(
  等级 = names(grade_dist),
  人数 = as.numeric(grade_dist)
)

# Whole-number percentage per grade band, named by band.
grade_pct <- setNames(
  round(grade_df$人数 / sum(grade_df$人数) * 100),
  grade_df$等级
)

# One square per percentage point, arranged in 10 rows.
waffle(
  grade_pct,
  rows = 10,
  title = "整体成绩等级分布",
  xlab = "每个方格代表1%的学生",
  colors = c("#F44336", "#FF9800", "#4CAF50", "#2196F3"),
  legend_pos = "bottom"
)
# ============ 3. Ring chart: tutoring vs. no tutoring ============
# Mean total and mean subject scores per tutoring group.
# dplyr is called with its namespace because this case study does not
# attach dplyr or the pipe operator itself.
tutoring_effect <- dplyr::summarise(
  dplyr::group_by(students, 课外辅导),
  平均分 = mean(总分),
  数学_平均 = mean(数学),
  语文_平均 = mean(语文),
  英语_平均 = mean(英语)
)

# Ring chart data. The category label is derived from the group column, so
# each label always matches its value regardless of the group sort order
# (the original assigned the labels by position).
ring_data <- data.frame(
  category = paste0(tutoring_effect$课外辅导, "辅导"),
  value = tutoring_effect$平均分
)
ring_data$percentage <- ring_data$value / sum(ring_data$value) * 100
ring_data$ymax <- cumsum(ring_data$percentage)
ring_data$ymin <- c(0, head(ring_data$ymax, n = -1))

# Draw the ring; each segment is labelled with the group's mean total score.
ggplot(ring_data, aes(ymax = ymax, ymin = ymin, xmax = 4, xmin = 3,
                      fill = category)) +
  geom_rect() +
  coord_polar(theta = "y") +
  xlim(c(2, 4)) + # x from 2 to 3 stays empty and forms the hole
  geom_text(aes(x = 3.5, y = (ymax + ymin) / 2,
                label = paste0(round(value), "分")),
            size = 5) +
  theme_void() +
  labs(title = "课外辅导对总分的影响") +
  scale_fill_manual(values = c("#FFB74D", "#81C784")) +
  theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"))
# ============ 4. Statistical test and report ============
# Two-sample t-test of total score between the tutoring groups.
t_test <- t.test(总分 ~ 课外辅导, data = students)

cat("\n========== 教育数据分析结果 ==========\n")
cat("\n总体情况:\n")
cat(" 参考人数:", nrow(students), "人\n")
cat(" 平均总分:", round(mean(students$总分), 1), "分\n")
cat(" 最高分:", max(students$总分), "分\n")
cat(" 最低分:", min(students$总分), "分\n\n")

# Per-class mean total and share of "优秀" students (in %).
# dplyr is called with its namespace because this case study does not
# attach it.
cat("班级对比:\n")
class_summary <- dplyr::summarise(
  dplyr::group_by(students, 班级),
  平均分 = round(mean(总分), 1),
  优秀率 = sum(等级 == "优秀") / dplyr::n() * 100
)
print(class_summary)

# Look the group means up by group name rather than by row position, so the
# report cannot swap the two groups if the sort order changes.
mean_tutored <- tutoring_effect$平均分[tutoring_effect$课外辅导 == "参加"]
mean_untutored <- tutoring_effect$平均分[tutoring_effect$课外辅导 == "未参加"]
score_diff <- mean_tutored - mean_untutored

cat("\n课外辅导效果分析:\n")
cat(" 参加辅导平均分:", round(mean_tutored, 1), "分\n")
cat(" 未参加平均分:", round(mean_untutored, 1), "分\n")
cat(" 差异:", round(score_diff, 1), "分\n")
cat(" p值:", round(t_test$p.value, 4), "\n")

# The test is two-sided: a significant result only counts as an improvement
# when the tutored group actually scored higher.
if (t_test$p.value < 0.05 && score_diff > 0) {
  cat(" 结论: 课外辅导对成绩有显著提升作用\n")
} else if (t_test$p.value < 0.05) {
  cat(" 结论: 参加辅导的学生平均分显著更低\n")
} else {
  cat(" 结论: 没有统计学证据表明辅导有效果\n")
}

# Save the mosaic plot. ggsave() writes the most recent ggplot (the ring
# chart), not the base-graphics mosaic plot, so the file name did not match
# its content. Redraw the mosaic plot on a PNG device instead.
png("教育分析报告_马赛克图.png", width = 10, height = 8, units = "in", res = 300)
mosaicplot(class_grade,
           main = "各班成绩等级分布",
           xlab = "班级", ylab = "成绩等级",
           shade = TRUE,
           las = 2)
dev.off()
本章小结
各图表适用场景总结
| 图表类型 | 适用场景 | 优点 | 局限性 |
|---|---|---|---|
| 饼图 | 3-6个类别的比例展示 | 直观易懂,适合展示份额 | 类别过多时难以区分 |
| 散点复合饼图 | 展示地理位置加内部结构 | 信息量大,空间对比强 | 数据过多会重叠 |
| 华夫图 | 百分比展示,强调整体与部分 | 视觉冲击力强,精确 | 不适用大量类别 |
| 马赛克图 | 多分类变量交叉分析 | 展示复杂关系,可做统计检验 | 解读需要一定基础 |
选择建议
- 强调占比 → 饼图或华夫图
- 地理分布 → 散点复合饼图
- 多维度交叉 → 马赛克图
- 精确比较 → 避免使用饼图,改用条形图
注意事项
- 饼图类别不超过6个,否则使用条形图
- 华夫图数据最好为整数百分比
- 马赛克图中样本量不能太小
- 散点复合饼图需注意重叠问题
通过本章学习,您应该能够根据数据特征和分析目的,灵活选择合适的局部整体型可视化方法,并正确实现和解读这些图表。