R语言类别比较数据可视化
4.1 柱状图
4.1.1 柱状图释义
柱状图用于展示不同类别(离散变量)的数值大小,高度表示数值。
4.1.2 单一柱状图
r
# Attach ggplot2, which provides the plotting functions used below
library(ggplot2)
# Example data: units sold for five kinds of fruit
data <- data.frame(
  fruit = c("苹果", "香蕉", "橙子", "葡萄", "西瓜"),
  sales = c(120, 150, 130, 170, 90)
)
# Single-series bar chart: one bar per fruit, bar height taken from `sales`
ggplot(data = data, mapping = aes(x = fruit, y = sales)) +
  geom_col(colour = "black", fill = "steelblue") + # black outline, steel-blue fill
  theme_minimal() + # built-in minimal theme
  ggtitle("不同水果销量柱状图") + # plot title
  xlab("水果种类") + # x-axis label
  ylab("销量") # y-axis label
4.1.3 分组柱状图
r
# Example data: sales of three products in each of two years
data <- data.frame(
  year = c("2022", "2022", "2022", "2023", "2023", "2023"),
  product = rep(c("产品A", "产品B", "产品C"), times = 2),
  sales = c(100, 120, 110, 130, 150, 140)
)
# Grouped bar chart: `fill = year` splits each product into one bar per year
ggplot(data = data, mapping = aes(x = product, y = sales, fill = year)) +
  geom_col(position = "dodge") + # "dodge" places the bars of a product side by side
  scale_fill_manual(values = c("#3498db", "#e74c3c")) + # one fill colour per year
  theme_minimal() +
  ggtitle("分组柱状图") +
  xlab("产品") +
  ylab("销量")
4.1.4 堆积柱状图
r
# Reuses the `data` object that is already in the session
# (columns read here: product, sales, year)
ggplot(data = data, mapping = aes(x = product, y = sales, fill = year)) +
  geom_col(position = "stack") + # "stack" piles the years on top of each other
  theme_minimal() +
  ggtitle("堆积柱状图") +
  xlab("产品") +
  ylab("总销量")
4.1.5 百分比柱状图
r
# Example data: one value per combination of group and category
data <- data.frame(
  group = rep(c("低", "中", "高"), each = 2),
  category = rep(c("男", "女"), 3),
  value = c(30, 20, 40, 30, 20, 40)
)
# Share of each category within its group, expressed on a 0-100 scale
library(dplyr)
data <- data %>%
  group_by(group) %>%
  mutate(percentage = value / sum(value) * 100) %>%
  ungroup() # drop the grouping so `data` is not left as a grouped tibble
# 100% stacked bar chart
ggplot(data, aes(x = group, y = percentage, fill = category)) +
  # position = "fill" rescales every bar to a total height of 1, so the
  # 0-100 values above are normalised again before drawing
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) + # show 0-1 as 0%-100%
  labs(title = "百分比柱状图", x = "组别", y = "百分比") +
  theme_minimal()
4.1.6 均值柱状图(带误差线)
r
# Simulated measurements: 20 normal draws for each of three groups
set.seed(123)
data <- data.frame(
  group = rep(c("对照组", "处理组1", "处理组2"), each = 20),
  value = c(
    rnorm(20, mean = 50, sd = 5),
    rnorm(20, mean = 55, sd = 6),
    rnorm(20, mean = 60, sd = 7)
  )
)
# Per-group mean and standard deviation
# (group_by/summarise come from dplyr, which must already be attached)
summary_data <- summarise(
  group_by(data, group),
  mean_val = mean(value),
  sd_val = sd(value)
)
# Bars show the group means; whiskers span mean +/- one standard deviation
ggplot(data = summary_data, mapping = aes(x = group, y = mean_val)) +
  geom_col(fill = "skyblue") +
  geom_errorbar(
    mapping = aes(ymin = mean_val - sd_val, ymax = mean_val + sd_val),
    colour = "red", # whisker colour
    width = 0.2 # width of the whisker caps
  ) +
  theme_minimal() +
  ggtitle("均值柱状图(带标准差误差线)") +
  xlab("组别") +
  ylab("测量值")
4.1.7 不等宽柱状图
r
# Example data: every category carries its own bar width and bar height
data <- data.frame(
  category = c("A", "B", "C", "D"),
  width = c(1, 1.5, 2, 0.8), # bar widths
  height = c(10, 15, 12, 8) # bar heights
)
# Left edge of each bar = summed width of all bars before it
data[["x_start"]] <- c(0, head(cumsum(data[["width"]]), -1))
# Horizontal midpoint of each bar, used to place the axis labels
data[["x_center"]] <- data[["x_start"]] + data[["width"]] / 2
# Draw every bar as a rectangle on a continuous x axis
ggplot(data = data) +
  geom_rect(
    mapping = aes(
      xmin = x_start, xmax = x_start + width, # left and right edges
      ymin = 0, ymax = height # bottom and top edges
    ),
    colour = "black", fill = "lightblue"
  ) +
  # put one tick at each bar's midpoint and label it with the category
  scale_x_continuous(labels = data[["category"]], breaks = data[["x_center"]]) +
  theme_minimal() +
  ggtitle("不等宽柱状图") +
  xlab("类别") +
  ylab("值")
4.2 条形图
r
# Horizontal bar chart: a column chart drawn with flipped axes
data <- data.frame(
  city = c("北京", "上海", "广州", "深圳", "成都"),
  population = c(2154, 2424, 1530, 1756, 1633)
)
# Build vertical bars, then flip the coordinate system
ggplot(data = data, mapping = aes(x = city, y = population)) +
  geom_col(fill = "tomato") +
  coord_flip() + # swaps the axes so the bars run horizontally
  theme_minimal() +
  ggtitle("城市人口条形图") +
  xlab("城市") +
  ylab("人口(万人)")
4.3 棒棒糖图
4.3.1 基础棒棒糖图
r
# Example data: GDP of five countries (the axis label states trillion USD)
data <- data.frame(
  country = c("中国", "美国", "印度", "日本", "德国"),
  gdp = c(14.34, 21.43, 2.87, 4.97, 3.85)
)
# Lollipop chart: a thin stem from 0 to the value, capped with a dot
ggplot(data, aes(x = reorder(country, gdp), y = gdp)) + # order countries by GDP
  geom_segment(
    # the stem ends on the same reordered axis position it starts from
    aes(xend = reorder(country, gdp), y = 0, yend = gdp),
    color = "gray",
    linewidth = 1 # line thickness; `size` is deprecated for lines since ggplot2 3.4.0
  ) +
  geom_point(size = 5, color = "steelblue") + # dot at the end of each stem
  coord_flip() + # draw the lollipops horizontally
  labs(title = "各国GDP棒棒糖图", x = "国家", y = "GDP(万亿美元)") +
  theme_minimal()
4.3.2 带基线的棒棒糖图
r
# Add a target baseline; the value 10 is stored as a column so aes() can use it
# (expects `data` with the columns country and gdp to exist already)
data$target <- 10
ggplot(data, aes(x = reorder(country, gdp), y = gdp)) +
  geom_segment(aes(xend = reorder(country, gdp), y = 0, yend = gdp), color = "gray") +
  geom_point(size = 5, aes(color = gdp >= target)) + # colour by target reached or not
  # yintercept is given outside aes(), so a bare column name would not be
  # found; read the single target value from the data frame instead
  geom_hline(
    yintercept = unique(data$target),
    linetype = "dashed", color = "red"
  ) +
  # name the colours so FALSE/TRUE cannot be swapped by level order
  scale_color_manual(
    values = c("FALSE" = "red", "TRUE" = "green"),
    labels = c("未达标", "达标")
  ) +
  coord_flip() +
  labs(title = "带基线的棒棒糖图", x = "国家", y = "GDP") +
  theme_minimal() +
  theme(legend.title = element_blank()) # hide the legend title
4.4 克利夫兰点图
r
# Cleveland dot plot: like the lollipop chart, but only the dots are drawn
data <- data.frame(
  department = c("销售部", "市场部", "研发部", "财务部", "人事部"),
  satisfaction = c(85, 78, 92, 70, 88)
)
# Departments are ordered on the y axis by their satisfaction score
ggplot(
  data = data,
  mapping = aes(x = satisfaction, y = reorder(department, satisfaction))
) +
  geom_point(colour = "darkorange", size = 4) + # points only, no stems
  theme_minimal() +
  ggtitle("部门满意度克利夫兰点图") +
  xlab("满意度分数") +
  ylab("部门")
4.5 哑铃图
r
# Example data: scores before and after for five subjects
data <- data.frame(
  subject = c("数学", "语文", "英语", "科学", "历史"),
  pre_test = c(65, 70, 60, 75, 68),
  post_test = c(85, 82, 78, 88, 80)
)
# Dumbbell chart: two dots per subject joined by a line
# The ordered y mapping is declared once so every layer shares the same order
ggplot(data, aes(y = reorder(subject, pre_test))) +
  geom_segment(
    aes(x = pre_test, xend = post_test, yend = reorder(subject, pre_test)),
    linewidth = 1.5, # line thickness; `size` is deprecated for lines since ggplot2 3.4.0
    color = "gray"
  ) + # connecting line
  geom_point(aes(x = pre_test), size = 3, color = "blue") + # pre-test dot
  geom_point(aes(x = post_test), size = 3, color = "red") + # post-test dot
  labs(title = "哑铃图:前后测试对比", x = "分数", y = "科目") +
  theme_minimal()
4.6 雷达图
r
# The fmsb package provides radarchart()
library(fmsb)
# radarchart() expects one COLUMN per axis. Row 1 holds the axis maxima,
# row 2 the axis minima, and every following row is one series to draw.
# NOTE(review): the original listing defined only three columns but six axis
# labels; the last three columns below are illustrative values added so the
# frame matches the six labels - replace them with real figures.
data <- data.frame(
  price = c(5, 0, 4, 3, 5, 2),
  performance = c(5, 0, 2, 4, 5, 3),
  appearance = c(5, 0, 5, 4, 3, 4),
  battery = c(5, 0, 3, 5, 4, 2),
  brand = c(5, 0, 4, 2, 5, 3),
  service = c(5, 0, 3, 4, 2, 5)
)
rownames(data) <- c("max", "min", "产品X", "产品Y", "产品Z", "产品W")
colnames(data) <- c("价格", "性能", "外观", "续航", "品牌", "售后")
# One colour per product, shared by the chart and the legend
radar_colors <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3")
# Draw the radar chart; the whole frame is passed so the product rows are plotted
radarchart(data,
  axistype = 1, # axis label style
  pcol = radar_colors, # outline colours
  pfcol = adjustcolor(radar_colors, alpha.f = 0.3), # same colours, semi-transparent fill
  plwd = 2, # outline width
  cglcol = "grey", # grid line colour
  cglty = 1, # grid line type
  axislabcol = "grey", # axis label colour
  vlcex = 0.8 # size of the variable labels
)
# Legend lists every row except the max/min rows
legend(
  x = 1.2, y = 1, legend = rownames(data)[-c(1, 2)],
  bty = "n", pch = 20, col = radar_colors
)
4.7 玫瑰图
r
# Rose chart: a bar chart drawn in polar coordinates
data <- data.frame(
  # a factor with explicit levels keeps the months in calendar order;
  # plain character values would be sorted alphabetically on the axis
  month = factor(month.abb, levels = month.abb),
  value = c(10, 12, 15, 18, 22, 25, 28, 27, 23, 19, 14, 11)
)
ggplot(data, aes(x = month, y = value, fill = month)) +
  geom_col() +
  coord_polar(start = 0) + # switch to polar coordinates
  scale_fill_brewer(palette = "Set3") +
  labs(title = "月度气温玫瑰图", x = "", y = "") +
  theme_minimal() +
  theme(legend.position = "none") # hide the legend
4.8 径向柱状图
4.8.1 基础径向柱状图
r
# Radial (circular) bar chart
# Fix the random seed so the sampled values, and therefore the figure,
# are the same on every run
set.seed(123)
data <- data.frame(
  category = LETTERS[1:12],
  value = sample(10:100, 12) # 12 distinct random values between 10 and 100
)
ggplot(data, aes(x = category, y = value, fill = category)) +
  geom_col() +
  coord_polar(theta = "x", start = 0) + # map x to the angle so bars point outward
  scale_fill_viridis_d() + # discrete viridis colour scale
  labs(title = "基础径向柱状图") +
  theme_void() + # remove axes and background
  theme(legend.position = "none")
4.8.2 带标签的径向柱状图
r
# Radial bar chart with the value printed inside each bar
# (expects `data` with the columns category and value to exist already)
ggplot(data = data, mapping = aes(x = category, y = value, fill = category)) +
  geom_col() +
  coord_polar(theta = "x", start = 0) +
  geom_text(
    mapping = aes(label = value), # label each bar with its value
    colour = "white",
    size = 3,
    position = position_stack(vjust = 0.5) # centre the label along the bar
  ) +
  scale_fill_viridis_d() +
  theme_void() +
  theme(legend.position = "none") +
  ggtitle("带标签的径向柱状图")
4.8.3 带断点的径向柱状图
r
# The section title calls this a "broken axis" chart; what the code does is
# plot the natural log of the values, which compresses the tall bars.
# (expects `data` with the columns category and value to exist already)
data[["value_log"]] <- log(data[["value"]])
ggplot(data = data, mapping = aes(x = category, y = value_log, fill = category)) +
  geom_col() +
  coord_polar(theta = "x", start = 0) +
  scale_y_continuous(labels = exp) + # label the axis with back-transformed values
  theme_void() +
  theme(legend.position = "none") +
  ggtitle("带断点的径向柱状图(对数变换)")
4.8.4 分组径向柱状图
r
# Example data: eight categories, each measured in two groups
# Fix the random seed so the sampled values are the same on every run
set.seed(123)
data_group <- data.frame(
  category = rep(LETTERS[1:8], 2),
  group = rep(c("组1", "组2"), each = 8),
  value = c(sample(30:70, 8), sample(40:80, 8))
)
ggplot(data_group, aes(x = category, y = value, fill = group)) +
  geom_col(position = "dodge", width = 0.7) + # side-by-side bars per category
  coord_polar(theta = "x", start = 0) +
  scale_fill_manual(values = c("#3498db", "#e74c3c")) +
  labs(title = "分组径向柱状图") +
  theme_void() +
  theme(legend.position = "bottom")
4.8.5 为径向柱状图添加元素
r
# Add a centre dot and circular guide lines
# (expects `data` with the columns category and value to exist already)
ggplot(data, aes(x = category, y = value, fill = category)) +
  geom_col() +
  coord_polar(theta = "x", start = 0) +
  # y = 0 is the centre of the plot for every x, so use a position that is
  # already on the axis (1 = first category); x = 0 lies outside the category
  # positions and would widen the axis range, opening an empty wedge
  annotate("point", x = 1, y = 0, size = 10, shape = 21, fill = "white") +
  # horizontal lines become concentric circles in polar coordinates
  geom_hline(yintercept = seq(0, 100, 25), linetype = "dashed", color = "gray") +
  labs(title = "添加元素的径向柱状图") +
  theme_void() +
  theme(legend.position = "none")
4.8.6 分组堆叠径向柱状图
r
# Example data: six categories, each split into three subgroups
# Fix the random seed so the sampled values are the same on every run
set.seed(123)
data_stack <- data.frame(
  category = rep(LETTERS[1:6], 3),
  subgroup = rep(c("子组A", "子组B", "子组C"), each = 6),
  value = c(sample(10:30, 6), sample(15:35, 6), sample(5:25, 6))
)
ggplot(data_stack, aes(x = category, y = value, fill = subgroup)) +
  geom_col(position = "stack") + # stack the subgroups within each category
  coord_polar(theta = "x", start = 0) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "分组堆叠径向柱状图") +
  theme_void() +
  theme(legend.position = "bottom")
4.9 词云图
r
# Install (once) and attach the wordcloud2 package
# install.packages("wordcloud2")
library(wordcloud2)
# Word-frequency table: one row per word, with its frequency
words <- data.frame(
  word = c(
    "数据科学", "R语言", "可视化", "机器学习", "统计分析",
    "深度学习", "人工智能", "大数据", "数据挖掘", "商业智能",
    "Python", "ggplot2", "Shiny", "Tidyverse", "数据清洗"
  ),
  freq = c(100, 95, 90, 85, 80, 75, 70, 65, 60, 55, 50, 45, 40, 35, 30)
)
# Draw the word cloud
wordcloud2(
  data = words,
  size = 0.8, # overall word size
  color = "random-dark", # colour scheme
  backgroundColor = "white", # background colour
  shape = "circle", # cloud outline (circle, cardioid, diamond, ...)
  rotateRatio = 0.5 # proportion of words that are rotated
)
# Heart-like cloud: wordcloud2 documents this shape as "cardioid"
# wordcloud2(words, shape = "cardioid", size = 0.6)
# Word cloud filled into the silhouette of an image
# figPath <- system.file("examples/t.png", package = "wordcloud2")
# wordcloud2(words, figPath = figPath, size = 0.5)
本章小结
本章系统介绍了R语言中类别比较数据的可视化方法,涵盖:
- 柱状图家族:单一、分组、堆积、百分比、均值、不等宽柱状图
- 水平变体:条形图、棒棒糖图、克利夫兰点图、哑铃图
- 圆形布局:雷达图、玫瑰图、径向柱状图及其各种变体
- 文本可视化:词云图
关键语法要点:
- geom_col():绘制柱状图(高度对应y值)
- position参数:"dodge"(分组)、"stack"(堆积)、"fill"(百分比)
- coord_polar():极坐标变换,用于圆形图表
- coord_flip():坐标轴翻转,用于水平图表
- geom_segment() + geom_point():构建棒棒糖图和哑铃图
选择建议:
- 类别较少时用柱状图,较多时用条形图
- 强调排名用克利夫兰点图
- 对比前后变化用哑铃图
- 展示周期性数据用玫瑰图
- 多指标综合对比用雷达图
- 文本数据用词云图
以上代码在安装并加载 ggplot2、dplyr、fmsb 和 wordcloud2 包后即可在R环境中运行;部分示例沿用前一小节创建的 data 对象,请按章节顺序依次执行。建议结合ggplot2官方文档进一步学习各参数的详细用法。