R语言网格绘图系统(ggplot2)- 完整知识点与案例代码
3.1 基本语法
3.1.1 ggplot2语法框架
r
# ggplot2是R语言最强大的绘图系统,基于图形语法理论
# 安装和加载ggplot2
# install.packages("ggplot2")
library(ggplot2)
# ggplot2基本语法框架
# ggplot(data = 数据, mapping = aes(映射)) + 几何对象 + 统计变换 + 坐标系统 + 分面 + 主题
# 示例1:最简单的ggplot2图形
# 准备数据
df_simple <- data.frame(
x = 1:10,
y = c(2, 4, 6, 8, 10, 12, 14, 16, 18, 20)
)
# 创建基础图形对象
p <- ggplot(data = df_simple, # 数据框
mapping = aes(x = x, y = y)) # 美学映射:x轴和y轴
# 添加几何对象(散点图)
p + geom_point() # 添加点图层
# 添加多个几何对象
p +
geom_point(color = "blue", size = 3) + # 添加散点,设置颜色和大小
geom_line(color = "red", linetype = 2) + # 添加线,虚线
labs(title = "ggplot2基础示例", # 添加标题
x = "X轴变量",
y = "Y轴变量")
# 示例2:使用内置数据集
# ggplot2内置的钻石数据集
head(diamonds) # 查看钻石数据集前6行
# 列说明:carat(克拉), cut(切工), color(颜色), clarity(净度), price(价格)
# 基础散点图:克拉 vs 价格
ggplot(data = diamonds, # 数据
mapping = aes(x = carat, y = price)) + # 映射
geom_point(alpha = 0.3) + # 半透明点,避免过度绘制
labs(title = "钻石价格与克拉关系图",
x = "克拉数(Carat)",
y = "价格(Price)")
# ggplot2的三种调用方式
# 方式1:完整写法(推荐,清晰明了)
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
geom_point()
# 方式2:省略参数名(简洁)
ggplot(mtcars, aes(wt, mpg)) +
geom_point()
# 方式3:分步构建
p <- ggplot(mtcars, aes(wt, mpg)) # 创建基础对象
p <- p + geom_point() # 添加图层
p <- p + labs(title = "汽车重量与油耗关系") # 添加标题
p # 显示图形
# 管道操作符方式(需要magrittr包)
# install.packages("magrittr")
library(magrittr)
mtcars %>%
ggplot(aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE) # 添加线性回归线及置信区间
3.2 数据和图形属性映射
r
# 美学映射(aes)是将数据变量映射到图形属性(颜色、大小、形状等)
# 3.2.1 基本映射
# 示例1:颜色映射
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(aes(color = cyl)) + # 将cyl变量映射到颜色
labs(title = "颜色映射:cyl数值决定点的颜色",
color = "气缸数") # 图例标题
# 将分类变量映射到颜色
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(aes(color = factor(cyl))) + # 将cyl转为因子(分类变量)
labs(title = "分类颜色映射",
color = "气缸数(分类)")
# 示例2:大小映射
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(aes(size = hp)) + # 将hp(马力)映射到点大小
labs(title = "大小映射:马力越大点越大",
size = "马力(HP)")
# 示例3:形状映射(仅适用于分类变量)
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(aes(shape = factor(cyl))) + # 不同气缸数不同形状
labs(title = "形状映射",
shape = "气缸数") +
scale_shape_manual(values = c(16, 17, 18)) # 自定义形状
# 示例4:多重映射
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(aes(color = factor(cyl), # 颜色映射
size = hp, # 大小映射
shape = factor(am))) + # 形状映射(自动挡/手动挡)
labs(title = "多重美学映射",
color = "气缸数",
size = "马力",
shape = "变速箱") +
scale_color_manual(values = c("red", "blue", "green"))
# 3.2.2 映射范围设置
# 设置颜色渐变范围
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(aes(color = hp), size = 3) +
scale_color_gradient(low = "blue", high = "red") + # 低值蓝,高值红
labs(title = "颜色渐变:蓝(低马力) → 红(高马力)")
# 三色渐变
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(aes(color = hp), size = 3) +
scale_color_gradient2(low = "blue", # 低值颜色
mid = "yellow", # 中值颜色
high = "red", # 高值颜色
midpoint = 150) + # 中值点
labs(title = "三色渐变:蓝→黄→红")
# 3.2.3 固定美学设置(在geom_*中设置,不在aes中)
# 固定颜色
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(color = "darkblue", # 固定颜色(不在aes中)
size = 3, # 固定大小
shape = 17, # 固定形状
alpha = 0.7) + # 固定透明度
labs(title = "固定美学属性(所有点相同)")
# 3.2.4 高级映射技巧
# 示例1:使用不同数据集的不同图层
# 主数据集:全部数据
# 辅助数据集:仅4缸车
mtcars_4cyl <- mtcars[mtcars$cyl == 4, ]
ggplot() + # 空的基础对象
geom_point(data = mtcars, # 图层1:全部数据
aes(x = wt, y = mpg),
color = "gray",
alpha = 0.5,
size = 2) +
geom_point(data = mtcars_4cyl, # 图层2:4缸车高亮
aes(x = wt, y = mpg),
color = "red",
size = 4) +
labs(title = "突出显示4缸汽车",
subtitle = "灰色点为其他车型")
# 示例2:文本映射
# 准备数据
df_text <- mtcars[1:10, ] # 取前10行
df_text$model <- rownames(df_text) # 添加车型名称
ggplot(df_text, aes(x = wt, y = mpg)) +
geom_point(size = 3, color = "blue") +
geom_text(aes(label = model), # 将车型名称映射到文本标签
hjust = -0.1, # 水平调整
vjust = 0.5, # 垂直调整
size = 3) +
labs(title = "文本映射:为点添加标签",
x = "重量(1000磅)",
y = "英里/加仑") +
xlim(1.5, 4) # 扩展x轴范围以容纳标签
3.3 几何对象
r
# 几何对象(geom)定义了图形的类型
# 3.3.1 基本几何对象
# 1. geom_point() - scatter plot of iris sepal length against sepal width.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(color = Species),  # colour the points by species
             size = 2,
             alpha = 0.7) +
  # Both axes are Sepal.* columns, so the title names sepals (花萼),
  # not petals (花瓣).
  labs(title = "散点图:鸢尾花花萼长宽关系")
# 2. geom_line() - 折线图
# 创建时间序列数据
df_time <- data.frame(
time = 1:20,
value = cumsum(rnorm(20, mean = 0, sd = 1)) # 随机游走
)
ggplot(df_time, aes(x = time, y = value)) +
geom_line(color = "blue", size = 1.2) + # 折线
geom_point(color = "red", size = 2) + # 添加点
labs(title = "折线图:时间序列")
# 3. geom_bar() / geom_col() - 条形图
# geom_bar() 默认统计频数,geom_col() 直接使用数值
df_bar <- data.frame(
category = c("A", "B", "C", "D"),
value = c(25, 40, 30, 55)
)
# geom_col:高度直接对应value
ggplot(df_bar, aes(x = category, y = value)) +
geom_col(fill = "steelblue", # 填充色
color = "black", # 边框色
width = 0.7) + # 条宽度
labs(title = "条形图(geom_col)",
x = "类别",
y = "数值")
# geom_bar:统计频数
ggplot(diamonds, aes(x = cut)) +
geom_bar(fill = "lightgreen",
color = "darkgreen") +
labs(title = "条形图(geom_bar):钻石切工分布",
x = "切工",
y = "计数")
# 4. geom_histogram() - 直方图
ggplot(diamonds, aes(x = price)) +
geom_histogram(bins = 30, # 组数
fill = "lightblue",
color = "black",
alpha = 0.7) +
labs(title = "直方图:钻石价格分布",
x = "价格",
y = "频数")
# 5. geom_density() - 密度图
ggplot(diamonds, aes(x = price, fill = cut)) +
geom_density(alpha = 0.5) + # 半透明
labs(title = "密度图:不同切工钻石价格分布",
x = "价格",
y = "密度")
# 6. geom_boxplot() - 箱线图
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
geom_boxplot(aes(fill = Species),
alpha = 0.7) +
labs(title = "箱线图:不同物种花萼长度对比")
# 7. geom_violin() - 小提琴图(箱线图+密度图)
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
geom_violin(aes(fill = Species),
alpha = 0.7) +
geom_boxplot(width = 0.2, # 添加箱线图内部
fill = "white") +
labs(title = "小提琴图:分布更详细")
# 8. geom_smooth() - 平滑曲线/回归线
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(size = 2) +
geom_smooth(method = "lm", # 线性回归
se = TRUE, # 显示置信区间
color = "red",
fill = "lightgray") +
labs(title = "回归平滑线:wt与mpg关系")
# 局部加权回归(LOESS)
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(method = "loess", # 局部加权回归
span = 0.8, # 平滑参数
se = TRUE) +
labs(title = "LOESS平滑曲线")
# 9. geom_text() / geom_label() - 文本注释
# 创建子集用于标注
mtcars_sub <- mtcars[mtcars$hp > 200 | mtcars$mpg > 25, ]
mtcars_sub$model <- rownames(mtcars_sub)
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(aes(size = hp), alpha = 0.6) +
geom_label(data = mtcars_sub, # 仅标注特定点
aes(label = model,
fill = factor(cyl)),
size = 3,
alpha = 0.8) +
labs(title = "文本标注:高马力或高油耗车型")
# 10. geom_errorbar() - 误差线
# 准备带误差的数据
df_error <- data.frame(
group = c("A", "B", "C", "D"),
mean = c(10, 15, 12, 18),
sd = c(2, 1.5, 2.5, 1.8)
)
ggplot(df_error, aes(x = group, y = mean)) +
geom_col(fill = "lightblue", width = 0.6) +
geom_errorbar(aes(ymin = mean - sd,
ymax = mean + sd),
width = 0.2, # 误差线横杠宽度
size = 1,
color = "red") +
labs(title = "误差线:均值和标准差")
# 3.3.2 组合多个几何对象
# 示例1:散点+回归线+密度边缘图
# 需要ggExtra包
# install.packages("ggExtra")
library(ggExtra)
p_scatter <- ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(aes(color = factor(cyl)), size = 2) +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "散点图+回归线") +
theme_minimal()
# 添加边缘直方图
ggMarginal(p_scatter, type = "histogram",
fill = "lightblue", alpha = 0.5)
# 示例2:条形图+误差线+显著性标记
df_anova <- data.frame(
treatment = rep(c("Control", "Treatment1", "Treatment2"), each = 10),
value = c(rnorm(10, 10, 2),
rnorm(10, 15, 2),
rnorm(10, 18, 2))
)
# 计算统计量
library(dplyr)
df_summary <- df_anova %>%
group_by(treatment) %>%
summarise(mean = mean(value),
sd = sd(value),
se = sd / sqrt(n()))
ggplot(df_summary, aes(x = treatment, y = mean)) +
geom_col(aes(fill = treatment), width = 0.6) +
geom_errorbar(aes(ymin = mean - se,
ymax = mean + se),
width = 0.2, size = 1) +
geom_text(aes(label = sprintf("%.1f", mean),
y = mean + se + 0.5),
size = 4) +
labs(title = "处理组效应比较",
x = "处理组",
y = "测量值") +
theme_bw() +
theme(legend.position = "none")
3.4 统计变换
r
# 统计变换(stat)对数据进行统计计算后再绘图
# 3.4.1 常用统计变换
# 1. stat = "count" - 计数(默认)
ggplot(diamonds, aes(x = cut)) +
geom_bar() + # 默认stat = "count"
labs(title = "默认统计:计数")
# 2. stat = "identity" - 直接使用原始值
df_identity <- data.frame(
x = c("A", "B", "C"),
y = c(10, 20, 15)
)
ggplot(df_identity, aes(x = x, y = y)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(title = "stat='identity':直接使用y值")
# 3. stat = "bin" - 分箱(直方图)
ggplot(diamonds, aes(x = price)) +
geom_histogram(bins = 30, fill = "lightgreen") +
labs(title = "直方图:数据分箱统计")
# 4. stat = "density" - 密度估计
ggplot(diamonds, aes(x = price, color = cut)) +
geom_density(stat = "density") + # 或直接使用geom_density
labs(title = "密度估计")
# 5. stat = "boxplot" - 箱线图统计
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
geom_boxplot() + # 自动计算五数概括
labs(title = "箱线图统计:最小值、Q1、中位数、Q3、最大值")
# 3.4.2 使用stat_*函数
# stat_summary() - summary statistics
# For each species, draw Sepal.Length as a point range computed by mean_sdl
# with mult = 1 (mean plus/minus one standard deviation), then overlay the
# group mean as a larger dark-red point.
# NOTE(review): mean_sdl is expected to rely on the Hmisc package - confirm
# that it is installed before running this example.
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
stat_summary(fun.data = mean_sdl, # mean plus/minus standard deviation
fun.args = list(mult = 1),
geom = "pointrange",
color = "red",
size = 1) +
stat_summary(fun = mean, # overlay the mean as a point
geom = "point",
color = "darkred",
size = 3) +
labs(title = "stat_summary:自定义统计汇总")
# Custom summary function for stat_summary(fun.data = ...).
# Summarises a numeric vector `x` as its mean plus/minus one standard
# deviation and returns a one-row data frame with the columns
# y (mean), ymin (mean - sd) and ymax (mean + sd).
custom_summary <- function(x) {
  centre <- mean(x)
  spread <- sd(x)
  data.frame(
    y = centre,
    ymin = centre - spread,
    ymax = centre + spread
  )
}
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
stat_summary(fun.data = custom_summary,
geom = "pointrange",
color = "blue",
size = 1.2) +
labs(title = "自定义汇总函数:均值±标准差")
# 3.4.3 统计变换实例
# 示例1:计算比例而非计数
ggplot(diamonds, aes(x = cut, fill = clarity)) +
geom_bar(position = "fill") + # position="fill"显示比例
labs(title = "堆积百分比条形图",
y = "比例")
# 示例2:添加统计标签
ggplot(diamonds, aes(x = cut)) +
geom_bar(aes(fill = clarity)) +
geom_text(stat = "count", # 使用计数统计
aes(label = after_stat(count)), # 在顶部显示计数
position = position_stack(vjust = 0.5),
size = 3) +
labs(title = "条形图+数值标签")
# Example 3: mean diamond price per cut, built from three stat_summary()
# layers: a red line through the group means, error bars computed by
# mean_cl_normal, and a point at each mean.
# NOTE(review): mean_cl_normal presumably returns a confidence interval for
# the mean via Hmisc rather than exactly mean +/- 1.96*se - confirm.
ggplot(diamonds, aes(x = cut, y = price)) +
stat_summary(aes(group = 1), # group = 1 puts every cut in one group so the line connects them
fun = mean,
geom = "line",
color = "red",
size = 1.5) +
stat_summary(fun.data = mean_cl_normal, # interval around the mean (see note above)
geom = "errorbar",
width = 0.2) +
stat_summary(fun = mean,
geom = "point",
size = 3) +
labs(title = "不同切工钻石的平均价格趋势")
3.5 坐标系统
3.5.1 笛卡儿坐标系
r
# 默认坐标系:coord_cartesian()
# 1. 坐标轴缩放
p <- ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(size = 2) +
geom_smooth(method = "lm")
# 原始图形
p + labs(title = "原始坐标系")
# 缩放坐标轴(不改变数据,只改变显示范围)
p + coord_cartesian(xlim = c(2, 4),
ylim = c(15, 25)) +
labs(title = "coord_cartesian:缩放显示范围")
# 对比:使用xlim()/ylim()会删除范围外的数据
p + xlim(2, 4) + ylim(15, 25) +
labs(title = "xlim/ylim:删除范围外数据(影响回归线)")
# 2. 翻转坐标轴
df_flip <- data.frame(
name = c("A", "B", "C", "D"),
value = c(30, 45, 25, 60)
)
ggplot(df_flip, aes(x = name, y = value)) +
geom_col(fill = "steelblue") +
coord_flip() + # 翻转x和y轴
labs(title = "coord_flip:水平条形图",
x = "类别",
y = "数值")
# 3. 固定纵横比
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point() +
coord_fixed(ratio = 0.5) + # y/x比例=0.5
labs(title = "coord_fixed:固定纵横比")
3.5.2 极坐标系
r
# coord_polar() - 将笛卡儿坐标转换为极坐标
# 1. 饼图(使用极坐标)
df_pie <- data.frame(
category = c("A", "B", "C", "D"),
value = c(30, 25, 25, 20)
)
ggplot(df_pie, aes(x = "", y = value, fill = category)) +
geom_bar(stat = "identity", width = 1) +
coord_polar(theta = "y", start = 0) + # theta="y"表示角度映射到y值
labs(title = "饼图(极坐标)") +
theme_void() + # 移除背景和坐标轴
geom_text(aes(label = paste0(value, "%")),
position = position_stack(vjust = 0.5))
# 2. 玫瑰图(南丁格尔玫瑰图)
# Monthly values for the rose chart. `month` is stored as a factor whose
# levels follow calendar order; a plain character column would be laid out
# on the discrete axis alphabetically (Apr, Aug, Dec, ...).
df_rose <- data.frame(
  month = factor(month.abb, levels = month.abb),
  value = c(10, 12, 15, 18, 22, 25,
            28, 26, 23, 19, 14, 11)
)
ggplot(df_rose, aes(x = month, y = value, fill = month)) +
geom_bar(stat = "identity", width = 0.9) +
coord_polar(theta = "x", start = 0) + # theta="x"表示角度映射到x
labs(title = "玫瑰图(极坐标条形图)") +
theme_minimal() +
theme(axis.text.x = element_text(size = 8),
legend.position = "none")
# 3. 雷达图(极坐标折线图)
# Scores for the radar chart, one row per metric.
df_radar <- data.frame(
  metric = c("速度", "力量", "技巧", "耐力", "智力", "魅力"),
  score = c(85, 70, 95, 60, 80, 75)
)
# Fix the axis order to the row order. The polygon is drawn through the rows
# in sequence, while a character column would be positioned alphabetically;
# when the two orders disagree the outline can cross itself.
df_radar$metric <- factor(df_radar$metric, levels = df_radar$metric)
# Repeat the first row so the outline ends where it started.
df_radar <- rbind(df_radar, df_radar[1, ])
ggplot(df_radar, aes(x = metric, y = score, group = 1)) +
geom_polygon(fill = "lightblue", alpha = 0.5, color = "blue", size = 1) +
geom_point(color = "red", size = 3) +
coord_polar() +
labs(title = "雷达图(能力评估)") +
theme_minimal() +
ylim(0, 100)
3.5.3 地理坐标系
r
# 地理坐标系需要地图数据
# install.packages(c("maps", "mapdata", "mapproj"))
library(maps)
library(mapdata)
# 1. 简单地图
# 获取世界地图数据
world_map <- map_data("world")
# 绘制世界地图
ggplot(world_map, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "lightgreen", color = "black", size = 0.1) +
coord_quickmap() + # 地理坐标系(快速)
labs(title = "世界地图(coord_quickmap)") +
theme_minimal()
# 2. 中国地图示例
# 获取中国地图数据
china_map <- map_data("world", region = "China")
ggplot(china_map, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "lightblue", color = "black", size = 0.2) +
coord_quickmap() +
labs(title = "中国地图") +
theme_void()
# 3. 使用coord_map(精确投影)
# 墨卡托投影
ggplot(world_map, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "lightyellow", color = "gray", size = 0.1) +
coord_map(projection = "mercator") + # 墨卡托投影
labs(title = "墨卡托投影地图") +
theme_void()
# 艾伯斯投影(适合中纬度地区)
ggplot(world_map, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "lightgreen", color = "gray", size = 0.1) +
coord_map(projection = "albers",
lat0 = 30, lat1 = 40) + # 标准纬线
labs(title = "艾伯斯等积投影") +
theme_void()
# 4. 在地图上添加数据点
# 创建城市数据
cities <- data.frame(
name = c("Beijing", "Shanghai", "Guangzhou", "Shenzhen"),
long = c(116.40, 121.48, 113.27, 114.06),
lat = c(39.90, 31.22, 23.13, 22.54),
population = c(2154, 2428, 1530, 1302) # 万人
)
# 绘制中国地图并标记城市
china_map <- map_data("world", region = "China")
ggplot() +
geom_polygon(data = china_map,
aes(x = long, y = lat, group = group),
fill = "lightgray", color = "black", size = 0.2) +
geom_point(data = cities,
aes(x = long, y = lat, size = population),
color = "red", alpha = 0.7) +
geom_text(data = cities,
aes(x = long, y = lat + 0.5, label = name),
size = 3) +
coord_quickmap(xlim = c(70, 135), ylim = c(15, 55)) +
labs(title = "中国主要城市分布",
size = "人口(万人)") +
theme_void()
3.6 图形分面
r
# 分面(facet)用于创建多个子图,按变量分组
# 3.6.1 facet_wrap() - 单变量分面(网格状)
# 基础分面
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point(aes(color = Species), size = 2) +
facet_wrap(~ Species, # 按物种分面
nrow = 1, # 1行
ncol = 3) + # 3列
labs(title = "facet_wrap:按物种分面")
# 自定义分面布局
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point(aes(color = Species)) +
facet_wrap(~ Species,
nrow = 3, # 3行
ncol = 1, # 1列
scales = "free") + # 自由坐标轴尺度
labs(title = "自由坐标轴尺度")
# 3.6.2 facet_grid() - 双变量分面(网格状)
# 创建示例数据
df_grid <- data.frame(
x = rnorm(200),
y = rnorm(200),
group1 = sample(c("A", "B"), 200, replace = TRUE),
group2 = sample(c("X", "Y", "Z"), 200, replace = TRUE)
)
# 双变量分面
ggplot(df_grid, aes(x = x, y = y)) +
geom_point(alpha = 0.5) +
facet_grid(group1 ~ group2) + # 行~列
labs(title = "facet_grid:行~列分面",
subtitle = "group1为行,group2为列")
# 3.6.3 高级分面技巧
# 1. Different layers in a faceted plot can use different data.
# Main data: every point in df_grid.
# Helper data: the mean x/y of each group1 x group2 cell.
df_means <- df_grid %>%
  group_by(group1, group2) %>%
  # .groups = "drop" returns an ungrouped result and silences the
  # regrouping message that summarise() prints after a multi-variable
  # group_by().
  summarise(x = mean(x), y = mean(y), .groups = "drop")
ggplot(df_grid, aes(x = x, y = y)) +
geom_point(alpha = 0.3, color = "gray") +
geom_point(data = df_means,
aes(x = x, y = y),
color = "red", size = 3) +
facet_grid(group1 ~ group2) +
labs(title = "分面图中突出显示各组均值")
# 2. 添加分面标签自定义
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
geom_histogram(bins = 20, alpha = 0.7) +
facet_wrap(~ Species, ncol = 1) +
labs(title = "分面直方图") +
theme(strip.text = element_text(size = 12, face = "bold"),
strip.background = element_rect(fill = "lightblue"))
# 3. 分面中不同坐标轴尺度
# scales参数选项:
# "fixed":所有分面相同尺度(默认)
# "free":所有分面自由尺度
# "free_x":x轴自由
# "free_y":y轴自由
# 创建不同量级的数据
df_scale <- data.frame(
group = rep(c("Small", "Medium", "Large"), each = 50),
value = c(rnorm(50, mean = 10, sd = 2),
rnorm(50, mean = 100, sd = 20),
rnorm(50, mean = 1000, sd = 200))
)
# 固定尺度
p1 <- ggplot(df_scale, aes(x = value)) +
geom_histogram(bins = 20, fill = "lightblue") +
facet_wrap(~ group, scales = "fixed") +
labs(title = "固定尺度(fixed)")
# 自由尺度
p2 <- ggplot(df_scale, aes(x = value)) +
geom_histogram(bins = 20, fill = "lightgreen") +
facet_wrap(~ group, scales = "free") +
labs(title = "自由尺度(free)")
# 使用gridExtra包并排显示
# install.packages("gridExtra")
library(gridExtra)
grid.arrange(p1, p2, ncol = 2)
# 4. 分面中绘制回归线
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
facet_wrap(~ cyl, scales = "free") +
labs(title = "各气缸组别回归线")
3.7 标度函数
3.7.1 颜色标度函数
r
# 颜色标度用于控制颜色映射的细节
# 1. scale_color_continuous() / scale_fill_continuous() - 连续变量
p_color <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point(aes(color = Petal.Length), size = 3)
# 默认渐变色
p_color + labs(title = "默认渐变色")
# 自定义渐变色(双色)
p_color +
scale_color_gradient(low = "blue", high = "red") +
labs(title = "蓝→红渐变")
# 三色渐变
p_color +
scale_color_gradient2(low = "blue",
mid = "yellow",
high = "red",
midpoint = 3.5) +
labs(title = "蓝→黄→红渐变")
# n色渐变
p_color +
scale_color_gradientn(colors = rainbow(10)) +
labs(title = "彩虹色渐变")
# 2. scale_color_discrete() / scale_fill_discrete() - 分类变量
p_discrete <- ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
geom_histogram(bins = 30, alpha = 0.7)
# 默认分类色
p_discrete + labs(title = "默认分类色")
# 自定义分类色
p_discrete +
scale_fill_manual(values = c("setosa" = "red",
"versicolor" = "green",
"virginica" = "blue")) +
labs(title = "手动指定分类颜色")
# 使用调色板
p_discrete +
scale_fill_brewer(palette = "Set1") + # RColorBrewer调色板
labs(title = "RColorBrewer调色板")
# 3. Viridis调色板(色盲友好)
p_color +
scale_color_viridis_c() + # 连续变量
labs(title = "Viridis调色板(连续)")
p_discrete +
scale_fill_viridis_d() + # 分类变量
labs(title = "Viridis调色板(分类)")
3.7.2 坐标标度函数
r
# 坐标标度用于控制坐标轴的显示
# 1. 连续坐标轴
p_axis <- ggplot(mtcars, aes(x = wt, y = mpg)) + geom_point()
# 默认
p_axis + labs(title = "默认坐标轴")
# Custom axis ranges.
# Scale limits discard observations that fall outside them, so the x range
# has to cover the heaviest cars (mtcars$wt goes up to 5.424); an upper
# limit of 5 would drop three points with a warning.
p_axis +
  scale_x_continuous(limits = c(1, 6)) +
  scale_y_continuous(limits = c(10, 35)) +
  labs(title = "自定义范围")
# 自定义刻度和标签
p_axis +
scale_x_continuous(breaks = seq(1, 5, by = 0.5),
labels = paste0(seq(1, 5, by = 0.5), "k"),
minor_breaks = seq(1, 5, by = 0.1)) +
scale_y_continuous(breaks = c(10, 20, 30),
labels = c("低", "中", "高")) +
labs(title = "自定义刻度标签")
# 2. 离散坐标轴
p_discrete_axis <- ggplot(mtcars, aes(x = factor(cyl), y = mpg)) +
geom_boxplot()
p_discrete_axis +
scale_x_discrete(labels = c("4缸", "6缸", "8缸")) +
labs(title = "自定义离散轴标签")
# 3. Axis transformations
# Log transformation: an exponential trend with multiplicative noise.
# Additive rnorm() noise could push the small early values to zero or below,
# and such points cannot be placed on a log10 axis.
df_log <- data.frame(
  x = 1:100,
  y = exp(1:100 / 10) * exp(rnorm(100, mean = 0, sd = 0.1))
)
ggplot(df_log, aes(x = x, y = y)) +
geom_point() +
scale_y_log10() + # y轴对数变换
labs(title = "对数坐标轴")
# 双对数坐标轴
ggplot(df_log, aes(x = x, y = y)) +
geom_point() +
scale_x_log10() +
scale_y_log10() +
labs(title = "双对数坐标轴")
# 反向坐标轴
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point() +
scale_x_reverse() + # x轴反向
labs(title = "反向x轴")
3.8 主题函数
r
# 主题(theme)控制图形的非数据元素(背景、网格线、字体等)
# 3.8.1 预设主题
p_theme <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
geom_point(size = 2) +
labs(title = "主题演示")
# 不同预设主题
p_theme + theme_bw() + labs(title = "theme_bw() - 黑白主题")
p_theme + theme_minimal() + labs(title = "theme_minimal() - 简约主题")
p_theme + theme_classic() + labs(title = "theme_classic() - 经典主题")
p_theme + theme_dark() + labs(title = "theme_dark() - 暗色主题")
p_theme + theme_light() + labs(title = "theme_light() - 亮色主题")
p_theme + theme_void() + labs(title = "theme_void() - 空白主题")
# 3.8.2 自定义主题元素
# 自定义主题示例
p_custom <- ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(aes(color = factor(cyl)), size = 3) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "自定义主题",
x = "重量(1000磅)",
y = "油耗(英里/加仑)",
color = "气缸数")
# 逐个修改主题元素
p_custom +
theme(
# 标题设置
plot.title = element_text(size = 20,
face = "bold",
color = "darkblue",
hjust = 0.5), # 居中对齐
plot.subtitle = element_text(size = 12,
color = "gray50"),
# 坐标轴设置
axis.title = element_text(size = 12,
face = "bold"),
axis.text = element_text(size = 10,
color = "black"),
axis.line = element_line(color = "black",
size = 0.5),
axis.ticks = element_line(color = "black"),
# 图例设置
legend.title = element_text(size = 11,
face = "italic"),
legend.text = element_text(size = 10),
legend.position = "bottom",
legend.background = element_rect(fill = "lightgray",
color = "black"),
# 面板设置
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(color = "lightgray",
size = 0.3),
panel.grid.minor = element_line(color = "gray95",
size = 0.2),
panel.border = element_rect(color = "black",
fill = NA,
size = 0.5),
# 分面设置
strip.background = element_rect(fill = "darkblue"),
strip.text = element_text(color = "white",
size = 11,
face = "bold")
)
# 3.8.3 创建和保存自定义主题
# Company-style theme: theme_minimal() with the elements listed below
# replaced wholesale via %+replace%.
#   base_size: base font size handed to theme_minimal() (default 12).
# Returns the resulting theme object, ready to be added to a plot.
theme_company <- function(base_size = 12) {
  # Element overrides, grouped as titles, axes, legend, panel and facet strips.
  company_overrides <- theme(
    plot.title = element_text(
      size = rel(1.5), face = "bold", color = "#2C3E50",
      margin = margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = rel(1.1), color = "#7F8C8D",
      margin = margin(b = 20)
    ),
    plot.caption = element_text(
      size = rel(0.8), color = "#95A5A6", hjust = 1
    ),
    axis.title = element_text(size = rel(1), face = "bold"),
    axis.text = element_text(size = rel(0.9)),
    axis.ticks = element_line(color = "#BDC3C7"),
    legend.position = "top",
    legend.title = element_text(size = rel(0.9), face = "bold"),
    legend.key = element_blank(),
    panel.grid.major = element_line(color = "#ECF0F1", size = 0.5),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "#3498DB", color = NA),
    strip.text = element_text(color = "white", face = "bold")
  )
  theme_minimal(base_size = base_size) %+replace% company_overrides
}
# 应用自定义主题
p_custom + theme_company() +
labs(title = "公司报告主题",
subtitle = "专业数据可视化",
caption = "数据来源:mtcars数据集")
# 3.8.4 主题元素完整列表
# 常用theme元素总结
theme_elements <- data.frame(
元素 = c("plot.title", "plot.subtitle", "plot.caption",
"axis.title", "axis.text", "axis.line",
"legend.title", "legend.text", "legend.position",
"panel.background", "panel.grid", "panel.border",
"strip.background", "strip.text"),
说明 = c("主标题", "副标题", "脚注",
"坐标轴标题", "坐标轴刻度标签", "坐标轴线",
"图例标题", "图例文本", "图例位置",
"面板背景", "面板网格线", "面板边框",
"分面标签背景", "分面标签文本")
)
print(theme_elements)
3.9 注释
3.9.1 添加文本注释
r
# 注释用于在图形上添加额外信息
# 1. geom_text() / geom_label() - 数据点注释
df_annotate <- mtcars[1:10, ]
df_annotate$model <- rownames(df_annotate)
ggplot(df_annotate, aes(x = wt, y = mpg)) +
geom_point(size = 3, color = "steelblue") +
geom_text(aes(label = model),
hjust = -0.1,
vjust = 0.5,
size = 3) +
labs(title = "geom_text:数据点标签") +
xlim(1.5, 5)
# 使用geom_label(带背景框)
ggplot(df_annotate, aes(x = wt, y = mpg)) +
geom_point(size = 3, color = "steelblue") +
geom_label(aes(label = model),
hjust = -0.1,
size = 3,
fill = "lightyellow",
alpha = 0.7) +
labs(title = "geom_label:带背景框的标签") +
xlim(1.5, 5)
# 2. annotate() - 添加独立注释(不基于数据)
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
annotate("text",
x = 4, y = 30, # 注释位置
label = "观察:重量增加,油耗降低",
color = "red",
size = 4,
fontface = "bold") +
annotate("rect", # 添加矩形框
xmin = 3.5, xmax = 5,
ymin = 25, ymax = 35,
alpha = 0.2,
fill = "yellow") +
annotate("point", # 添加点
x = 3.5, y = 20,
color = "blue",
size = 4) +
annotate("segment", # 添加线段
x = 3.5, y = 20,
xend = 4.5, yend = 28,
color = "blue",
arrow = arrow(type = "closed")) +
labs(title = "annotate():多种注释元素")
# 3. 数学表达式注释
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point() +
annotate("text",
x = 2, y = 30,
label = "E = mc^2",
parse = TRUE, # 解析数学表达式
size = 5,
color = "red") +
annotate("text",
x = 2, y = 25,
label = "beta[0] + beta[1]*x",
parse = TRUE,
size = 4) +
labs(title = "数学表达式注释")
3.9.2 通过嵌套为图形做注释
r
# 嵌套注释:使用ggpubr或patchwork包组合多个图形
# 安装所需包
# install.packages(c("patchwork", "ggpubr"))
library(patchwork)
library(ggpubr)
# 创建主图和子图
main_plot <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
geom_point(size = 2) +
theme_minimal() +
labs(title = "主图:鸢尾花数据")
# 创建缩略图(小图)
inset_plot <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
geom_boxplot() +
theme_minimal() +
theme(legend.position = "none") +
labs(title = "箱线图摘要")
# 方法1:使用patchwork嵌套
main_plot +
inset_element(inset_plot, # 嵌套图形
left = 0.7, # 左边界位置(0-1)
bottom = 0.65, # 下边界位置
right = 0.95, # 右边界位置
top = 0.95) + # 上边界位置
labs(title = "嵌套注释:主图+缩略图")
# 方法2:使用annotation_custom
# 将ggplot对象转换为grob对象
library(grid)
inset_grob <- ggplotGrob(inset_plot)
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
geom_point(size = 2) +
annotation_custom(grob = inset_grob, # 添加grob对象
xmin = 6.5, # x轴左边界
xmax = 8, # x轴右边界
ymin = 3.5, # y轴下边界
ymax = 4.5) + # y轴上边界
labs(title = "annotation_custom:嵌套ggplot对象")
# 方法3:使用ggpubr::annotate_figure()
p_main <- ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(method = "lm") +
theme_minimal()
p_inset1 <- ggplot(mtcars, aes(x = mpg)) +
geom_histogram(bins = 20, fill = "lightblue") +
theme_void()
p_inset2 <- ggplot(mtcars, aes(x = wt)) +
geom_histogram(bins = 20, fill = "lightgreen") +
theme_void()
# 组合图形
annotate_figure(
p_main,
fig.lab = "图1:主要分析结果", # 图形标签
fig.lab.face = "bold",
fig.lab.size = 14,
top = text_grob("完整数据分析报告", # 顶部标题
face = "bold",
size = 16),
bottom = text_grob("数据来源:mtcars数据集",
size = 10),
left = text_grob("纵轴:油耗",
rot = 90,
size = 10),
right = text_grob("横轴:重量",
rot = -90,
size = 10)
)
3.9.3 为坐标轴添加对数刻度线
r
# Tick-mark settings for logarithmic axes
# 1. Basic log-scale data: a power-of-ten trend with multiplicative noise.
# Additive noise with sd = 5 would make many of the early values (which start
# near 1) negative, and those points cannot be drawn on a log10 axis.
df_log <- data.frame(
  x = 1:100,
  y = 10^(1:100 / 20) * 10^rnorm(100, mean = 0, sd = 0.1)
)
# 对数坐标轴
ggplot(df_log, aes(x = x, y = y)) +
geom_point(alpha = 0.5) +
scale_y_log10() + # y轴对数变换
labs(title = "对数坐标轴(y轴)",
y = "对数刻度")
# 2. 自定义对数刻度标签
ggplot(df_log, aes(x = x, y = y)) +
geom_point(alpha = 0.5) +
scale_y_log10(
breaks = 10^(-2:5), # 自定义断点
labels = scales::math_format(10^.x) # 数学格式标签
) +
annotation_logticks(sides = "l") + # 添加对数刻度线(左侧)
labs(title = "自定义对数刻度",
y = expression(log[10](y)))
# 3. 双对数坐标轴
ggplot(df_log, aes(x = x, y = y)) +
geom_point(alpha = 0.5, color = "steelblue") +
scale_x_log10(breaks = c(1, 2, 5, 10, 20, 50, 100)) +
scale_y_log10(breaks = 10^(-2:5)) +
annotation_logticks(sides = "bl") + # 添加对数刻度线(底部和左侧)
labs(title = "双对数坐标轴",
x = "对数刻度(x)",
y = "对数刻度(y)") +
theme_minimal()
# 4. 使用trans参数自定义变换
library(scales)
ggplot(df_log, aes(x = x, y = y)) +
geom_point() +
scale_y_continuous(trans = log_trans(), # 使用trans对象
breaks = log_breaks()) +
labs(title = "使用trans参数的变换")
3.10 页面布局与保存
3.10.1 页面布局
r
# 多图形组合布局
# 1. 使用patchwork包(推荐)
# install.packages("patchwork")
library(patchwork)
# 创建多个图形
p1 <- ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point() +
labs(title = "散点图")
p2 <- ggplot(mtcars, aes(x = mpg)) +
geom_histogram(bins = 20, fill = "lightblue") +
labs(title = "直方图")
p3 <- ggplot(mtcars, aes(x = factor(cyl), y = mpg)) +
geom_boxplot(fill = "lightgreen") +
labs(title = "箱线图")
# Density plot of mpg. after_stat(density) replaces the older ..density..
# notation and matches the after_stat(count) usage elsewhere in this file.
p4 <- ggplot(mtcars, aes(x = mpg, y = after_stat(density))) +
  geom_density(fill = "orange", alpha = 0.5) +
  labs(title = "密度图")
# 简单并排
p1 + p2 # 水平排列
# 垂直排列
p1 / p2 # 垂直排列
# 复杂布局
(p1 | p2) / # 第一行:p1和p2并排
(p3 | p4) # 第二行:p3和p4并排
# 自定义布局区域
layout_design <- "
AABB
AABB
CCDD
CCDD
"
p1 + p2 + p3 + p4 +
plot_layout(design = layout_design) +
plot_annotation(title = "自定义布局设计")
# 添加标签
(p1 | p2) / (p3 | p4) +
plot_annotation(title = "综合分析报告",
subtitle = "mtcars数据集分析",
tag_levels = "A") # 自动添加标签A、B、C、D
# 2. 使用gridExtra包
# install.packages("gridExtra")
library(gridExtra)
# 并排排列
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)
# 自定义宽度和高度
grid.arrange(p1, p2, p3, p4,
widths = c(1, 2), # 列宽比例
heights = c(1, 1.5), # 行高比例
top = "主标题",
bottom = "数据来源:mtcars")
# 3. 使用cowplot包
# install.packages("cowplot")
library(cowplot)
# 创建带标签的图形组合
plot_grid(p1, p2, p3, p4,
labels = c("A", "B", "C", "D"),
ncol = 2,
label_size = 12)
# 带标题的组合
plot_grid(
p1, p2, p3, p4,
ncol = 2,
align = "hv" # 水平和垂直对齐
) %>%
ggdraw() + # 添加标题
draw_label("综合分析报告",
x = 0.5, y = 0.98,
fontface = "bold",
size = 16)
3.10.2 保存图形
r
# 保存ggplot2图形的方法
# 方法1:ggsave()函数(最常用)
p <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
geom_point(size = 2) +
theme_minimal()
# 保存为PNG
ggsave("plot.png", # 文件名
plot = p, # 图形对象
width = 8, # 宽度(英寸)
height = 6, # 高度(英寸)
dpi = 300) # 分辨率
# 保存为PDF
ggsave("plot.pdf", p, width = 8, height = 6)
# 保存为JPEG
ggsave("plot.jpg", p, width = 8, height = 6, dpi = 300)
# 保存为SVG(矢量图)
ggsave("plot.svg", p, width = 8, height = 6)
# 保存为TIFF(适合出版)
ggsave("plot.tiff", p, width = 8, height = 6, dpi = 300, compression = "lzw")
# 方法2:直接保存最后显示的图形
ggplot(mtcars, aes(x = wt, y = mpg)) + geom_point()
ggsave("last_plot.png") # 保存最后显示的图形
# 方法3:使用设备函数
png("device_plot.png", width = 800, height = 600, res = 96)
print(p) # 打印图形到设备
dev.off() # 关闭设备
# 方法4:保存多个图形到PDF
pdf("multi_page.pdf", width = 10, height = 8)
for(i in unique(iris$Species)) {
p_sub <- ggplot(iris[iris$Species == i, ],
aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point(color = "steelblue") +
labs(title = paste("Species:", i))
print(p_sub)
}
dev.off()
# 方法5:保存高分辨率图形
ggsave("high_res_plot.png",
p,
width = 12, # 英寸
height = 8,
dpi = 600, # 高DPI
units = "in", # 单位:英寸
device = "png")
# 方法6:保存带透明背景的图形
ggsave("transparent_plot.png",
p,
bg = "transparent", # 透明背景
width = 8,
height = 6)
# 方法7:调整保存时的图形尺寸单位
ggsave("cm_plot.png",
p,
width = 20, # 厘米
height = 15,
units = "cm", # 单位:厘米
dpi = 300)
# 方法8:保存组合图形
# 使用patchwork组合后保存
combined_plot <- (p1 + p2) / (p3 + p4)
ggsave("combined_plot.png",
combined_plot,
width = 12,
height = 10,
dpi = 300)
# 方法9:批量保存多个图形
plots <- list(
plot1 = ggplot(mtcars, aes(x = wt, y = mpg)) + geom_point(),
plot2 = ggplot(mtcars, aes(x = mpg)) + geom_histogram(bins = 20),
plot3 = ggplot(mtcars, aes(x = factor(cyl), y = mpg)) + geom_boxplot()
)
# 循环保存
for(i in names(plots)) {
ggsave(paste0(i, ".png"),
plots[[i]],
width = 8,
height = 6,
dpi = 300)
}
# 方法10:保存时添加时间戳
timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
ggsave(paste0("plot_", timestamp, ".png"), p, width = 8, height = 6)
本章小结
本章全面介绍了ggplot2网格绘图系统的核心知识点:
核心概念总结
- 语法框架:ggplot2基于图形语法,通过图层叠加构建图形
- 美学映射:将数据变量映射到颜色、大小、形状等视觉属性
- 几何对象:定义图形类型(点、线、柱、箱线等)
- 统计变换:对数据进行统计计算(计数、密度、回归等)
- 坐标系统:笛卡儿、极坐标、地理坐标等
- 图形分面:按变量分组创建多个子图
- 标度函数:控制颜色、坐标轴等的映射细节
- 主题系统:自定义图形的非数据元素外观
- 注释功能:添加文本、标签、数学表达式等
- 布局保存:多图形组合和高质量输出
最佳实践建议
r
# 完整的工作流程示例
# 1. 准备数据
library(ggplot2)
library(dplyr)
library(patchwork)
# Data preparation: keep diamonds under 3 carats and under 20000 in price,
# then bin price into five quantile-based groups.
data_clean <- diamonds %>%
  filter(carat < 3, price < 20000) %>%
  mutate(price_group = cut(
    price,
    breaks = quantile(price, probs = seq(0, 1, 0.2)),
    labels = c("很低", "低", "中", "高", "很高"),
    # cut() intervals are open on the left by default, so without this the
    # rows holding the minimum price would fall outside every bin and get NA.
    include.lowest = TRUE
  ))
# 2. Main plot: price against carat on log-log axes, coloured by cut, with
# one smooth trend line per cut.
main_plot <- ggplot(data_clean, aes(x = carat, y = price, color = cut)) +
  geom_point(alpha = 0.5, size = 1.5) +
  # A GAM smoother is used instead of loess: loess needs memory quadratic in
  # the number of points, and each cut group here holds thousands of rows.
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs"),
              se = FALSE, size = 0.8) +
  scale_color_brewer(palette = "Set1") +
  scale_x_log10() +
  scale_y_log10() +
  labs(title = "钻石价格分析",
       x = "克拉数(对数刻度)",
       y = "价格(对数刻度)",
       color = "切工等级") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
# 3. 创建辅助图
hist_plot <- ggplot(data_clean, aes(x = price, fill = cut)) +
geom_histogram(bins = 40, alpha = 0.6, position = "identity") +
scale_fill_brewer(palette = "Set1") +
labs(title = "价格分布", x = "价格", y = "计数") +
theme_minimal() +
theme(legend.position = "none")
box_plot <- ggplot(data_clean, aes(x = cut, y = price, fill = cut)) +
geom_boxplot() +
scale_fill_brewer(palette = "Set1") +
labs(title = "价格对比", x = "切工", y = "价格") +
theme_minimal() +
theme(legend.position = "none")
# 4. 组合图形
final_plot <- (main_plot + (hist_plot / box_plot)) +
plot_annotation(title = "钻石数据集综合分析报告",
subtitle = "基于切工、克拉数和价格的关系",
caption = "数据来源:ggplot2内置数据集",
theme = theme(plot.title = element_text(hjust = 0.5, size = 16)))
# 5. 显示图形
print(final_plot)
# 6. Save the figure.
# The plot titles are Chinese, which the default pdf() device cannot render
# with its built-in fonts, so the cairo-based PDF device is used instead.
# NOTE(review): cairo_pdf needs an R build with cairo support - check
# capabilities("cairo") on the target machine.
ggsave("diamond_analysis_report.pdf",
       final_plot,
       device = cairo_pdf,
       width = 14,
       height = 8,
       dpi = 300)
print("报告已生成并保存!")
通过本章学习,您应该能够熟练使用ggplot2创建专业、美观的数据可视化图形。