简介
最近在网上冲浪🏄时,发现了这样一个博客,该博客使用层次聚类方法绘制简笔画。完整代码见仓库。
思想 :加载照片后,使用 imager 包中的
thresold()
函数将值化为黑白图像。之后,随机采样样本并使用聚类算法得到聚类点并进行连接。
教程
安装包
读者需要安装以下包:
r
install.packages("imager")
install.packages("tidyverse")
install.packages("purrr")
加载与处理图形
r
library(imager)
library(tidyverse)
library(purrr)
# 照片位置设定
file="img/frankenstein.jpg"
加载图形,转换为灰度,并过滤和采样图像。
r
load.image(file) %>%
grayscale() %>%
threshold("45%") %>%
as.cimg() %>%
as.data.frame() -> franky
相关函数
clustResultx
:切割树突,并移除仅由一个点形成的簇
r
clustResultx <- function(x) {
clustCut <- tibble(cluster_id = cutree(clusters, x)) %>% bind_cols(data)
clustCut %>% group_by(cluster_id) %>%
summarize(size = n()) %>%
filter(size > 1) %>%
select(cluster_id) %>%
inner_join(clustCut, by = "cluster_id") -> clustCut
return(clustCut)
}
add_segments
:添加比较两个连续聚类结果的结果片段
r
add_segments <- function(x){
df1 <- clustEvol[[x]]
df0 <- clustEvol[[x-1]]
new_points <- anti_join(df1, df0, by = "id")
# If a new point is added to an existing cluster
new_points %>%
inner_join(df1, by = "cluster_id", suffix = c(".1", ".2")) %>%
filter(id.1 != id.2) %>%
mutate(d = sqrt((x.1 - x.2)^2 + (y.1 - y.2)^2)) %>%
group_by(id.1) %>%
arrange(d) %>%
slice(1) %>%
select(p1 = id.1, p2 = id.2) %>%
ungroup -> new_segments1
# If a new 2-points cluster is generated
new_points %>% anti_join(bind_rows(select(new_segments1, id = p1),
select(new_segments1, id = p2)), by = "id") %>%
group_by(cluster_id) %>%
ungroup -> unpaired_points
unpaired_points %>% inner_join(unpaired_points, by = "cluster_id", suffix = c(".1", ".2")) %>%
filter(id.1 < id.2) %>%
select(p1 = id.1, p2 = id.2) -> new_segments2
# If two existing clusters are joined
new_points <- anti_join(df1, df0, by = c("id", "cluster_id"))
new_points %>%
inner_join(df1, by = "cluster_id", suffix = c(".1", ".2")) %>%
filter(id.1 != id.2) %>%
anti_join(new_points, by = c("id.2" = "id")) %>%
mutate(d = sqrt((x.1 - x.2)^2 + (y.1 - y.2)^2)) %>%
arrange(d) %>%
slice(1) %>%
select(p1 = id.1, p2 = id.2) %>%
ungroup -> new_segments3
bind_rows(new_segments1, new_segments2, new_segments3)
}
算法应用
r
# 样本大小
n <- 2500
# 随机抽取图形中的样本
franky %>%
sample_n(n, weight=(1-value)) %>%
select(x,y) %>% mutate(id = row_number()) -> data
# 各点之间距离计算
dist_data <- dist(data %>% select(-id), method = "euclidean")
# 分层聚类
clusters <- hclust(dist_data, method = 'single')
# 列出所有可能的集群,从大到小
nrow(data):1 %>%
map(function(x) clustResultx(x)) -> clustEvol
# Segments of clusters
2:length(clustEvol) %>%
map(function(x) add_segments(x)) %>%
bind_rows() -> segments_id
# Segments in (x, y) and (xend, yend) format
segments_id %>%
inner_join(data, by = c("p1" = "id"), suffix = c(".1", ".2")) %>%
inner_join(data, by = c("p2" = "id"), suffix = c(".1", ".2")) %>%
select(x = x.1, y = y.1, xend = x.2, yend = y.2) -> segments
注意:运行本文所有代码,大概需要花费3-4分钟。请耐性等待~
绘图
r
ggplot(segments) +
geom_curve(aes(x = x, y = y, xend = xend, yend = yend),
ncp = 10) +
scale_x_continuous(expand=c(.1, .1)) +
scale_y_continuous(expand=c(.1, .1), trans = scales::reverse_trans()) +
coord_equal() +
theme_void()
通过修改采样样本数量可以得到不同的图形,例如:
其他例子
小编使用上面代码尝试了其他图形,结果如下: