脚本优化--visium的细胞niche与共定位(R版本)

作者,Evil Genius
今天我们拉群,2025番外--linux、R、python培训https://mp.weixin.qq.com/s/8E1vYJMNhe5m0DXieBHfzA?payreadticket=HM3XzM3dAiSdVyi83oiJ3UWd-zPIOKO1EMMnYwy9ASu_scWj_WEje8Xwiwq_z8JvgcLDfd0&scene=1&click_id=1, 这是所有将来从事生物信息行业的第一课,也是现在入职诺禾、华大等上市公司的必备技能。
今天更新脚本,visium的细胞niche与共定位
单细胞空间联合推荐cell2location, 拿到联合后的结果,注意联合的时候尽量样本要匹配,联合关键的参数在课堂上都已经有详细的讲解。
我们来更新脚本
复制代码
library(compositions)
library(tidyverse)
library(clustree)
library(uwot)
library(scran)
library(cluster)

integrated_compositions = read.csv('cell2location的联合矩阵')
关键的一步,矩阵转换, 这个转换是等距对数比变换 (Isometric Log-Ratio Transformation),是处理比例、百分比等相对组成数据的专业方法。
其中ilr转换核心作用是解决成分数据的闭合性约束问题,使其能够在标准的多元统计方法中使用。

成分数据的根本问题

成分数据具有"闭合性"(总和为常数,如100%),这导致:
1、成分数据具有"闭合性"(总和为常数,如100%):伪相关性问题
2、非欧几里得几何:成分空间是单形空间,不是欧几里得空间,不能直接应用常规统计方法。
ilr转换的核心作用
1、消除闭合约束
2、保持等距性质
3. 实现坐标系统转换
复制代码
# Generate ILR transformation
baseILR <- ilrBase(x = integrated_compositions,method = "basic")

cell_ilr <- as.matrix(ilr(integrated_compositions, baseILR))
colnames(cell_ilr) <- paste0("ILR_", 1:ncol(cell_ilr))

# Make community graph
k_vect <- c(10, 20,30)
k_vect <- set_names(k_vect, paste0("k_",k_vect))

cluster_info <- map(k_vect, function(k) {
  print(k)
  print("Generating SNN")
  snn_graph <- scran::buildSNNGraph(x = t(cell_ilr %>% as.data.frame() %>% as.matrix()), k = k)
  print("Louvain clustering")
  clust.louvain <- igraph::cluster_louvain(snn_graph)
  clusters <- tibble(cluster = clust.louvain$membership,
                     spot_id = rownames(cell_ilr))
})

cluster_info <- cluster_info %>% 
  enframe() %>%
  unnest() %>%
  pivot_wider(names_from = name,
              values_from = cluster)

k_vect <- set_names(names(k_vect))

subsampling_mean_ss <- map(k_vect, function(k) {
  print(k)
  
  cluster_info_summary <- cluster_info %>%
    group_by_at(k) %>%
    summarize(ncells = floor(n() * 0.3))
  
  cells <- cluster_info %>%
    dplyr::select_at(c("spot_id", k)) %>%
    group_by_at(k) %>%
    nest() %>%
    left_join(cluster_info_summary) %>%
    mutate(data = map(data, ~ .x[[1]])) %>%
    mutate(selected_cells = map2(data, ncells, function(dat,n) {
      sample(dat, n)
    })) %>%
    pull(selected_cells) %>%
    unlist()
  
  dist_mat <- dist(cell_ilr[cells, ])
  
  k_vect <- purrr::set_names(cluster_info[[k]], cluster_info[["spot_id"]])[cells]
  
  sil <- cluster::silhouette(x = k_vect, dist = dist_mat)
  
  mean(sil[, 'sil_width'])
  
})

subsampling_mean_ss <- enframe(subsampling_mean_ss) %>% 
  unnest() %>%
  dplyr::filter()

niche_resolution <- dplyr::filter(subsampling_mean_ss, 
                       value == max(value)) %>%
  pull(name)

plt <- comp_umap %>%
  ggplot(aes(x = V1, y = V2, 
             color = as.character(k_50))) +
  ggrastr::geom_point_rast(size = 0.1) +
  theme_classic() +
  xlab("UMAP1") +
  ylab("UMAP2") +
  guides(colour = guide_legend(override.aes = list(size=4)))

plot(plt)

cts <- set_names(colnames(integrated_compositions))

walk(cts, function(ct){
  
  plot_df <- comp_umap %>%
    mutate(ct_prop = log_comps[ , ct])
  
  plt <- plot_df %>%
    ggplot(aes(x = V1, y = V2, 
               color = ct_prop)) +
    ggrastr::geom_point_rast(size = 0.07) +
    theme_classic() +
    ggtitle(ct) +
    xlab("UMAP1") +
    ylab("UMAP2")
  
  plot(plt)
  
})

dev.off()

# Make the niche annotation meta-data

cluster_info <- comp_umap %>%
  dplyr::select(c("row_id","k_50")) %>%
  dplyr::rename("niche" = k_50) %>%
  dplyr::mutate(ct_niche = paste0("niche_", niche))

niche_summary_pat <- integrated_compositions %>%
  as.data.frame() %>%
  rownames_to_column("row_id") %>%
  pivot_longer(-row_id,values_to = "ct_prop", 
               names_to = "cell_type") %>%
  left_join(cluster_info) %>%
  mutate(orig.ident = strsplit(row_id, "[..]") %>%
           map_chr(., ~ .x[1])) %>%
  group_by(orig.ident, ct_niche, cell_type) %>%
  summarize(median_ct_prop = median(ct_prop))
  
niche_summary <- niche_summary_pat %>%
  ungroup() %>%
  group_by(ct_niche, cell_type) %>%
  summarise(patient_median_ct_prop = median(median_ct_prop))

# Data manipulation to have clustered data

niche_summary_mat <- niche_summary %>%
  pivot_wider(values_from = patient_median_ct_prop, 
              names_from =  cell_type, values_fill = 0) %>%
  column_to_rownames("ct_niche") %>%
  as.matrix()

niche_order <- hclust(dist(niche_summary_mat))
niche_order <- niche_order$labels[niche_order$order]

ct_order <- hclust(dist(t(niche_summary_mat)))
ct_order <- ct_order$labels[ct_order$order]

# Find characteristic cell types of each niche
# We have per patient the proportion of each cell-type in each niche

run_wilcox_up <- function(prop_data) {
  
  prop_data_group <- prop_data[["ct_niche"]] %>%
    unique() %>%
    set_names()
  
  map(prop_data_group, function(g) {
    
    test_data <- prop_data %>%
      mutate(test_group = ifelse(ct_niche == g,
                                 "target", "rest")) %>%
      mutate(test_group = factor(test_group,
                                 levels = c("target", "rest")))
    
    wilcox.test(median_ct_prop ~ test_group, 
                data = test_data,
                alternative = "greater") %>%
      broom::tidy()
  }) %>% enframe("ct_niche") %>%
    unnest()
  
}

wilcoxon_res <- niche_summary_pat %>%
  ungroup() %>%
  group_by(cell_type) %>%
  nest() %>%
  mutate(wres = map(data, run_wilcox_up)) %>%
  dplyr::select(wres) %>%
  unnest() %>%
  ungroup() %>%
  mutate(p_corr = p.adjust(p.value))

wilcoxon_res <- wilcoxon_res %>%
  mutate(significant = ifelse(p_corr <= 0.15, "*", ""))

write.table(niche_summary_pat, file = "./results/niche_mapping/ct_niches/niche_summary_pat.txt", 
            col.names = T, row.names = F, quote = F, sep = "\t")

write.table(wilcoxon_res, file = "./results/niche_mapping/ct_niches/wilcoxon_res_cells_niches.txt", 
            col.names = T, row.names = F, quote = F, sep = "\t")

mean_ct_prop_plt <- niche_summary %>%
  left_join(wilcoxon_res, by = c("ct_niche", "cell_type")) %>%
  mutate(cell_type = factor(cell_type, levels = ct_order),
         ct_niche = factor(ct_niche, levels = niche_order)) %>%
  ungroup() %>%
  group_by(cell_type) %>%
  mutate(scaled_pat_median = (patient_median_ct_prop - mean(patient_median_ct_prop))/sd(patient_median_ct_prop)) %>%
  ungroup() %>%
  ggplot(aes(x = cell_type, y = ct_niche, fill = scaled_pat_median)) +
  geom_tile() +
  geom_text(aes(label = significant)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12),
        legend.position = "bottom",
        plot.margin = unit(c(0, 0, 0, 0), "cm"),
        axis.text.y = element_text(size=12)) +
  scale_fill_gradient(high = "#ffd89b", low = "#19547b")
  

# Finally describe the proportions of those niches in all the data
cluster_counts <- cluster_info %>%
  dplyr::select_at(c("row_id", "ct_niche")) %>%
  group_by(ct_niche) %>%
  summarise(nspots = length(ct_niche)) %>%
  mutate(prop_spots = nspots/sum(nspots))

write_csv(cluster_counts, file = "./results/niche_mapping/ct_niches/niche_prop_summary.csv")

barplts <- cluster_counts %>%
  mutate(ct_niche = factor(ct_niche, levels = niche_order)) %>%
  ggplot(aes(y = ct_niche, x = prop_spots)) +
  geom_bar(stat = "identity") +
  theme_classic() + ylab("") +
  theme(axis.text.y = element_blank(),
        plot.margin = unit(c(0, 0, 0, 0), "cm"),
        axis.text.x = element_text(size=12)) 

niche_summary_plt <- cowplot::plot_grid(mean_ct_prop_plt, barplts, align = "hv", axis = "tb")

pdf("./results/niche_mapping/ct_niches/characteristic_ct_niches.pdf", height = 3, width = 6)

plot(niche_summary_plt)

dev.off()

pdf(file = "./results/niche_mapping/ct_niches/niche_summary_pat.pdf", height = 5, width = 8)
niche_summary_pat %>%
  ggplot(aes(x = ct_niche, y  = median_ct_prop)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  facet_wrap(.~cell_type, ncol = 3,scales = "free_y")
dev.off()
生活很好,有你更好
相关推荐
Jmayday7 小时前
python数据分析项目之:房地产数据可视化分析
python·信息可视化·数据分析
成长痕迹15 小时前
【Python与Matlab数据分析对比】
python·matlab·数据分析
学掌门15 小时前
数据分析过程中,发现数值缺失,怎么办?
数据挖掘·数据分析
Jackiejin52615 小时前
WPS表格选择性粘贴快捷键怎么使用?WPS如何给表格某一行都加上指定数字?
数据分析·excel·wps
B站_计算机毕业设计之家16 小时前
python电商商品评论数据分析可视化系统 爬虫 数据采集 Flask框架 NLP情感分析 LDA主题分析 Bayes评论分类(源码) ✅
大数据·hadoop·爬虫·python·算法·数据分析·1024程序员节
Tiger Z18 小时前
R 语言科研绘图第 80 期 --- 词云图
r语言·论文·科研·绘图·研究生
Tiger Z18 小时前
R 语言科研配色 --- 第 94 期 (附免费下载的配色绘图PPT)
r语言·论文·科研·研究生·配色
SelectDB21 小时前
更高效的数据处理解决方案:基于 MinIO 部署 Apache Doris 存算分离版本实践
数据库·数据分析·apache
weixin_307779131 天前
C#程序实现将Teradata的存储过程转换为Azure Synapse Dedicated SQL pool的存储过程
数据库·数据分析·c#·云计算·azure