WNN 多模态整合 | Seurat 单细胞多组学整合流程

测试环境:CentOS7.9, R4.3.2, Seurat 4.4.0, SeuratObject 4.1.4

2024.10.23

复制代码
# WNN
library(ggplot2)
library(dplyr)
library(patchwork)

1. 导入数据

(1). load counts of RNA and protein

复制代码
# Load the HDF5 high-level shared library by hand from a user-local path
# (presumably needed so that hdf5r can be attached on this machine).
dyn.load("/home/wangjl/.local/lib/libhdf5_hl.so.100")
library(hdf5r)

library(Seurat)

# Read the 10x HDF5 file and inspect what came back.
dat <- Read10X_h5(
  filename = "/datapool/wangjl/others/hanlu/raw/GSE210079/GSM6459763_32-3mo_raw_feature_bc_matrix.h5"
)
str(dat)
# Recorded output: "Gene Expression" "Antibody Capture"
# (two matrices: RNA and 55 proteins)
names(dat)

# Look at the RNA matrix: overall structure first, then a small corner.
str(dat[["Gene Expression"]])
dat[["Gene Expression"]][1:4, 1:5]

# Make sure the cell IDs are the same in both matrices (recorded result: TRUE).
all.equal(
  colnames(dat[["Gene Expression"]]),
  colnames(dat[["Antibody Capture"]])
)

(2). use RNA data to create Obj

复制代码
# Build the Seurat object from the RNA count matrix only.
scRNA <- CreateSeuratObject(
  counts = dat[["Gene Expression"]],
  project = "A1"
)

(3). add protein mat

复制代码
# Reference: https://zhuanlan.zhihu.com/p/567253121
# Wrap the protein counts in their own assay and attach it under the name "ADT".
adt_assay <- CreateAssayObject(counts = dat[["Antibody Capture"]])
scRNA[["ADT"]] <- adt_assay

# (4). check
# Protein (ADT feature) names.
rownames(scRNA[["ADT"]])

# Assays now present in the object (recorded output: "RNA" "ADT").
Assays(scRNA)

# Show the current default assay (recorded output: "RNA").
DefaultAssay(scRNA)

2. 每个模态分别分析

要分别分析到PCA结束。

复制代码
# Work on a copy so that scRNA stays untouched.
bm <- scRNA

## QC ====
bm # 655671 (value recorded by the author before filtering)

# Per-cell percentage of counts from features matching "^MT-".
bm[["percent.mt"]] <- PercentageFeatureSet(object = bm, pattern = "^MT-")

# VlnPlot(bm, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

# Scatter plots of the QC metrics; the dashed red lines mark the thresholds
# drawn here: percent.mt = 10 and nFeature_RNA = 300 / 5000.
plot1 <- FeatureScatter(
  object = bm,
  feature1 = "nCount_RNA",
  feature2 = "percent.mt"
)
plot2 <- FeatureScatter(
  object = bm,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA"
)
(plot1 + geom_hline(yintercept = 10, linetype = 2, color = "red")) +
  (plot2 + geom_hline(yintercept = c(300, 5000), linetype = 2, color = "red")) # fig1

Fig1

(1)Filter

复制代码
# Keep cells with 300 < nFeature_RNA < 5000 and percent.mt < 10.
bm <- subset(
  x = bm,
  subset = nFeature_RNA > 300 & nFeature_RNA < 5000 & percent.mt < 10
)
bm # 7837 (value recorded by the author after filtering)

(2)for RNA

复制代码
# RNA modality: normalise, pick 3000 variable features, scale, then run PCA.
DefaultAssay(bm) <- "RNA"
bm <- NormalizeData(bm) %>%
  FindVariableFeatures(nfeatures = 3000) %>%
  ScaleData() %>%
  # RunPCA takes the number of components via `npcs`; it has no `dims`
  # argument. 50 components are requested so ElbowPlot(ndims = 50) can show
  # all of them.
  RunPCA(npcs = 50)
DimPlot(bm, reduction = "pca")
ElbowPlot(bm, ndims = 50) # fig2

(3)for protein

复制代码
# Protein (ADT) modality.
DefaultAssay(bm) <- "ADT"

# Use every ADT feature for dimensional reduction, and store the result under
# its own reduction name ("apca") so the RNA PCA stored as "pca" is not
# overwritten.
VariableFeatures(bm) <- rownames(bm[["ADT"]])
bm <- NormalizeData(bm, normalization.method = "CLR", margin = 2)
bm <- ScaleData(bm)
bm <- RunPCA(bm, reduction.name = "apca")
ElbowPlot(bm, ndims = 50, reduction = "apca") # fig2

Fig2

3. 整合模态

复制代码
# Identify multimodal neighbors. The result is written to a new object, bm2,
# so bm keeps the per-modality state.
#   - weighted nearest neighbors: bm2[["weighted.nn"]] (neighbors slot)
#   - WNN (weighted kNN) graph:   bm2[["wknn"]]
#   - weighted SNN graph used for clustering: bm2[["wsnn"]]
#   - cell-specific modality weights: bm2$RNA.weight (and bm2$ADT.weight)
bm2 <- FindMultiModalNeighbors(
  bm,
  reduction.list = list("pca", "apca"),
  dims.list = list(1:30, 1:20)
  # modality.weight.name = c("RNA.weight", "ADT.weight")
  # The weight names must have the same length as reduction.list; otherwise
  # the default is used: assay name + ".weight".
)
names(bm2@graphs) # recorded output: "wknn" "wsnn"

4. 基于wnn的下游分析

(1)UMAP和细胞分群

复制代码
# UMAP computed from the weighted nearest-neighbor object, stored as "wnn.umap".
bm2 <- RunUMAP(
  bm2,
  nn.name = "weighted.nn",
  reduction.name = "wnn.umap",
  reduction.key = "wnnUMAP_"
)

# Cluster on the weighted SNN graph.
# Author's note: resolutions 0.3, 0.4, 0.6 were too small; 0.8 too large.
bm2 <- FindClusters(
  bm2,
  graph.name = "wsnn",
  algorithm = 1,
  resolution = 0.7,
  verbose = TRUE
)

p1 <- DimPlot(
  bm2,
  reduction = "wnn.umap",
  label = TRUE,
  group.by = "wsnn_res.0.7"
) +
  ggtitle("WNN")
p1 # fig3

Fig3 (same as Fig8)

(2)模态权重:按cluster统计

复制代码
# Inspect the per-cell metadata, which now holds the modality weights.
head(bm2@meta.data)

# Modality weights and feature counts per WNN cluster.
# bm2 is used here because bm3 is only created in the next section.
VlnPlot(
  bm2,
  features = c(
    "RNA.weight", "nFeature_RNA",
    "ADT.weight", "nFeature_ADT"
  ),
  group.by = "wsnn_res.0.7",
  sort = FALSE, # whether to sort the groups
  pt.size = 0,
  ncol = 2
) +
  NoLegend() # Fig3B

# For every cell the two modality weights sum to 1 (recorded result: TRUE).
all(abs((bm2@meta.data$RNA.weight + bm2@meta.data$ADT.weight) - 1) < 1e-10)

Fig3B

5. 和单一模态的比较

复制代码
# Continue on a copy (bm3) with RNA as the default assay.
bm3 <- bm2
DefaultAssay(bm3) <- "RNA"
DefaultAssay(bm3) # recorded output: RNA

(1) 单模态UMAP

复制代码
# One UMAP per modality, each stored under its own reduction name and key.
bm3 <- RunUMAP(
  bm3,
  reduction = "pca",
  dims = 1:30,
  assay = "RNA",
  reduction.name = "rna.umap",
  reduction.key = "rnaUMAP_"
)
bm3 <- RunUMAP(
  bm3,
  reduction = "apca",
  dims = 1:20,
  assay = "ADT",
  reduction.name = "adt.umap",
  reduction.key = "adtUMAP_"
)
# Recorded output: "pca" "apca" "wnn.umap" "rna.umap" "adt.umap"
names(bm3@reductions)


# Side-by-side embeddings: RNA only, ADT only, and the WNN plot (p1).
p2 <- DimPlot(
  bm3,
  reduction = "rna.umap", # group.by = 'celltype.l2',
  label = TRUE, # label.size = 2.5,
  repel = TRUE
) +
  ggtitle("RNA") +
  NoLegend()
p3 <- DimPlot(
  bm3,
  reduction = "adt.umap", # group.by = 'celltype.l2',
  label = TRUE, # label.size = 2.5,
  repel = TRUE
) +
  ggtitle("ADT") +
  NoLegend()
p2 + p3 + p1 # Fig3



# Disabled example: the condition is always false, so nothing below runs.
if (0) {
  p4 <- FeaturePlot(
    bm3,
    features = c("adt_CD45RA", "adt_CD14.1", "adt_CD161"),
    reduction = "wnn.umap",
    max.cutoff = 2,
    cols = c("lightgrey", "darkgreen"),
    ncol = 3
  )
  p5 <- FeaturePlot(
    bm3,
    features = c("rna_PTPRC", "rna_CD14", "rna_KLRB1"),
    reduction = "wnn.umap",
    max.cutoff = 3,
    ncol = 3
  )
  p4 / p5
}


# Look up exact feature names before plotting.
# ADT feature names containing "CD45"
# (output recorded by the author: "CD45RA" "CD45" "CD4.1" "CD45RO").
grep("CD45", rownames(bm3[["ADT"]]), value = TRUE)
# RNA feature names containing "FCGR3A".
grep("FCGR3A", rownames(bm3@assays$RNA@counts), value = TRUE)

# Protein-vs-protein scatter plots.
FeatureScatter(bm3, feature1 = "adt_CD4.1", feature2 = "adt_CD8a")
FeatureScatter(bm3, feature1 = "adt_CD45", feature2 = "adt_CD8a") # Fig4

Fig4

复制代码
# RNA UMAP: protein markers (top row) and RNA markers (bottom row).
pC1 <- FeaturePlot(
  bm3,
  features = c(
    "adt_CD45RA", "adt_CD45RO", "adt_CD3",
    "adt_CD4.1", "adt_CD8a", "adt_CD19.1"
  ),
  reduction = "rna.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "darkgreen"),
  ncol = 6
) & NoLegend()
pC1
pC2 <- FeaturePlot(
  bm3,
  features = c(
    "rna_PTPRC", "rna_CCR7", "rna_CD3D",
    "rna_CD4", "rna_CD8A", "rna_CD19"
  ),
  reduction = "rna.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "navy"),
  ncol = 6
) & NoLegend()
pC2
pC1 / pC2 # Fig5

Fig5

复制代码
# ADT UMAP: protein markers (top row) and RNA markers (bottom row).
pC1 <- FeaturePlot(
  bm3,
  features = c(
    "adt_CD45RA", "adt_CD45RO", "adt_CD3",
    "adt_CD4.1", "adt_CD8a", "adt_CD19.1"
  ),
  reduction = "adt.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "darkgreen"),
  ncol = 6
) & NoLegend()
pC1
pC2 <- FeaturePlot(
  bm3,
  features = c(
    "rna_PTPRC", "rna_CCR7", "rna_CD3D",
    "rna_CD4", "rna_CD8A", "rna_CD19"
  ),
  reduction = "adt.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "navy"),
  ncol = 6
) & NoLegend()
pC2
pC1 / pC2 # Fig6

Fig6

复制代码
# WNN UMAP: protein markers (top row) and RNA markers (bottom row).
pC1 <- FeaturePlot(
  bm3,
  features = c(
    "adt_CD45RA", "adt_CD45RO", "adt_CD3",
    "adt_CD4.1", "adt_CD8a", "adt_CD19.1"
  ),
  reduction = "wnn.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "darkgreen"),
  ncol = 6
) & NoLegend()
pC1
pC2 <- FeaturePlot(
  bm3,
  features = c(
    "rna_PTPRC", "rna_CCR7", "rna_CD3D",
    "rna_CD4", "rna_CD8A", "rna_CD19"
  ),
  reduction = "wnn.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "navy"),
  ncol = 6
) & NoLegend()
pC2
pC1 / pC2 # Fig7

Fig7 效果似乎不好,CD4+和CD8+依旧不清晰。

也没有其他更优的参数可以调试。

也就是说,WNN 不一定适合所有此类(RNA + ADT)样本。

(2) 单模态细胞聚类/cell cluster

复制代码
# RNA-only clustering on the first 30 components of the "pca" reduction.
DefaultAssay(bm3) <- "RNA"
names(bm3@graphs) # recorded output: "wknn" "wsnn"
bm3 <- FindNeighbors(bm3, dims = 1:30, reduction = "pca")
names(bm3@graphs) # recorded output: "wknn" "wsnn" "RNA_nn" "RNA_snn"
bm3 <- FindClusters(
  bm3,
  graph.name = "RNA_snn",
  algorithm = 1,
  resolution = 0.5
)
# Cluster sizes for the RNA-only clustering.
table(bm3@meta.data$RNA_snn_res.0.5)


# ADT-only clustering on the first 20 components of the "apca" reduction.
DefaultAssay(bm3) <- "ADT"
DefaultAssay(bm3) # recorded output: ADT
bm3 <- FindNeighbors(bm3, dims = 1:20, reduction = "apca")
# Recorded output: "wknn" "wsnn" "RNA_nn" "RNA_snn" "ADT_nn" "ADT_snn"
names(bm3@graphs)
bm3 <- FindClusters(
  bm3,
  graph.name = "ADT_snn",
  algorithm = 1,
  resolution = 0.5
)
# Cluster sizes for the ADT-only clustering.
table(bm3@meta.data$ADT_snn_res.0.5)


# Each embedding coloured by the clustering computed from the same modality.
pB1 <- DimPlot(
  bm3,
  reduction = "rna.umap",
  group.by = "RNA_snn_res.0.5",
  label = TRUE, # label.size = 2.5,
  repel = FALSE
) +
  ggtitle("RNA umap & its cluster")
pB2 <- DimPlot(
  bm3,
  reduction = "adt.umap",
  group.by = "ADT_snn_res.0.5",
  label = TRUE, # label.size = 2.5,
  repel = FALSE
) +
  ggtitle("ADT umap & its cluster")
pB3 <- DimPlot(
  bm3,
  reduction = "wnn.umap",
  group.by = "wsnn_res.0.7",
  label = TRUE
) +
  ggtitle("WNN")
pB1 + pB2 + pB3 # Fig8

Fig8 (same as Fig3)

相关推荐
拓端研究室TRL2 小时前
Python贝叶斯回归、强化学习分析医疗健康数据拟合截断删失数据与参数估计3实例
开发语言·人工智能·python·数据挖掘·回归
ArimaMisaki6 小时前
量化策略分类、优劣势及对抗风险解析
人工智能·金融·分类·数据挖掘·游戏策划
用户199701080186 小时前
深入研究:京东图片搜索商品 API 详解
大数据·爬虫·数据挖掘
邢博士谈科教7 小时前
在执行生信分析的时候提示缺少一些R包的报错解决
数据挖掘·r语言
小八四爱吃甜食7 小时前
【R语言绘图】圈图绘制代码
开发语言·r语言
maizeman12611 小时前
R语言——散点图
开发语言·r语言·可视化·散点图
人类群星闪耀时12 小时前
从数据海洋中“淘金”——数据挖掘的魔法与实践
人工智能·数据挖掘
普美瑞生物前沿13 小时前
打分函数分类
人工智能·数据挖掘
SmallFatMan15 小时前
智能客服系统中的意图识别与分类技术详解
大数据·人工智能·ai·数据挖掘·ai编程
数小模.1 天前
R语言进行聚类分析
开发语言·r语言