【单细胞第二节:单细胞示例数据分析-GSE218208】

GSE218208

1.创建Seurat对象

#untar("GSE218208_RAW.tar")

c 复制代码
rm(list = ls())
a = data.table::fread("GSM6736629_10x-PBMC-1_ds0.1974_CountMatrix.tsv.gz",data.table = F)
a[1:4,1:4]
library(tidyverse)
a$`alias:gene` = str_split(a$`alias:gene`,":",simplify = T)[,1]
#str_split_i(a$`alias:gene`,":",i = 1)
a = distinct(a,`alias:gene`,.keep_all = T) #从数据框a中去除alias:gene列中重复的值,同时保留所有列的信息。
a = column_to_rownames(a,var = "alias:gene") #将数据框a中的alias:gene列的值设置为行名(row names),并将alias:gene列从数据框中移除。
a[1:4,1:4]
library(Seurat)
pbmc <- CreateSeuratObject(counts = a, 
                           project = "a", 
                           min.cells = 3, 
                           min.features = 200)
#使用输入的基因表达矩阵a创建一个新的Seurat对象,并设置项目名称为"a",同时过滤掉表达在少于3个细胞中的基因,以及过滤掉表达基因数少于200的细胞。

2.质控

c 复制代码
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
head([email protected], 3)
VlnPlot(pbmc, 
        features = c("nFeature_RNA",
                     "nCount_RNA", 
                     "percent.mt"), 
        ncol = 3,pt.size = 0.5)
pbmc = subset(pbmc,nFeature_RNA < 4200 &
                nCount_RNA < 18000 &
                percent.mt < 18)

3.降维聚类分群

c 复制代码
f = "obj.Rdata"
if(!file.exists(f)){
  pbmc = pbmc %>% 
  NormalizeData() %>%  
  FindVariableFeatures() %>%  
  ScaleData(features = rownames(.)) %>%  
  RunPCA(pc.genes = [email protected])  %>%
  FindNeighbors(dims = 1:15) %>% 
  FindClusters(resolution = 0.5) %>% 
  RunUMAP(dims = 1:15) %>% 
  RunTSNE(dims = 1:15)
  save(pbmc,file = f)
}
load(f)
ElbowPlot(pbmc)
p1 <- DimPlot(pbmc, reduction = "umap",label = T)+NoLegend();p1

4.makergene

c 复制代码
library(dplyr)
f = "markers.Rdata"
if(!file.exists(f)){
  pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE,min.pct = 0.25)
  save(pbmc.markers,file = f)
}
load(f)
mks = pbmc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)
g = unique(mks$gene)

5.makergene的可视化

c 复制代码
DoHeatmap(pbmc, features = g) + NoLegend()+
  scale_fill_gradientn(colors = c("#2fa1dd", "white", "#f87669"))

DotPlot(pbmc, features = g,cols = "RdYlBu") +
  RotatedAxis()

VlnPlot(pbmc, features = g[1:3])

FeaturePlot(pbmc, features = g[1:4])

6.注释亚群

手动注释

c 复制代码
a = read.delim("../supp/markers.txt",header = F)
gt = split(a[,2],a[,1])

DotPlot(pbmc, features = gt,cols = "RdYlBu") +
  RotatedAxis()

#利用writeLines(paste0(0:11,",")),自己手动写,打开一新的text file,将writeLines(paste0(0:11,","))的输出写在里边,然后保存在工作目录下,命名为xx.txt

c 复制代码
writeLines(paste0(0:11,","))
celltype = read.table("anno.txt",header = F,sep = ",") #自己照着DotPlot图填的
celltype
new.cluster.ids <- celltype$V2
names(new.cluster.ids) <- levels(pbmc)
seu.obj <- RenameIdents(pbmc, new.cluster.ids)
save(seu.obj,file = "seu.obj.Rdata")
p1 <- DimPlot(seu.obj, 
        reduction = "umap", 
        label = TRUE, 
        pt.size = 0.5) + NoLegend()
p1

自动注释

c 复制代码
library(celldex)
library(SingleR)
ls("package:celldex")
f = "../supp/single_ref/ref_BlueprintEncode.RData"
if(!file.exists(f)){
  ref <- celldex::BlueprintEncodeData()
  save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = pbmc
test = scRNA@assays$RNA@layers$data
rownames(test) = Features(scRNA)
colnames(test) = Cells(scRNA)
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = [email protected])
pred.scRNA$pruned.labels
#查看注释准确性 
plotScoreHeatmap(pred.scRNA, clusters=pred.scRNA@rownames, fontsize.row = 9,show_colnames = T)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
levels(scRNA)
p2 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p1+p2

可选的celldex包:

c 复制代码
a = 1
save(a,file = "a.Rdata")

b = load("a.Rdata")

b = get(load("a.Rdata")) #load可将a的数值赋值给b
相关推荐
qq_4369621816 分钟前
AI数据分析的利器:解锁BI工具的无限潜力
人工智能·数据挖掘·数据分析·ai数据分析
DarkAthena40 分钟前
【ORACLE】记录一些ORACLE的merge into语句的BUG
数据库·oracle·bug
大新新大浩浩1 小时前
arm64适配系列文章-第三章-arm64环境上mariadb的部署
数据库·arm·mariadb
逾非时1 小时前
MySQL触法器
android·mysql·sqlserver
聪明的墨菲特i1 小时前
SQL进阶知识:九、高级数据类型
xml·数据库·sql·mysql·json·空间数据类型
oioihoii1 小时前
金仓数据库 KingbaseES 产品深度优化提案:迈向卓越的全面升级
数据库·性能优化·金融·金仓数据库 2025 征文·数据库平替用金仓
艺杯羹1 小时前
JDBC 批处理与事务处理:提升数据操作效率与一致性的密钥
数据库·mysql·jdbc·事务处理·批处理数据
珹洺2 小时前
Jsp技术入门指南【十】IDEA 开发环境下实现 MySQL 数据在 JSP 页面的可视化展示,实现前后端交互
java·运维·前端·mysql·intellij-idea·jsp
猫咪-95272 小时前
【金仓数据库征文】——选择金仓,选择胜利
数据库·金仓数据库 2025 征文·数据库平替用金仓
lilye662 小时前
精益数据分析(24/126):聚焦第一关键指标,驱动创业成功
数据挖掘·数据分析