基因共表达分析-R-脚本04

R script for tissue-specific coexpression analysis of an RNA sequencing dataset. Used to identify novel XXXX genes involved in natural product biosynthesis.

复制代码
# Import A. belladonna RNA sequencing dataset, downloaded from MSU Medicinal Plant Genomics Resource
logFPKMs <- read.csv(file="/path/to/dataset/aba.matrix.FPKM.vf.082511.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
logFPKMs$X <- NULL

# The first data row carries the labels of the expression columns (3:13):
# promote it to column names, then remove it from the data.
expr_cols <- 3:13
colnames(logFPKMs)[expr_cols] <- vapply(
  logFPKMs[1, expr_cols], as.character, character(1), USE.NAMES = FALSE
)
logFPKMs <- logFPKMs[-1, ]

# Because of that label row, read.csv() may have typed the expression columns
# as character (or factor). data.matrix() would then return factor codes
# rather than the numbers, so convert explicitly. Already-numeric columns pass
# through unchanged.
logFPKMs[expr_cols] <- lapply(
  logFPKMs[expr_cols],
  function(column) as.numeric(as.character(column))
)

# Numeric expression matrix, one row per locus. Row names bundle locus ID,
# functional annotation and PFAM, separated by ">", so later steps can search
# annotations and recover the locus ID from a single string.
logFPKMvals <- data.matrix(logFPKMs[, expr_cols])
rownames(logFPKMvals) <- paste(
  logFPKMs$Locus.ID,
  logFPKMs$Unified.Functional.Annotation,
  logFPKMs$PFAM,
  sep = ">"
)

# Back-transform to the linear scale (assumes the file stores log2 values, as
# the variable names suggest -- TODO confirm against the dataset description).
logFPKMvals <- 2^logFPKMvals

# Identify initial list of oxidoreductase candidates based on PFAM and functional annotations.
# The pattern is searched (case-insensitively) in the row names, which hold
# "locus>annotation>PFAM". drop = FALSE keeps `candidates` a matrix, with its
# row names, even if only a single row matches.
candidate_rows <- grepl("PF00106|PF13561|PF08659|PF08240|PF00107|PF00248|PF00465|PF13685|PF13823|PF13602|PF16884|PF00248|alcohol dehydrogenase|aldehyde reductase|short chain|aldo/keto|littorine|hyoscyamine|putrescine|tropinone|tropine", rownames(logFPKMvals), ignore.case = TRUE)
candidates <- logFPKMvals[candidate_rows, , drop = FALSE]

# Rank candidate genes by how well their expression profile explains a bait
# profile.
#
# expr:         numeric matrix, one row per gene, one column per sample; row
#               names are searched for the bait.
# bait_pattern: regular expression (case-insensitive) selecting the bait
#               row(s) by row name; several matches are averaged per sample.
#
# Returns a one-column data frame `p` holding, for each gene, the p-value of
# the slope in lm(bait ~ gene), sorted ascending. Genes whose slope cannot be
# estimated (e.g. constant expression) are dropped.
bait_pvals <- function(expr, bait_pattern) {
  bait_rows <- grep(bait_pattern, rownames(expr), ignore.case = TRUE)
  if (length(bait_rows) == 0L) {
    stop("No candidate row matches bait pattern '", bait_pattern, "'",
         call. = FALSE)
  }
  # drop = FALSE: colMeans() needs a matrix even when only one row matches.
  bait <- colMeans(expr[bait_rows, , drop = FALSE])

  slope_p <- vapply(seq_len(nrow(expr)), function(i) {
    coefs <- summary(lm(bait ~ expr[i, ]))$coefficients
    # summary.lm() omits non-estimable coefficients, so a constant gene
    # leaves only the intercept row; report NA instead of mis-indexing.
    if (nrow(coefs) >= 2L) coefs[2L, 4L] else NA_real_
  }, numeric(1))
  names(slope_p) <- rownames(expr)

  pvals <- data.frame(p = slope_p)
  na.omit(pvals[order(pvals$p), , drop = FALSE])
}

# Using CYP80F1 littorine monooxygenase as bait gene
CYP80F1_pvals <- bait_pvals(candidates, "littorine")

# Using hyoscyamine 6b-hydroxylase/oxygenase as bait gene
H6H_pvals <- bait_pvals(candidates, "hyoscyamine")

# Merge the hits of both bait genes, one row per locus, and combine the two
# p-values as a product (computed as a sum of log10 values).
# Row names of the p-value tables start with the locus ID followed by ">";
# strip everything from the first ">" on to recover the bare locus IDs.
cyp80f1_ids <- sub(">.*", "", rownames(CYP80F1_pvals))
h6h_ids <- sub(">.*", "", rownames(H6H_pvals))
top_hits <- data.frame(
  ID = unique(c(cyp80f1_ids, h6h_ids)),
  stringsAsFactors = FALSE
)

# Exact lookup with match(): a substring/regex search would let one locus ID
# hit other IDs that merely contain it, and could return zero or several
# p-values for one gene. A locus missing from either table yields NA here.
top_hits$combined_p <-
  log10(CYP80F1_pvals$p[match(top_hits$ID, cyp80f1_ids)]) +
  log10(H6H_pvals$p[match(top_hits$ID, h6h_ids)])

# Order by combined p-value, most significant first.
top_hits <- top_hits[order(top_hits$combined_p), , drop = FALSE]

# Keep only hits whose combined p-value is below 0.05 (log10 < -1.3); hits
# without a combined value are dropped instead of becoming all-NA rows.
keep <- !is.na(top_hits$combined_p) & top_hits$combined_p < -1.3
top_hits <- top_hits[keep, , drop = FALSE]

# Generate subset of original log2FPKM values with the top_hits candidates.
# Rows are looked up by exact locus ID: a substring/regex search could return
# several rows (or none) for one ID and break the row-name assignment below.
hit_rows <- match(top_hits$ID, logFPKMs$Locus.ID)
hit_expr <- logFPKMs[hit_rows, 3:13, drop = FALSE]

# Force the expression columns to numeric first: data.matrix() would turn
# character columns into factor codes instead of the stored values.
hit_expr[] <- lapply(hit_expr, function(column) as.numeric(as.character(column)))

top_hits_FPKMs <- data.matrix(hit_expr)
rownames(top_hits_FPKMs) <- top_hits$ID

# Back-transform to the linear scale (assumes log2 values -- TODO confirm).
top_hits_FPKMs <- 2^top_hits_FPKMs

# OPTIONAL: Min-max rescale top_hits_FPKMs per gene (row), so each gene's
# lowest value maps to 0 and its highest to 1.
# A gene with identical values in every sample has zero range; it is mapped to
# all zeros rather than 0/0 = NaN, which would break the heatmap's clustering.
norm_FPKMs <- t(apply(top_hits_FPKMs, 1, function(x) {
  shifted <- x - min(x)
  span <- max(x) - min(x)
  if (!is.na(span) && span == 0) shifted else shifted / span
}))

# Heatmap of the top hits on the non-normalized values; heatmap.2 is asked to
# scale each row itself (scale = "row"). Only the row dendrogram is requested
# and column reordering is switched off (Colv = FALSE).
library(gplots)
library(RColorBrewer)
heatmap.2(
  top_hits_FPKMs,
  scale = "row",
  Colv = FALSE,
  dendrogram = "row",
  col = colorRampPalette(c("red", "black", "green")),
  trace = "none",
  density.info = "none",
  key = TRUE,
  key.title = NA,
  keysize = 1,
  colsep = 1:ncol(top_hits_FPKMs),
  rowsep = 1:nrow(top_hits_FPKMs),
  sepwidth = c(0, 0),
  margins = c(10, 40)
)

# Heatmap of the top hits on the per-gene normalized values (norm_FPKMs).
# No `scale` argument is passed here, unlike the non-normalized plot. Only the
# row dendrogram is requested and column reordering is switched off.
heatmap.2(
  norm_FPKMs,
  Colv = FALSE,
  dendrogram = "row",
  col = colorRampPalette(c("red", "black", "green")),
  trace = "none",
  tracecol = "white",
  density.info = "none",
  key = TRUE,
  key.title = NA,
  keysize = 1,
  colsep = 1:ncol(norm_FPKMs),
  rowsep = 1:nrow(norm_FPKMs),
  sepwidth = c(0, 0),
  margins = c(10, 30)
)
相关推荐
瓯雅爱分享1 天前
Java+Vue构建的采购招投标一体化管理系统,集成招标计划、投标审核、在线竞价、中标公示及合同跟踪功能,附完整源码,助力企业实现采购全流程自动化与规范化
java·mysql·vue·软件工程·源代码管理
mit6.8241 天前
[C# starter-kit] 命令/查询职责分离CQRS | MediatR |
java·数据库·c#
诸神缄默不语1 天前
Maven用户设置文件(settings.xml)配置指南
xml·java·maven
任子菲阳1 天前
学Java第三十四天-----抽象类和抽象方法
java·开发语言
学Linux的语莫1 天前
机器学习数据处理
java·算法·机器学习
找不到、了1 天前
JVM的即时编译JIT的介绍
java·jvm
西瓜er1 天前
JAVA:Spring Boot 集成 FFmpeg 实现多媒体处理
java·spring boot·ffmpeg
你总是一副不开心的样子(´ . .̫ .1 天前
一、十天速通Java面试(第三天)
java·面试·职场和发展·java面试
迎風吹頭髮1 天前
UNIX下C语言编程与实践63-UNIX 并发 Socket 编程:非阻塞套接字与轮询模型
java·c语言·unix
我是华为OD~HR~栗栗呀1 天前
23届考研-Java面经(华为OD)
java·c++·python·华为od·华为·面试