基因共表达分析-R-脚本04

R script for tissue-specific coexpression analysis of an RNA sequencing dataset, used to identify novel XXXX genes involved in natural product biosynthesis.

复制代码
# Import the A. belladonna RNA sequencing dataset (log2 FPKM matrix),
# downloaded from the MSU Medicinal Plant Genomics Resource.
logFPKMs <- read.csv(file="/path/to/dataset/aba.matrix.FPKM.vf.082511.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
logFPKMs$X <- NULL

# The first data row carries the sample labels for columns 3-13: promote it to
# the column names, then remove it from the data.
colnames(logFPKMs)[3:13] <- unlist(lapply(logFPKMs[1, 3:13], as.character), use.names = FALSE)
logFPKMs <- logFPKMs[-1, ]

# Because of that label row, read.csv() imports columns 3-13 as character.
# data.matrix() (R >= 4.0) converts character columns to factor codes instead
# of parsing the numbers, so each column is parsed explicitly here.
logFPKMvals <- do.call(cbind, lapply(logFPKMs[, 3:13, drop = FALSE], function(col) {
  if (is.numeric(col)) col else as.numeric(as.character(col))
}))

# Row names bundle locus ID, functional annotation and PFAM hits, separated by
# ">", so that later steps can search the annotation text and recover the ID.
rownames(logFPKMvals) <- paste(logFPKMs$Locus.ID, logFPKMs$Unified.Functional.Annotation, logFPKMs$PFAM, sep = ">")

# Undo the log2 transform: downstream steps work on linear-scale values.
logFPKMvals <- 2^logFPKMvals

# Identify the initial list of oxidoreductase candidates: keep every row whose
# name (locus ID > functional annotation > PFAM) matches one of the PFAM
# accessions or annotation keywords below, ignoring case.
# drop = FALSE keeps `candidates` a matrix with row names even when only a
# single row matches; later steps rely on rownames(candidates).
candidates <- logFPKMvals[
  grepl("PF00106|PF13561|PF08659|PF08240|PF00107|PF00248|PF00465|PF13685|PF13823|PF13602|PF16884|PF00248|alcohol dehydrogenase|aldehyde reductase|short chain|aldo/keto|littorine|hyoscyamine|putrescine|tropinone|tropine", rownames(logFPKMvals), ignore.case = TRUE),
  ,
  drop = FALSE
]

# Co-expression screen against two bait genes. For each bait, the mean
# expression profile of the candidate rows matching the bait keyword is
# regressed on every candidate's profile, and the p-value of the slope term is
# kept as that candidate's co-expression score.

# Mean expression profile (one value per sample column) of the rows of `expr`
# whose row name matches `pattern`, ignoring case.
bait_profile <- function(expr, pattern) {
  bait_rows <- grep(pattern, rownames(expr), ignore.case = TRUE)
  if (length(bait_rows) == 0L) {
    stop("no candidate row matches bait pattern '", pattern, "'", call. = FALSE)
  }
  # drop = FALSE keeps a matrix when a single row matches, as colMeans() needs
  colMeans(expr[bait_rows, , drop = FALSE])
}

# Fit lm(bait ~ row) for every row of `expr` and return a one-column data frame
# `p` holding the slope p-values, sorted ascending, with rows lacking a p-value
# removed. Row names are the row names of `expr`.
coexpression_pvals <- function(expr, bait) {
  slope_p <- function(x) {
    coefs <- summary(lm(bait ~ x))$coefficients
    # A constant profile has no estimable slope; summary() then omits that
    # row from the coefficient table, so report NA instead of indexing it.
    if (nrow(coefs) < 2L) NA_real_ else coefs[2L, 4L]
  }
  # vapply() always yields one number per row, unlike apply(), whose result
  # flips between a list and a matrix depending on the data.
  p <- vapply(seq_len(nrow(expr)), function(i) slope_p(expr[i, ]), numeric(1))
  pvals <- data.frame(p = setNames(p, rownames(expr)))
  na.omit(pvals[order(pvals$p), , drop = FALSE])
}

# Using CYP80F1 littorine monooxygenase as bait gene
baitdata <- bait_profile(candidates, "littorine")
CYP80F1_pvals <- coexpression_pvals(candidates, baitdata)

# Using hyoscyamine 6b-hydroxylase/oxygenase as bait gene
baitdata <- bait_profile(candidates, "hyoscyamine")
H6H_pvals <- coexpression_pvals(candidates, baitdata)

# Take the hits for each bait gene, merge them into one list of unique locus
# IDs, and score each locus by the sum of its two log10 p-values (i.e. the
# log10 of the product of the p-values).

# Row names look like "locusID>annotation>PFAM..."; keep only the locus ID.
cyp80f1_ids <- sub('>.*', '', rownames(CYP80F1_pvals))
h6h_ids <- sub('>.*', '', rownames(H6H_pvals))

top_hits <- data.frame(ID = unique(c(cyp80f1_ids, h6h_ids)), stringsAsFactors = FALSE)

# Look each locus up by exact ID. Using the ID as a regular expression would
# also hit every locus whose ID merely contains it (e.g. "x_1" inside "x_10")
# and return several p-values for one locus.
top_hits$combined_p <- log10(CYP80F1_pvals$p[match(top_hits$ID, cyp80f1_ids)]) +
  log10(H6H_pvals$p[match(top_hits$ID, h6h_ids)])

top_hits <- top_hits[order(top_hits$combined_p), , drop = FALSE] # most significant first

# Keep only loci whose combined score is below log10(0.05) ~ -1.3, i.e. whose
# product of p-values is < 0.05. Loci missing from either bait list have an NA
# score and are dropped as well.
top_hits <- top_hits[!is.na(top_hits$combined_p) & top_hits$combined_p < -1.3, , drop = FALSE]

# Generate the subset of the original log2FPKM values for the top_hits
# candidates, in top_hits order, then convert it back to linear scale.

# Exact ID lookup: one row per hit. A regex search would return several rows
# for any ID that is a substring of another locus ID.
hit_rows <- match(top_hits$ID, logFPKMs$Locus.ID)
if (anyNA(hit_rows)) {
  stop("top_hits contains locus IDs that are absent from logFPKMs$Locus.ID", call. = FALSE)
}

# Columns 3-13 may be stored as character; parse them explicitly, because
# data.matrix() (R >= 4.0) would replace character values by factor codes.
top_hits_FPKMs <- do.call(cbind, lapply(logFPKMs[hit_rows, 3:13, drop = FALSE], function(col) {
  if (is.numeric(col)) col else as.numeric(as.character(col))
}))
rownames(top_hits_FPKMs) <- top_hits$ID
top_hits_FPKMs <- 2^top_hits_FPKMs

# OPTIONAL: Min-max normalize top_hits_FPKMs per gene, so that each row runs
# from 0 (its lowest expression) to 1 (its highest expression).
norm_FPKMs <- t(apply(top_hits_FPKMs, 1, function(x) {
  shifted <- x - min(x)
  span <- max(x) - min(x)
  # A gene with identical expression in every sample has span 0; dividing
  # would give NaN for the whole row, so such rows are left at 0 instead.
  if (isTRUE(span == 0)) shifted else shifted / span
}))

# Heatmap of the top hits using the non-normalized values. Rows are clustered
# and shown with a dendrogram; columns keep their input order. heatmap.2
# rescales each row itself (scale = "row"), and zero-width separators are
# requested after every row and column.
library(gplots)
library(RColorBrewer)
heatmap.2(
  top_hits_FPKMs,
  col = colorRampPalette(c("red", "black", "green")),
  scale = "row",
  Colv = FALSE,
  dendrogram = "row",
  trace = "none",
  density.info = "none",
  key = TRUE,
  key.title = NA,
  keysize = 1,
  colsep = 1:ncol(top_hits_FPKMs),
  rowsep = 1:nrow(top_hits_FPKMs),
  sepwidth = c(0, 0),
  margins = c(10, 40)
)

# Heatmap of the top hits using the per-gene normalized values. Rows are
# clustered and shown with a dendrogram; columns keep their input order. No
# additional scaling is requested from heatmap.2 here.
heatmap.2(
  norm_FPKMs,
  col = colorRampPalette(c("red", "black", "green")),
  Colv = FALSE,
  dendrogram = "row",
  trace = "none",
  tracecol = "white",
  density.info = "none",
  key = TRUE,
  key.title = NA,
  keysize = 1,
  colsep = 1:ncol(norm_FPKMs),
  rowsep = 1:nrow(norm_FPKMs),
  sepwidth = c(0, 0),
  margins = c(10, 30)
)
相关推荐
qq_12498707539 分钟前
基于Springboot心晴疗愈社平台的设计与实现(源码+论文+部署+安装)
java·数据库·spring boot·spring·microsoft·毕业设计·计算机毕业设计
大爱编程♡10 分钟前
SpringBoot统一功能处理
java·spring boot·后端
leiming634 分钟前
FreeRTOS 的任务与 Linux
java·开发语言
小马爱记录36 分钟前
枚举策略驱动
java
马猴烧酒.1 小时前
【JAVA数据传输】Java 数据传输与转换详解笔记
java·数据库·笔记·tomcat·mybatis
爱编码的傅同学1 小时前
【常见锁的概念】死锁的产生与避免
java·开发语言
x***r1512 小时前
Putty远程管理软件安装步骤详解(附首次连接教程)
windows
rabbit_pro2 小时前
SpringBoot3使用PostGis+GeoTools整合MybatisPlus
java·spring
望眼欲穿的程序猿2 小时前
Ai8051U+DHT11温湿度!
java·开发语言
一只大马猴呀2 小时前
IntelliJ IDEA 中启动项目不显示端口号
java·ide·intellij-idea