基因共表达分析-R-脚本04

R script for tissue-specific coexpression analysis of RNA sequencing dataset. Used for identification of novel XXXX genes involved in natural products biosynthesis.

复制代码
# Import A. belladonna RNA sequencing dataset, downloaded from MSU Medicinal Plant Genomics Resource
logFPKMs <- read.csv(file="/path/to/dataset/aba.matrix.FPKM.vf.082511.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
logFPKMs$X <- NULL
colnames(logFPKMs)[3:13] <- unlist(lapply(logFPKMs[1,c(3:13)], as.character), use.names=FALSE)
logFPKMs <- logFPKMs[-1,]
logFPKMvals <- data.matrix(logFPKMs[,3:13])
rownames(logFPKMvals) <- paste(logFPKMs$Locus.ID, logFPKMs$Unified.Functional.Annotation, logFPKMs$PFAM, sep=">")
logFPKMvals <- 2^logFPKMvals #Using raw reads instead of log-vals

# Identify initial list of oxidoreductase candidates based on PFAM and functional annotations
candidates <- logFPKMvals[grep("PF00106|PF13561|PF08659|PF08240|PF00107|PF00248|PF00465|PF13685|PF13823|PF13602|PF16884|PF00248|alcohol dehydrogenase|aldehyde reductase|short chain|aldo/keto|littorine|hyoscyamine|putrescine|tropinone|tropine", rownames(logFPKMvals), ignore.case=TRUE),]

# Using CYP80F1 littorine monooxygenase as bait gene
baitdata <- colMeans(candidates[grep("littorine", rownames(candidates), ignore.case = TRUE),]) 
model <- apply(candidates, 1, function(x) summary(lm(baitdata~x))$coefficients[,4])
CYP80F1_pvals <- data.frame(p=sapply(model, function(x) x[2]))
CYP80F1_pvals <- na.omit(CYP80F1_pvals[order(CYP80F1_pvals$p), , drop=FALSE])

# Using hyoscyamine 6b-hydroxylase/oxygenase as bait gene
baitdata <- colMeans(candidates[grep("hyoscyamine", rownames(candidates), ignore.case = TRUE),]) 
model <- apply(candidates, 1, function(x) summary(lm(baitdata~x))$coefficients[,4])
H6H_pvals <- data.frame(p=sapply(model, function(x) x[2]))
H6H_pvals <- na.omit(H6H_pvals[order(H6H_pvals$p), , drop=FALSE])

# Take hits for each of the bait genes and trim duplicates, then compute product of the p-values
top_hits <- data.frame(ID=unique(c(rownames(CYP80F1_pvals), rownames(H6H_pvals)))) #compile lists
top_hits$ID <- sub('>.*', '', top_hits$ID) #remove everything but locus IDs from rownames
top_hits$combined_p <- sapply(seq(1:length(top_hits$ID)), function(x) 
  log10(CYP80F1_pvals$p[grep(top_hits$ID[x], rownames(CYP80F1_pvals), ignore.case = TRUE)])
  + log10(H6H_pvals$p[grep(top_hits$ID[x], rownames(H6H_pvals), ignore.case = TRUE)]))
top_hits <- top_hits[order(top_hits$combined_p), , drop=FALSE] #order by combined p value
top_hits <- top_hits[top_hits$combined_p < -1.3,] #drop any with combined P < 0.05 (log10 < -1.3)

# Generate subset of original log2FPKM values with the top_hits candidates
top_hits_FPKMs <- data.matrix(logFPKMs[sapply(top_hits$ID, function(x)
  grep(x, logFPKMs$Locus.ID, ignore.case = TRUE)), 3:13, drop=FALSE])
rownames(top_hits_FPKMs) <- top_hits$ID
top_hits_FPKMs <- 2^top_hits_FPKMs

# OPTIONAL: Normalize top_hits_FPKMs by highest expression level for that gene
norm_FPKMs <- t(apply(top_hits_FPKMs, 1, function(x)(x-min(x))/(max(x)-min(x))))

# Generate heatmap for top hits, non-normalized
library(gplots)
library(RColorBrewer)
heatmap.2(top_hits_FPKMs, key=TRUE, col=colorRampPalette(c('red', 'black', 'green')), Colv=FALSE, dendrogram="row",
          scale="row", margins=c(10, 40), trace="none", sepwidth=c(0,0), density.info = 'none', 
          key.title = NA, colsep=1:ncol(top_hits_FPKMs), rowsep=1:nrow(top_hits_FPKMs), keysize = 1)

# Generate heatmap for top hits, normalized
heatmap.2(norm_FPKMs, key=TRUE, col=colorRampPalette(c('red', 'black', 'green')), Colv=FALSE, dendrogram="row",
          margins=c(10, 30), tracecol="white", trace="none", sepwidth=c(0,0), density.info = 'none', key.title = NA,
          colsep=1:ncol(norm_FPKMs), rowsep=1:nrow(norm_FPKMs), keysize = 1)
相关推荐
泡泡以安9 分钟前
【Android逆向工程】第3章:Java 字节码与 Smali 语法基础
android·java·安卓逆向
毕设源码-朱学姐6 小时前
【开题答辩全过程】以 工厂能耗分析平台的设计与实现为例,包含答辩的问题和答案
java·vue.js
Spring AI学习7 小时前
Spring AI深度解析(9/50):可观测性与监控体系实战
java·人工智能·spring
java1234_小锋8 小时前
Spring IoC的实现机制是什么?
java·后端·spring
xqqxqxxq8 小时前
背单词软件技术笔记(V2.0扩展版)
java·笔记·python
消失的旧时光-19439 小时前
深入理解 Java 线程池(二):ThreadPoolExecutor 执行流程 + 运行状态 + ctl 原理全解析
java·开发语言
哈哈老师啊9 小时前
Springboot学生综合测评系统hxtne(程序+源码+数据库+调试部署+开发环境)带论文文档1万字以上,文末可获取,系统界面在最后面。
java·数据库·spring boot
4311媒体网9 小时前
帝国cms调用文章内容 二开基本操作
java·开发语言·php
zwxu_9 小时前
Nginx NIO对比Java NIO
java·nginx·nio
可观测性用观测云10 小时前
Pyroscope Java 接入最佳实践
java