1 利用FUSION方法计算可变剪接sQTL TWAS模型
表型数据为leafcutter软件计算的内含子切除率(intron excision ratio)数据,每个表型(内含子)的ID格式形如chr1_12227_12595_clu_1_+,即“染色体_起始位置_终止位置_clu_簇编号_链方向”。
2 运行命令
直接运行以下命令
bash
# Build sQTL TWAS weights with FUSION, then package them.
# Replace every /.../ placeholder below with a real path before running.
# All expansions are double-quoted so paths containing spaces or glob
# characters are passed to Rscript as single, unmodified arguments.

# Inputs
target_plink=/.../plink                                  # passed as --PLINK_prefix
phenotype_file=/.../AS/AS.txt                            # FID, IID, then one column per intron
coordinate_file=/.../AS_coordinate/AS_coordinate.txt     # Chr, start, end, ID per intron
fusion_ldref=/.../LD_reference/GRCh38/EAS/LDREF          # directory holding the LD reference files
covar=/.../cov.txt                                       # passed as --covar

# Software locations
fusion_software=/.../fusion_twas
plink=/.../plink
gemma=/.../gemma
gcta=/.../fusion_twas/gcta_nr_robust

# Output directory
output=/.../model_sqtl/

# Step 1: build the models; this produces the wgt.RDat files.
feature_name="NA"
Rscript ./TWAS_weights_AS.R \
  --PLINK_prefix "${target_plink}" \
  --phenotype_file "${phenotype_file}" \
  --coordinate_file "${coordinate_file}" \
  --gene_name "${feature_name}" \
  --plink "${plink}" \
  --gcta "${gcta}" \
  --gemma "${gemma}" \
  --ld_ref_dir "${fusion_ldref}" \
  --fusion_software "${fusion_software}" \
  --output_dir "${output}" \
  --covar "${covar}"

# Step 2: package the RDat files found under ${output}/Output.
Rscript ./OP_packaging_fusion_weights.R \
  --RDat_dir "${output}/Output" \
  --coordinate_file "${coordinate_file}" \
  --output_name TWAS_Weights \
  --output_dir "${output}/TWAS_weights_package"
3 输入文件详情
3.1 target_plink为目标样本的基因型数据(由vcf转换得到的plink格式文件),此处填写plink文件的前缀
3.2 AS.txt为内含子切除率矩阵(每行一个样本,前两列为FID和IID,其余每列为一个内含子)
格式如下
bash
head -5 AS.txt | awk '{print $1, $2, $3, $4, $5}'
FID IID chr1_12227_12595_clu_1_+ chr1_12227_12613_clu_1_+ chr1_12227_12646_clu_1_+
A1 A1 -1.117206e+00 1.173844e+00 3.630778e-01
A2 A2 8.535833e-01 -4.706559e-01 5.331248e-01
A3 A3 8.869868e-01 -8.779329e-01 -3.090774e-01
A4 A4 0.01870461 -0.12663100 0.01870461
3.3 AS_coordinate.txt文件为内含子位置信息
格式如下
bash
head AS_coordinate.txt
Chr start end ID
1 12227 12595 chr1_12227_12595_clu_1_+
1 12227 12613 chr1_12227_12613_clu_1_+
1 12227 12646 chr1_12227_12646_clu_1_+
1 12697 13221 chr1_12697_13221_clu_2_+
1 12697 13403 chr1_12697_13403_clu_2_+
1 12697 13453 chr1_12697_13453_clu_2_+
1 12697 13483 chr1_12697_13483_clu_2_+
1 12721 13221 chr1_12721_13221_clu_2_+
1 12721 13403 chr1_12721_13403_clu_2_+
3.4 /.../LD_reference/GRCh38/EAS/LDREF为LD reference所在文件夹
bash
1000G.EAS.10.bed 1000G.EAS.12.map 1000G.EAS.15.bim 1000G.EAS.17.nosex 1000G.EAS.1.fam 1000G.EAS.21.ped 1000G.EAS.3.log 1000G.EAS.6.bed 1000G.EAS.8.map
1000G.EAS.10.bim 1000G.EAS.12.nosex 1000G.EAS.15.fam 1000G.EAS.17.ped 1000G.EAS.1.log 1000G.EAS.22.bed 1000G.EAS.3.map 1000G.EAS.6.bim 1000G.EAS.8.nosex
1000G.EAS.10.fam 1000G.EAS.12.ped 1000G.EAS.15.log 1000G.EAS.18.bed 1000G.EAS.1.map 1000G.EAS.22.bim 1000G.EAS.3.nosex 1000G.EAS.6.fam 1000G.EAS.8.ped
1000G.EAS.10.log 1000G.EAS.13.bed 1000G.EAS.15.map 1000G.EAS.18.bim 1000G.EAS.1.nosex 1000G.EAS.22.fam 1000G.EAS.3.ped 1000G.EAS.6.log 1000G.EAS.9.bed
1000G.EAS.10.map 1000G.EAS.13.bim 1000G.EAS.15.nosex 1000G.EAS.18.fam 1000G.EAS.1.ped 1000G.EAS.22.log 1000G.EAS.4.bed 1000G.EAS.6.map 1000G.EAS.9.bim
1000G.EAS.10.nosex 1000G.EAS.13.fam 1000G.EAS.15.ped 1000G.EAS.18.log 1000G.EAS.20.bed 1000G.EAS.22.map 1000G.EAS.4.bim 1000G.EAS.6.nosex 1000G.EAS.9.fam
1000G.EAS.10.ped 1000G.EAS.13.log 1000G.EAS.16.bed 1000G.EAS.18.map 1000G.EAS.20.bim 1000G.EAS.22.nosex 1000G.EAS.4.fam 1000G.EAS.6.ped 1000G.EAS.9.log
1000G.EAS.11.bed 1000G.EAS.13.map 1000G.EAS.16.bim 1000G.EAS.18.nosex 1000G.EAS.20.fam 1000G.EAS.22.ped 1000G.EAS.4.log 1000G.EAS.7.bed 1000G.EAS.9.map
1000G.EAS.11.bim 1000G.EAS.13.nosex 1000G.EAS.16.fam 1000G.EAS.18.ped 1000G.EAS.20.log 1000G.EAS.2.bed 1000G.EAS.4.map 1000G.EAS.7.bim 1000G.EAS.9.nosex
1000G.EAS.11.fam 1000G.EAS.13.ped 1000G.EAS.16.log 1000G.EAS.19.bed 1000G.EAS.20.map 1000G.EAS.2.bim 1000G.EAS.4.nosex 1000G.EAS.7.fam 1000G.EAS.9.ped
1000G.EAS.11.log 1000G.EAS.14.bed 1000G.EAS.16.map 1000G.EAS.19.bim 1000G.EAS.20.nosex 1000G.EAS.2.fam 1000G.EAS.4.ped 1000G.EAS.7.log
1000G.EAS.11.map 1000G.EAS.14.bim 1000G.EAS.16.nosex 1000G.EAS.19.fam 1000G.EAS.20.ped 1000G.EAS.2.log 1000G.EAS.5.bed 1000G.EAS.7.map
1000G.EAS.11.nosex 1000G.EAS.14.fam 1000G.EAS.16.ped 1000G.EAS.19.log 1000G.EAS.21.bed 1000G.EAS.2.map 1000G.EAS.5.bim 1000G.EAS.7.nosex
1000G.EAS.11.ped 1000G.EAS.14.log 1000G.EAS.17.bed 1000G.EAS.19.map 1000G.EAS.21.bim 1000G.EAS.2.nosex 1000G.EAS.5.fam 1000G.EAS.7.ped
1000G.EAS.12.bed 1000G.EAS.14.map 1000G.EAS.17.bim 1000G.EAS.19.nosex 1000G.EAS.21.fam 1000G.EAS.2.ped 1000G.EAS.5.log 1000G.EAS.8.bed
1000G.EAS.12.bim 1000G.EAS.14.nosex 1000G.EAS.17.fam 1000G.EAS.19.ped 1000G.EAS.21.log 1000G.EAS.3.bed 1000G.EAS.5.map 1000G.EAS.8.bim
1000G.EAS.12.fam 1000G.EAS.14.ped 1000G.EAS.17.log 1000G.EAS.1.bed 1000G.EAS.21.map 1000G.EAS.3.bim 1000G.EAS.5.nosex 1000G.EAS.8.fam
1000G.EAS.12.log 1000G.EAS.15.bed 1000G.EAS.17.map 1000G.EAS.1.bim 1000G.EAS.21.nosex 1000G.EAS.3.fam 1000G.EAS.5.ped 1000G.EAS.8.log
3.5 fusion_twas为fusion安装包所在的位置
3.6 plink、gemma、gcta_nr_robust分别是各软件位置
4 所需要的R程序
4.1 TWAS_weights_AS.R
该文件可在本文附带的资源中获取
注意:TWAS_weights_AS.R里需要修改t.awk的路径
bash
cat t.awk
# Print three columns of a table, chosen by header name.
# The caller sets the awk variables c1, c2 and c3 to the wanted header names.
# Fields are split with awk's current FS and joined with its current OFS.
NR == 1 {
    # Header record: remember the position of every column name.
    for (col = 1; col <= NF; col++)
        pos[$col] = col
    next
}
{
    # Data records: emit the three requested columns in the order c1, c2, c3.
    print $pos[c1], $pos[c2], $pos[c3]
}
4.2 TWAS_weights_AS.R里调用了FUSION.compute_weights.R程序
该文件可在本文附带的资源中获取
参考:
leafcutter : https://davidaknowles.github.io/leafcutter/
FUSION: http://gusevlab.org/projects/fusion/

