人工智能药物设计和生信常用 R 包一键全自动安装脚本

人工智能药物设计和生信常用 R 包一键全自动安装脚本

R下载地址:https://www.r-project.org/

Rtools下载地址(R安装什么版本下载对应的版本):https://cran.r-project.org/bin/windows/Rtools/

覆盖:scRNA-seq / bulk RNA-seq / 富集分析 / 注释 / 基因组学 / 可视化 / 加速与并行,并包含 CRAN + Bioconductor + GitHub 安装、失败重试、日志记录、可选并行。

用法:把下面整段保存为 install_bio_pkgs.R,然后在 R 里运行:
source("install_bio_pkgs.R")

R 复制代码
###############################################################################
#  Bioinformatics R Packages One-Click Installer (CRAN + Bioconductor + GitHub)
#  - 全自动安装/加载常用生信包(含scRNA-seq/bulk/富集/注释/基因组学/可视化)
#  - 自动重试、日志、可选并行
#  - Windows/macOS/Linux 通用(Windows建议先装Rtools)
###############################################################################

# =========================
# 0) 用户可调配置
# =========================
# User-tunable settings for the installer.
#   install_heavy_optional  install the heavy/optional sets (cell-cell
#                           communication, pseudotime, ...); many
#                           dependencies, slow to install
#   install_github          install the GitHub-hosted packages (requires
#                           network access to GitHub)
#   use_parallel            enable parallel installation (forwarded as Ncpus
#                           to the Bioconductor installer)
#   max_retries             maximum number of attempts per package
#   cran_repo               CRAN mirror URL
#   log_file                path of the log file that log_msg() appends to
CONFIG_INSTALL <- list(
  install_heavy_optional = TRUE,
  install_github         = TRUE,
  use_parallel           = TRUE,
  max_retries            = 2,
  cran_repo              = "https://cloud.r-project.org",
  log_file               = "bio_pkg_install.log"
)

# =========================
# 1) 日志与工具函数
# =========================
# Print a timestamped, level-tagged message and append it to the log file.
#
# Args:
#   ...    Message fragments; they are concatenated with no separator, so
#          callers include any spacing they want inside the fragments.
#   level  Tag shown in square brackets (default "INFO").
#
# Returns NULL invisibly. Writing to CONFIG_INSTALL$log_file is best effort:
# a failure to write is swallowed so that logging never aborts an install.
log_msg <- function(..., level = "INFO") {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  # paste0() rather than paste(): paste() would insert its default " "
  # separator between fragments that already carry their own spacing.
  line <- paste0("[", stamp, "] [", level, "] ", paste0(..., collapse = ""))
  # sep = "" avoids a trailing blank before the newline.
  cat(line, "\n", sep = "")
  try(write(line, file = CONFIG_INSTALL$log_file, append = TRUE), silent = TRUE)
  invisible(NULL)
}

# Number of worker processes to request for package installation.
#
# Returns 1L when CONFIG_INSTALL$use_parallel is off or when the core count
# cannot be determined; otherwise the detected core count minus one, with a
# floor of 1L.
n_cores <- function() {
  if (CONFIG_INSTALL$use_parallel) {
    detected <- try(parallel::detectCores(), silent = TRUE)
    usable <- !inherits(detected, "try-error") && !is.na(detected)
    if (usable) {
      return(max(1L, as.integer(detected) - 1L))
    }
  }
  1L
}

# Report whether the namespace of `pkg` can be loaded.
#
# Args:
#   pkg  Package name as a single string.
#
# Returns the logical result of requireNamespace(); note that a successful
# check leaves the namespace loaded in the current session.
is_installed <- function(pkg) {
  requireNamespace(package = pkg, quietly = TRUE)
}

# Point the session at the configured CRAN mirror and make sure the helper
# packages needed for source/GitHub installs are present.
#
# Side effect: overwrites the session-wide `repos` option.
# Returns TRUE invisibly.
ensure_cran_tools <- function() {
  options(repos = c(CRAN = CONFIG_INSTALL$cran_repo))
  # Build/installation helpers
  for (tool in c("remotes", "pkgbuild")) {
    if (is_installed(tool)) next
    install.packages(tool, quiet = TRUE)
  }
  invisible(TRUE)
}

# Make sure BiocManager is available, installing it from CRAN if needed.
#
# The mirror is passed explicitly so this helper works even when it is the
# first installer function called in a session, i.e. before anything has set
# options(repos); without a mirror a non-interactive install.packages() call
# cannot pick a repository.
#
# Returns TRUE invisibly.
ensure_bioc <- function() {
  if (!is_installed("BiocManager")) {
    install.packages(
      "BiocManager",
      repos = c(CRAN = CONFIG_INSTALL$cran_repo),
      quiet = TRUE
    )
  }
  invisible(TRUE)
}

# Install CRAN packages one at a time, with retries and logging.
#
# Args:
#   pkgs  Character vector of package names. Duplicates are dropped and
#         packages for which is_installed() is already TRUE are skipped.
#
# Each package gets up to CONFIG_INSTALL$max_retries attempts; an attempt
# counts as successful when is_installed() is TRUE afterwards. Outcomes are
# reported only through log_msg().
#
# Returns TRUE invisibly, whether or not every package was installed.
install_cran_safe <- function(pkgs) {
  pkgs <- unique(pkgs)
  pkgs <- pkgs[!vapply(pkgs, is_installed, logical(1))]
  if (length(pkgs) == 0) return(invisible(TRUE))

  options(repos = c(CRAN = CONFIG_INSTALL$cran_repo))
  for (pkg in pkgs) {
    ok <- FALSE
    for (attempt in seq_len(CONFIG_INSTALL$max_retries)) {
      log_msg("CRAN安装: ", pkg, " (尝试 ", attempt, "/", CONFIG_INSTALL$max_retries, ")")
      ok <- tryCatch(
        withCallingHandlers(
          {
            install.packages(pkg, dependencies = TRUE, quiet = TRUE)
            is_installed(pkg)
          },
          # install.packages() signals most failures (package unavailable,
          # non-zero exit status) as warnings rather than errors. Record them
          # so the log explains a failed attempt; the handler returns normally,
          # so the install itself is not interrupted.
          warning = function(w) {
            log_msg("CRAN安装警告: ", pkg, " | ", conditionMessage(w), level = "WARN")
          }
        ),
        error = function(e) {
          log_msg("CRAN安装失败: ", pkg, " | ", conditionMessage(e), level = "WARN")
          FALSE
        }
      )
      if (ok) break
    }
    if (ok) {
      log_msg("✓ CRAN已安装: ", pkg, level = "SUCCESS")
    } else {
      log_msg("✗ CRAN最终失败: ", pkg, level = "ERROR")
    }
  }
  invisible(TRUE)
}

# Install packages through BiocManager one at a time, with retries and
# logging.
#
# Args:
#   pkgs  Character vector of package names. Duplicates are dropped and
#         packages for which is_installed() is already TRUE are skipped.
#
# Each package gets up to CONFIG_INSTALL$max_retries attempts; an attempt
# counts as successful when is_installed() is TRUE afterwards. The value of
# n_cores() is forwarded as Ncpus. Outcomes are reported only through
# log_msg().
#
# Returns TRUE invisibly, whether or not every package was installed.
install_bioc_safe <- function(pkgs) {
  ensure_bioc()
  pkgs <- unique(pkgs)
  pkgs <- pkgs[!vapply(pkgs, is_installed, logical(1))]
  if (length(pkgs) == 0) return(invisible(TRUE))

  nc <- n_cores()
  for (pkg in pkgs) {
    ok <- FALSE
    for (attempt in seq_len(CONFIG_INSTALL$max_retries)) {
      log_msg("Bioc安装: ", pkg, " (尝试 ", attempt, "/", CONFIG_INSTALL$max_retries, ", Ncpus=", nc, ")")
      ok <- tryCatch(
        withCallingHandlers(
          {
            BiocManager::install(pkg, ask = FALSE, update = FALSE, quiet = TRUE, Ncpus = nc)
            is_installed(pkg)
          },
          # Installation problems usually surface as warnings, not errors.
          # Record them so the log explains a failed attempt; the handler
          # returns normally, so the install itself is not interrupted.
          warning = function(w) {
            log_msg("Bioc安装警告: ", pkg, " | ", conditionMessage(w), level = "WARN")
          }
        ),
        error = function(e) {
          log_msg("Bioc安装失败: ", pkg, " | ", conditionMessage(e), level = "WARN")
          FALSE
        }
      )
      if (ok) break
    }
    if (ok) {
      log_msg("✓ Bioc已安装: ", pkg, level = "SUCCESS")
    } else {
      log_msg("✗ Bioc最终失败: ", pkg, level = "ERROR")
    }
  }
  invisible(TRUE)
}

# Install GitHub-hosted packages one at a time, with retries and logging.
#
# Args:
#   repo_map  Named list mapping package name -> "owner/repo",
#             e.g. list(pkg = "owner/repo").
#
# Does nothing (beyond logging a SKIP line) unless
# CONFIG_INSTALL$install_github is TRUE. Each missing package gets up to
# CONFIG_INSTALL$max_retries attempts; an attempt counts as successful when
# is_installed() is TRUE afterwards.
#
# Returns TRUE invisibly, whether or not every package was installed.
install_github_safe <- function(repo_map) {
  # isTRUE() matches how the flag is tested elsewhere in this script and
  # treats a missing/NULL flag as "off" instead of raising an error.
  if (!isTRUE(CONFIG_INSTALL$install_github)) {
    log_msg("已关闭GitHub安装(CONFIG_INSTALL$install_github=FALSE)", level = "SKIP")
    return(invisible(TRUE))
  }
  ensure_cran_tools()

  for (pkg in names(repo_map)) {
    if (is_installed(pkg)) {
      log_msg("GitHub包已存在,跳过: ", pkg, level = "SKIP")
      next
    }
    ok <- FALSE
    for (attempt in seq_len(CONFIG_INSTALL$max_retries)) {
      log_msg("GitHub安装: ", pkg, " <- ", repo_map[[pkg]],
              " (尝试 ", attempt, "/", CONFIG_INSTALL$max_retries, ")")
      ok <- tryCatch(
        withCallingHandlers(
          {
            remotes::install_github(repo_map[[pkg]], dependencies = TRUE, upgrade = "never", quiet = TRUE)
            is_installed(pkg)
          },
          # Record warnings raised during the install so the log explains a
          # failed attempt; the handler returns normally, so the install
          # itself is not interrupted.
          warning = function(w) {
            log_msg("GitHub安装警告: ", pkg, " | ", conditionMessage(w), level = "WARN")
          }
        ),
        error = function(e) {
          log_msg("GitHub安装失败: ", pkg, " | ", conditionMessage(e), level = "WARN")
          FALSE
        }
      )
      if (ok) break
    }
    if (ok) {
      log_msg("✓ GitHub已安装: ", pkg, level = "SUCCESS")
    } else {
      log_msg("✗ GitHub最终失败: ", pkg, level = "ERROR")
    }
  }
  invisible(TRUE)
}

# =========================
# 2) 常用包清单(可按需删减/增补)
# =========================

# --- CRAN:通用、可视化、加速、工具 ---
# General-purpose CRAN packages: data wrangling, plotting, matrices and
# parallel helpers.
# "parallel" is intentionally not listed: it is a base package that ships
# with R itself, so it is not something CRAN can install.
PKG_CRAN_CORE <- c(
  # data wrangling / IO
  "data.table", "dplyr", "tidyr", "tibble", "purrr", "stringr", "readr",
  # plotting
  "ggplot2", "patchwork", "cowplot", "ggrepel", "ggpubr", "ggsci",
  "RColorBrewer", "scales", "pheatmap",
  # matrices / numerics
  "Matrix", "matrixStats", "irlba", "uwot", "Rcpp",
  # parallel / async helpers
  "future", "future.apply", "doParallel", "foreach",
  # developer tooling
  "remotes", "devtools"
)

# --- CRAN:单细胞/Seurat生态常用 ---
# CRAN packages commonly used in the single-cell / Seurat ecosystem.
PKG_CRAN_SCRNA <- c(
  "Seurat",
  "SeuratObject",
  "sctransform",
  "harmony",  # Harmony batch correction
  "SoupX",    # ambient RNA
  "NMF",
  "igraph"
)

# --- Bioconductor:基础生信、差异、注释、富集 ---
# Core set handed to install_bioc_safe(): infrastructure, differential
# expression, genomics, annotation and enrichment.
PKG_BIOC_CORE <- c(
  "BiocManager",
  # containers / infrastructure
  c("SummarizedExperiment", "SingleCellExperiment", "S4Vectors",
    "BiocGenerics", "DelayedArray", "BiocParallel"),
  # differential expression
  c("limma", "edgeR", "DESeq2"),
  # genomic ranges, sequences and file formats
  c("GenomicRanges", "IRanges", "GenomeInfoDb",
    "Biostrings", "Rsamtools", "rtracklayer"),
  # annotation and ID conversion (human and mouse)
  c("AnnotationDbi", "biomaRt", "org.Hs.eg.db", "org.Mm.eg.db"),
  # enrichment analysis and its visualisation
  c("clusterProfiler", "enrichplot", "DOSE", "ReactomePA", "pathview",
    "GSVA", "GSEABase", "fgsea")
)

# --- Bioconductor:单细胞分析常用 ---
# Single-cell analysis set handed to install_bioc_safe().
PKG_BIOC_SCRNA <- c(
  "scater",
  "scran",
  "scuttle",
  "batchelor",    # batch correction (MNN etc.)
  "celldex",      # automatic annotation
  "SingleR",      # automatic annotation
  "scDblFinder",  # doublet detection
  "ComplexHeatmap",
  "circlize"
)

# --- 可选重型:拟时序/通讯/ATAC等(依赖多,安装慢)---
# Optional heavy Bioconductor packages (trajectory / pseudotime).
# monocle3 is NOT listed here: it is not distributed through Bioconductor,
# so installing it via BiocManager can never succeed. It is installed from
# its GitHub repository instead (see PKG_GITHUB below).
PKG_BIOC_HEAVY <- c(
  "slingshot", "tradeSeq", "destiny",
  "monocle"      # monocle2 (legacy)
)

# Optional heavy CRAN packages.
PKG_CRAN_HEAVY <- c(
  "Signac"      # scATAC-seq (depends on the Seurat ecosystem + system libraries)
)

# GitHub-hosted packages that are not on CRAN/Bioconductor, as
# package name -> "owner/repo". Repositories may move; failures here can be
# ignored.
PKG_GITHUB <- list(
  DoubletFinder = "chris-mcginnis-ucsf/DoubletFinder",
  CellChat      = "jinworks/CellChat",
  scRNAtoolVis  = "junjunlab/scRNAtoolVis",
  ClusterGVis   = "junjunlab/ClusterGVis",
  monocle3      = "cole-trapnell-lab/monocle3"
)

# =========================
# 3) 开始安装
# =========================
# Announce the run and echo the effective configuration.
log_msg("===== 生信常用包全自动安装开始 =====")
log_msg("CRAN镜像: ", CONFIG_INSTALL$cran_repo)
log_msg("并行核心数: ", n_cores())
log_msg("安装重型可选包: ", CONFIG_INSTALL$install_heavy_optional)
log_msg("安装GitHub包: ", CONFIG_INSTALL$install_github)

# Bootstrap the installer tooling first.
ensure_cran_tools()
ensure_bioc()

# Core sets: CRAN lists first, then the Bioconductor lists.
invisible(lapply(list(PKG_CRAN_CORE, PKG_CRAN_SCRNA), install_cran_safe))
invisible(lapply(list(PKG_BIOC_CORE, PKG_BIOC_SCRNA), install_bioc_safe))

# Heavy optional sets, only when enabled in the configuration.
if (!isTRUE(CONFIG_INSTALL$install_heavy_optional)) {
  log_msg("已跳过重型可选包(install_heavy_optional=FALSE)", level = "SKIP")
} else {
  install_cran_safe(PKG_CRAN_HEAVY)
  install_bioc_safe(PKG_BIOC_HEAVY)
}

# GitHub-hosted packages (the function itself honours install_github).
install_github_safe(PKG_GITHUB)

# =========================
# 4) 安装总结(缺失包列出)
# =========================
# Every package this run was expected to provide, honouring the optional
# heavy and GitHub switches.
ALL_EXPECTED <- unique(c(
  PKG_CRAN_CORE, PKG_CRAN_SCRNA, PKG_BIOC_CORE, PKG_BIOC_SCRNA,
  if (isTRUE(CONFIG_INSTALL$install_heavy_optional)) PKG_BIOC_HEAVY else character(0),
  if (isTRUE(CONFIG_INSTALL$install_heavy_optional)) PKG_CRAN_HEAVY else character(0),
  if (isTRUE(CONFIG_INSTALL$install_github)) names(PKG_GITHUB) else character(0)
))

# Packages that still fail the is_installed() check after all attempts.
missing <- ALL_EXPECTED[!vapply(ALL_EXPECTED, is_installed, logical(1))]

log_msg("===== 安装结束 =====", level = "SUCCESS")
log_msg("期望安装包数: ", length(ALL_EXPECTED), level = "INFO")
log_msg("当前仍缺失包数: ", length(missing), level = "INFO")

if (length(missing) > 0) {
  log_msg("缺失包清单(建议复制出来单独排障安装):", level = "WARN")
  cat(paste(missing, collapse = ", "), "\n")
  # Only claim the file was written when the write actually succeeded;
  # otherwise report why it could not be written.
  tryCatch({
    write(paste(missing, collapse = ", "), file = "missing_bio_pkgs.txt")
    log_msg("已写出 missing_bio_pkgs.txt", level = "INFO")
  }, error = function(e) {
    log_msg("写出 missing_bio_pkgs.txt 失败: ", conditionMessage(e), level = "WARN")
  })
} else {
  log_msg("全部包已成功安装。", level = "SUCCESS")
}

###############################################################################
# 提示:
# 1) Windows 若出现编译失败:请先安装 Rtools,并确保系统PATH正常
# 2) monocle3/Signac/CellChat 等重型包依赖多,失败不罕见,可看日志定位
# 3) 若GitHub访问不稳定,可把 install_github=FALSE 先完成主体安装
###############################################################################

下面是"生信差异分析 + 全套作图 + 富集 + 注释 + 常见输入输出"的全自动安装脚本,把你强调的 limma、差异分析常用包(DESeq2/edgeR/DEGreport/EnhancedVolcano...)、作图(ggplot2 体系、热图、火山图、韦恩图、PCA、富集图、网络图),以及常见工具包(读取写入、注释库、ID 转换)都覆盖进去。脚本包含:CRAN + Bioconductor(可选 GitHub)安装、失败重试、日志,最后输出缺失包清单。

直接复制保存为 install_DEG_all.R,然后:
source("install_DEG_all.R")

R 复制代码
###############################################################################
#  DEG + Plotting + Enrichment Full Installer (CRAN + Bioconductor + optional GH)
#  覆盖:limma/edgeR/DESeq2,火山图/热图/韦恩/PCA/富集可视化/常用注释工具等
###############################################################################

# Installer settings.
#   cran_repo       CRAN mirror URL
#   max_retries     maximum number of attempts per package
#   log_file        path of the log file that log_msg() appends to
#   install_github  set to TRUE to also install the GitHub-hosted packages
CONFIG_INSTALL <- list(
  cran_repo      = "https://cloud.r-project.org",
  max_retries    = 2,
  log_file       = "install_deg_full.log",
  install_github = FALSE
)

# Print a timestamped, level-tagged message and append it to the log file.
#
# Args:
#   ...    Message fragments; they are concatenated with no separator, so
#          callers include any spacing they want inside the fragments.
#   level  Tag shown in square brackets (default "INFO").
#
# Returns NULL invisibly. Writing to CONFIG_INSTALL$log_file is best effort:
# a failure to write is swallowed so that logging never aborts an install.
log_msg <- function(..., level = "INFO") {
  when <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  # paste0() rather than paste(): paste() would insert its default " "
  # separator between fragments that already carry their own spacing.
  body <- paste0(..., collapse = "")
  entry <- paste0("[", when, "] [", level, "] ", body)
  # sep = "" avoids a trailing blank before the newline.
  cat(entry, "\n", sep = "")
  try(write(entry, file = CONFIG_INSTALL$log_file, append = TRUE), silent = TRUE)
  invisible(NULL)
}

# Report whether the namespace of `pkg` (a single package name) can be
# loaded; a successful check leaves that namespace loaded in the session.
is_installed <- function(pkg) {
  requireNamespace(package = pkg, quietly = TRUE)
}

# Point the session at the configured CRAN mirror and install the two
# installer front-ends (BiocManager, remotes) when they are not loadable.
#
# Side effect: overwrites the session-wide `repos` option.
ensure_tools <- function() {
  options(repos = c(CRAN = CONFIG_INSTALL$cran_repo))
  for (tool in c("BiocManager", "remotes")) {
    if (is_installed(tool)) next
    install.packages(tool, quiet = TRUE)
  }
}

# Install CRAN packages one at a time, with retries and logging.
#
# Args:
#   pkgs  Character vector of package names. Duplicates are dropped and
#         packages for which is_installed() is already TRUE are skipped.
#
# Each package gets up to CONFIG_INSTALL$max_retries attempts; an attempt
# counts as successful when is_installed() is TRUE afterwards. Outcomes are
# reported only through log_msg().
#
# Returns TRUE invisibly, whether or not every package was installed.
install_cran_safe <- function(pkgs) {
  options(repos = c(CRAN = CONFIG_INSTALL$cran_repo))
  pkgs <- unique(pkgs)
  pkgs <- pkgs[!vapply(pkgs, is_installed, logical(1))]
  if (length(pkgs) == 0) return(invisible(TRUE))

  for (p in pkgs) {
    ok <- FALSE
    for (i in seq_len(CONFIG_INSTALL$max_retries)) {
      log_msg("CRAN安装: ", p, " (", i, "/", CONFIG_INSTALL$max_retries, ")")
      ok <- tryCatch(
        withCallingHandlers(
          {
            install.packages(p, dependencies = TRUE, quiet = TRUE)
            is_installed(p)
          },
          # install.packages() signals most failures (package unavailable,
          # non-zero exit status) as warnings rather than errors. Record them
          # so the log explains a failed attempt; the handler returns normally,
          # so the install itself is not interrupted.
          warning = function(w) {
            log_msg("CRAN警告: ", p, " | ", conditionMessage(w), level = "WARN")
          }
        ),
        error = function(e) {
          log_msg("CRAN失败: ", p, " | ", conditionMessage(e), level = "WARN")
          FALSE
        }
      )
      if (ok) break
    }
    if (ok) {
      log_msg("✓ CRAN成功: ", p, level = "SUCCESS")
    } else {
      log_msg("✗ CRAN最终失败: ", p, level = "ERROR")
    }
  }
  invisible(TRUE)
}

# Install packages through BiocManager one at a time, with retries and
# logging.
#
# Args:
#   pkgs  Character vector of package names. Duplicates are dropped and
#         packages for which is_installed() is already TRUE are skipped.
#
# Each package gets up to CONFIG_INSTALL$max_retries attempts; an attempt
# counts as successful when is_installed() is TRUE afterwards. Outcomes are
# reported only through log_msg().
#
# Returns TRUE invisibly, whether or not every package was installed.
install_bioc_safe <- function(pkgs) {
  ensure_tools()
  pkgs <- unique(pkgs)
  pkgs <- pkgs[!vapply(pkgs, is_installed, logical(1))]
  if (length(pkgs) == 0) return(invisible(TRUE))

  for (p in pkgs) {
    ok <- FALSE
    for (i in seq_len(CONFIG_INSTALL$max_retries)) {
      log_msg("Bioc安装: ", p, " (", i, "/", CONFIG_INSTALL$max_retries, ")")
      ok <- tryCatch(
        withCallingHandlers(
          {
            BiocManager::install(p, ask = FALSE, update = FALSE, quiet = TRUE)
            is_installed(p)
          },
          # Installation problems usually surface as warnings, not errors.
          # Record them so the log explains a failed attempt; the handler
          # returns normally, so the install itself is not interrupted.
          warning = function(w) {
            log_msg("Bioc警告: ", p, " | ", conditionMessage(w), level = "WARN")
          }
        ),
        error = function(e) {
          log_msg("Bioc失败: ", p, " | ", conditionMessage(e), level = "WARN")
          FALSE
        }
      )
      if (ok) break
    }
    if (ok) {
      log_msg("✓ Bioc成功: ", p, level = "SUCCESS")
    } else {
      log_msg("✗ Bioc最终失败: ", p, level = "ERROR")
    }
  }
  invisible(TRUE)
}

# Install GitHub-hosted packages one at a time, with retries and logging.
#
# Args:
#   repo_map  Named list mapping package name -> "owner/repo".
#
# Does nothing (beyond logging a SKIP line) unless
# CONFIG_INSTALL$install_github is TRUE. Each missing package gets up to
# CONFIG_INSTALL$max_retries attempts; an attempt counts as successful when
# is_installed() is TRUE afterwards.
#
# Returns TRUE invisibly, whether or not every package was installed.
install_github_safe <- function(repo_map) {
  if (!isTRUE(CONFIG_INSTALL$install_github)) {
    log_msg("已关闭GitHub安装(install_github=FALSE)", level = "SKIP")
    return(invisible(TRUE))
  }
  ensure_tools()
  for (pkg in names(repo_map)) {
    if (is_installed(pkg)) {
      log_msg("GitHub已安装,跳过: ", pkg, level = "SKIP")
      next
    }
    ok <- FALSE
    for (i in seq_len(CONFIG_INSTALL$max_retries)) {
      log_msg("GitHub安装: ", pkg, " <- ", repo_map[[pkg]], " (", i, "/", CONFIG_INSTALL$max_retries, ")")
      ok <- tryCatch(
        withCallingHandlers(
          {
            remotes::install_github(repo_map[[pkg]], dependencies = TRUE, upgrade = "never", quiet = TRUE)
            is_installed(pkg)
          },
          # Record warnings raised during the install so the log explains a
          # failed attempt; the handler returns normally, so the install
          # itself is not interrupted.
          warning = function(w) {
            log_msg("GitHub警告: ", pkg, " | ", conditionMessage(w), level = "WARN")
          }
        ),
        error = function(e) {
          log_msg("GitHub失败: ", pkg, " | ", conditionMessage(e), level = "WARN")
          FALSE
        }
      )
      if (ok) break
    }
    if (ok) {
      log_msg("✓ GitHub成功: ", pkg, level = "SUCCESS")
    } else {
      log_msg("✗ GitHub最终失败: ", pkg, level = "ERROR")
    }
  }
  invisible(TRUE)
}

# =========================
# 1) 包清单(差异分析 + 作图 + 富集 + 注释 + IO)
# =========================

# ---- CRAN:通用数据处理/IO/绘图 ----
# CRAN packages: data handling, file IO and plotting. Each inner c() is one
# thematic group; the result is a single flat character vector.
PKG_CRAN <- c(
  # data wrangling
  c("data.table", "dplyr", "tidyr", "tibble", "purrr", "stringr", "readr"),
  # spreadsheet IO
  c("openxlsx", "writexl", "readxl"),
  # ggplot2 ecosystem
  c("ggplot2", "ggrepel", "ggpubr", "patchwork", "cowplot", "ggsci",
    "scales", "RColorBrewer"),
  # statistics / matrix helpers
  c("matrixStats", "Matrix", "reshape2"),
  # heatmaps and correlation plots
  c("pheatmap", "corrplot"),
  # Venn diagrams
  c("ggvenn", "VennDiagram"),
  # figure layout and export
  c("gridExtra", "svglite"),
  # miscellaneous
  "magrittr"
)

# ---- Bioconductor:差异分析(你要的"全装好 limma"就在这里)----
# Differential-expression set handed to install_bioc_safe().
# NOTE(review): "ashr" appears to be distributed on CRAN rather than
# Bioconductor; confirm that the BiocManager installer resolves it.
PKG_BIOC_DEG <- c(
  "limma",     # classic bulk differential expression
  "edgeR",     # count-based bulk differential expression
  "DESeq2",    # count-based bulk differential expression
  "tximport",  # import of salmon/kallisto-style quantifications
  "apeglm",    # DESeq2 LFC shrinkage
  "ashr"       # DESeq2 LFC shrinkage (alternative)
)

# Annotation, ID conversion and genomic infrastructure.
PKG_BIOC_ANNOT <- c(
  "AnnotationDbi",
  "biomaRt",
  "org.Hs.eg.db",  # human
  "org.Mm.eg.db",  # mouse (either organism database can be dropped)
  "GenomicRanges",
  "IRanges",
  "S4Vectors",
  "SummarizedExperiment"
)

# ---- Bioconductor:富集分析与可视化(GO/KEGG/Reactome/GSEA等)----
# Enrichment analysis and its visualisation (GO / KEGG / Reactome / GSEA).
PKG_BIOC_ENRICH <- c(
  "clusterProfiler",
  "enrichplot",
  "DOSE",
  "ReactomePA",
  "pathview",
  "GSEABase",
  "GSVA",
  "fgsea"
)

# Advanced plotting components.
PKG_BIOC_PLOT <- c(
  "ComplexHeatmap",
  "circlize",
  "EnhancedVolcano"  # widely used volcano-plot package
)

# Optional GitHub-hosted extras, as package name -> "owner/repo".
PKG_GITHUB <- list(
  # expression-pattern / clustering plots; the GitHub version may be ahead
  # of the Bioconductor release
  DEGreport = "lpantano/DEGreport"
)

# =========================
# 2) 开始安装
# =========================
# Announce the run and echo the mirror in use.
log_msg("===== 差异分析+作图+富集+注释 全自动安装开始 =====")
log_msg("CRAN repo: ", CONFIG_INSTALL$cran_repo)

ensure_tools()

# CRAN base packages first.
install_cran_safe(PKG_CRAN)

# Then the Bioconductor sets, in order: differential expression, annotation,
# enrichment, plotting.
invisible(lapply(
  list(PKG_BIOC_DEG, PKG_BIOC_ANNOT, PKG_BIOC_ENRICH, PKG_BIOC_PLOT),
  install_bioc_safe
))

# Optional GitHub extras (the function itself honours install_github).
install_github_safe(PKG_GITHUB)

# =========================
# 3) 总结缺失
# =========================
# Every package this run was expected to provide; GitHub extras count only
# when GitHub installation is enabled.
ALL_EXPECTED <- unique(c(
  PKG_CRAN, PKG_BIOC_DEG, PKG_BIOC_ANNOT, PKG_BIOC_ENRICH, PKG_BIOC_PLOT,
  if (isTRUE(CONFIG_INSTALL$install_github)) names(PKG_GITHUB) else character(0)
))
# Packages that still fail the is_installed() check after all attempts.
missing <- ALL_EXPECTED[!vapply(ALL_EXPECTED, is_installed, logical(1))]

log_msg("===== 安装结束 =====", level = "SUCCESS")
log_msg("期望包数: ", length(ALL_EXPECTED), " | 缺失: ", length(missing), level = "INFO")

if (length(missing) > 0) {
  log_msg("仍缺失包(复制出来单独排错装):", level = "WARN")
  cat(paste(missing, collapse = ", "), "\n")
  # Only claim the file was written when the write actually succeeded;
  # otherwise report why it could not be written.
  tryCatch({
    write(paste(missing, collapse = ", "), file = "missing_deg_pkgs.txt")
    log_msg("已输出 missing_deg_pkgs.txt", level = "INFO")
  }, error = function(e) {
    log_msg("输出 missing_deg_pkgs.txt 失败: ", conditionMessage(e), level = "WARN")
  })
} else {
  log_msg("全部包已安装成功 ", level = "SUCCESS")
}

###############################################################################
# 常见问题提示:
# - Windows 编译失败:先装 Rtools
# - org.Hs.eg.db / org.Mm.eg.db 很大,下载慢属正常
# - DESeq2/ComplexHeatmap 依赖多,失败看日志定位缺哪个系统库/包
###############################################################################
相关推荐
牛客企业服务2 小时前
牛客CEO叶向宇:从AI工具迈向AI Agent,构建人机协作新关系
大数据·人工智能
catchadmin2 小时前
PHP 8.5 升级生存指南:避免凌晨两点回滚的检查清单
开发语言·php
3824278272 小时前
JS正则表达式实战:核心语法解析
开发语言·前端·javascript·python·html
zh_xuan2 小时前
kotlin伴生对象
开发语言·kotlin
乾元2 小时前
构建你的个人「网络 AI 实验室」——硬件、模拟器与数据集清单
运维·网络·人工智能·网络协议·架构
你怎么知道我是队长2 小时前
C语言---递归
c语言·开发语言
lkbhua莱克瓦242 小时前
机器学习的演进与深度学习的革命
人工智能·深度学习·机器学习
superman超哥2 小时前
实时互动的基石:Rust WebSocket 实现的架构之美
开发语言·rust·编程语言·rust websocket·rust实施互通·rust架构之美
楚来客2 小时前
AI基础概念之九:神经网络单层感知机的基本原理
人工智能·神经网络·cnn