Linux安装Whisper(C++版)音频解析文本

文中所有相关文件已经上传到资源包中,直接下载

在 /home 下创建 whisper 文件夹(后文脚本默认源码位于 /home/whisper/whisper.cpp-master)

下载
https://github.com/ggerganov/whisper.cpp

上传到whisper目录

解压(得到 whisper.cpp-master 目录;也可以只保留 whisper.cpp-master.zip,安装脚本会自动解压)

下载ffmpeg

cpp 复制代码
wget https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz -O ffmpeg.tar.xz

创建需要的依赖文件夹

cpp 复制代码
mkdir -p offline-rpms && cd offline-rpms

创建下载依赖的脚本 download_rpms.sh(在 offline-rpms 目录下),内容如下

cpp 复制代码
#!/bin/bash
# download_rpms.sh — download into the current directory the RPMs used to
# build whisper.cpp on CentOS 7.9 (gcc, gcc-c++, make, git, cmake3 and their
# helper packages) so they can be installed with "yum localinstall".
set -e

echo "[INFO] 正在下载离线 RPM 包(CentOS 7.9 + cmake3 3.17.5)..."

BASE_URL="https://mirrors.aliyun.com/centos-vault/7.9.2009/os/x86_64/Packages"
# NOTE(review): EPEL 7 is end-of-life; confirm the mirror still serves this path.
EPEL="https://mirrors.aliyun.com/epel/7/x86_64/Packages"

# fetch OUTPUT URL
#   Downloads URL to OUTPUT. The data is written to "OUTPUT.part" first and
#   only renamed on success, so an interrupted or failed download never leaves
#   a truncated *.rpm behind for the later "yum localinstall *.rpm" to trip on.
#   Exits the script with status 1 when the download fails.
fetch() {
    local out=$1 url=$2
    # --no-check-certificate is kept from the original script; it disables TLS
    # verification. NOTE(review): drop it if the host has up-to-date CA certs.
    if ! wget --no-check-certificate -O "${out}.part" "$url"; then
        rm -f -- "${out}.part"
        echo "[ERROR] 下载失败: $url" >&2
        exit 1
    fi
    mv -f -- "${out}.part" "$out"
}

# Basic build tools
fetch gcc.rpm "$BASE_URL/gcc-4.8.5-44.el7.x86_64.rpm"
fetch gcc-c++.rpm "$BASE_URL/gcc-c++-4.8.5-44.el7.x86_64.rpm"
fetch make.rpm "$BASE_URL/make-3.82-24.el7.x86_64.rpm"
fetch git.rpm "$BASE_URL/git-1.8.3.1-23.el7_8.x86_64.rpm"
fetch perl-Error.rpm "$BASE_URL/perl-Error-0.17020-2.el7.noarch.rpm"

# cmake3 3.17.5
fetch cmake3.rpm "$EPEL/c/cmake3-3.17.5-1.el7.x86_64.rpm"
fetch cmake3-data.rpm "$EPEL/c/cmake3-data-3.17.5-1.el7.noarch.rpm"

# cmake3 runtime dependencies (versions chosen by the author from the Aliyun EPEL mirror)
fetch libuv.rpm "$EPEL/l/libuv-1.44.2-1.el7.x86_64.rpm"
fetch jsoncpp.rpm "$EPEL/j/jsoncpp-0.10.5-2.el7.x86_64.rpm"   # <- the key fix, per the author
fetch rhash.rpm "$EPEL/r/rhash-1.3.4-2.el7.x86_64.rpm"

echo "[SUCCESS] 所有 RPM 包下载完成!"
echo "下一步:"
echo "  yum localinstall -y *.rpm"
echo "  ln -sf /usr/bin/cmake3 /usr/bin/cmake"

赋予执行权限并运行下载脚本,下载依赖:chmod +x download_rpms.sh && ./download_rpms.sh

安装依赖包(为后续构建做准备)

cpp 复制代码
 yum localinstall -y *.rpm
或者(仅当 yum 无法解析依赖时使用:--nodeps 会跳过依赖检查强制安装)
rpm -ivh --nodeps *.rpm

cmake链接

cpp 复制代码
ln -sf /usr/bin/cmake3 /usr/bin/cmake
cmake --version

回到上级

cpp 复制代码
cd ..

创建安装脚本 install_whisper.sh

cpp 复制代码
#!/bin/bash
set -e

# ANSI colour escapes used by the logging helpers (NC resets the colour).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# log MESSAGE — print MESSAGE with a green [INFO] tag.
log() {
    printf '%b\n' "${GREEN}[INFO]${NC} $1"
}

# warn MESSAGE — print MESSAGE with a yellow [WARN] tag.
warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}

# error MESSAGE — print MESSAGE with a red [ERROR] tag, then exit with status 1.
error() {
    printf '%b\n' "${RED}[ERROR]${NC} $1"
    exit 1
}

# Work from the directory containing this script; ffmpeg.tar.xz and the
# whisper.cpp sources are looked up relative to it below.
script_dir=$(dirname "$0")
cd "$script_dir"

# Refuse to continue unless running as root.
if [ "$EUID" -ne 0 ]; then
    error "请用 root 运行"
fi

# Load the devtoolset-9 environment when it is installed; otherwise carry on
# with whatever compiler is already on PATH.
if [ -f /opt/rh/devtoolset-9/enable ]; then
    . /opt/rh/devtoolset-9/enable
fi

# =============== 1. Deploy FFmpeg ===============
# Unpacks ffmpeg.tar.xz (expected in the current directory) and installs the
# ffmpeg binary — and ffprobe, when the archive contains one — under
# /opt/whisper/bin with symlinks in /usr/local/bin.
log "部署 FFmpeg..."
if [ ! -f ffmpeg.tar.xz ]; then
    error "缺少 ffmpeg.tar.xz"
fi

mkdir -p /opt/whisper/{bin,models}

# Extract into a private scratch directory instead of /tmp itself, so the
# search below cannot pick up an unrelated or stale "ffmpeg" file that happens
# to live in /tmp, and so the unpacked tree can be removed afterwards.
FF_TMP=$(mktemp -d /tmp/ffmpeg-extract.XXXXXX)
trap 'rm -rf -- "$FF_TMP"' EXIT   # also cleans up when a step below aborts
tar -xJf ffmpeg.tar.xz -C "$FF_TMP"

FF_BIN=$(find "$FF_TMP" -name ffmpeg -type f | head -n1)
if [ -z "$FF_BIN" ]; then
    error "ffmpeg.tar.xz 中未找到 ffmpeg 可执行文件"
fi
cp "$FF_BIN" /opt/whisper/bin/ffmpeg
chmod +x /opt/whisper/bin/ffmpeg
ln -sf /opt/whisper/bin/ffmpeg /usr/local/bin/ffmpeg

# The whisper-transcribe wrapper calls "ffprobe" to inspect WAV inputs, so
# install it alongside ffmpeg when the archive provides it.
FP_BIN=$(find "$FF_TMP" -name ffprobe -type f | head -n1)
if [ -n "$FP_BIN" ]; then
    cp "$FP_BIN" /opt/whisper/bin/ffprobe
    chmod +x /opt/whisper/bin/ffprobe
    ln -sf /opt/whisper/bin/ffprobe /usr/local/bin/ffprobe
else
    warn "ffmpeg.tar.xz 中未找到 ffprobe,WAV 输入将始终被重新转码"
fi

rm -rf -- "$FF_TMP"
trap - EXIT
log "FFmpeg 部署完成"

# =============== 2. Build whisper.cpp ===============
# Locates (or unpacks) the whisper.cpp-master source tree in the current
# directory, builds it with CMake, installs the CLI as
# /opt/whisper/bin/whisper-cpp and copies any bundled ggml models.
log "编译 whisper.cpp..."

SRC_DIR="whisper.cpp-master"
if [ ! -d "$SRC_DIR" ]; then
    if [ -f "whisper.cpp-master.zip" ]; then
        # unzip is not part of the RPM set downloaded earlier, so check for it
        # explicitly rather than dying with "command not found".
        command -v unzip >/dev/null 2>&1 \
            || error "未找到 unzip 命令,请先安装 unzip 或手动解压 whisper.cpp-master.zip"
        log "解压 whisper.cpp-master.zip..."
        unzip -qo whisper.cpp-master.zip
        [ -d "$SRC_DIR" ] || error "解压后未找到 $SRC_DIR 目录"
    else
        error "缺少 whisper.cpp 源码目录或 ZIP 包"
    fi
fi

cd "$SRC_DIR"

# Start from a clean build tree.
rm -rf build

# Configure and build with CMake.
cmake -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build --config Release -j"$(nproc)"

# The executables are expected under build/bin/.
BIN_DIR="build/bin"
if [ ! -d "$BIN_DIR" ]; then
    error "构建失败:未找到 build/bin 目录"
fi
if [ ! -f "$BIN_DIR/whisper-cli" ]; then
    error "构建失败:未找到 $BIN_DIR/whisper-cli"
fi

# Install the CLI (build/bin/whisper-cli) under the name whisper-cpp.
mkdir -p /opt/whisper/bin /opt/whisper/models
cp "$BIN_DIR/whisper-cli" /opt/whisper/bin/whisper-cpp
chmod +x /opt/whisper/bin/whisper-cpp
ln -sf /opt/whisper/bin/whisper-cpp /usr/local/bin/whisper-cpp

# Copy models that ship inside the source tree, if there are any.
if compgen -G "models/*.bin" >/dev/null; then
    cp models/*.bin /opt/whisper/models/
    log "模型已复制: $(ls /opt/whisper/models/)"
else
    warn "未在 $SRC_DIR/models/ 中发现 .bin 模型文件,请手动放入"
fi

cd ..

# =============== 3. Install the whisper-transcribe helper ===============
# The here-document delimiter is quoted, so the wrapper below is written out
# verbatim; nothing inside it is expanded by this installer.
cat > /usr/local/bin/whisper-transcribe << 'EOF'
#!/bin/bash
# whisper-transcribe <model> <audio> [output.txt]
#   Transcribes <audio> with /opt/whisper/models/ggml-<model>.bin. Input that
#   is not already a 16 kHz mono WAV is converted with ffmpeg first. The text
#   is written to [output.txt], or to standard output when it is omitted.
if [ $# -lt 2 ]; then
    echo "用法: whisper-transcribe <模型名> <音频文件> [输出文本文件]"
    echo "示例: whisper-transcribe base input.mp3 result.txt"
    exit 1
fi

MODEL="$1"
AUDIO="$2"
OUT="${3:-/dev/stdout}"
BIN="/opt/whisper/bin/whisper-cpp"
FFMPEG="/opt/whisper/bin/ffmpeg"
MODEL_PATH="/opt/whisper/models/ggml-${MODEL}.bin"

[ ! -f "$MODEL_PATH" ] && { echo "❌ 模型不存在: $MODEL_PATH" >&2; exit 1; }
[ ! -f "$AUDIO" ] && { echo "❌ 音频文件不存在: $AUDIO" >&2; exit 1; }

# Converted audio lives in a private mktemp directory (no predictable names in
# /tmp) that is removed on every exit path.
TMP_DIR=""
cleanup() {
    if [ -n "$TMP_DIR" ]; then
        rm -rf -- "$TMP_DIR"
    fi
}
trap cleanup EXIT

# Convert $AUDIO to 16 kHz mono 16-bit PCM WAV and point AUDIO at the result.
# Aborts when ffmpeg fails instead of running inference on a missing file.
to_wav16k() {
    TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/whisper.XXXXXX") \
        || { echo "❌ 无法创建临时目录" >&2; exit 1; }
    if ! "$FFMPEG" -y -i "$AUDIO" -ar 16000 -ac 1 -c:a pcm_s16le "$TMP_DIR/audio.wav" >/dev/null 2>&1; then
        echo "❌ 音频转换失败: $AUDIO" >&2
        exit 1
    fi
    AUDIO="$TMP_DIR/audio.wav"
}

if [[ "$AUDIO" != *.wav ]]; then
    to_wav16k
else
    # Prefer the ffprobe installed next to ffmpeg; fall back to PATH. If no
    # ffprobe can be run, SR/CH stay empty and the file is simply re-encoded.
    PROBE="/opt/whisper/bin/ffprobe"
    [ -x "$PROBE" ] || PROBE="ffprobe"
    SR=$("$PROBE" -v quiet -select_streams a:0 -show_entries stream=sample_rate -of csv=p=0 "$AUDIO" 2>/dev/null)
    CH=$("$PROBE" -v quiet -select_streams a:0 -show_entries stream=channels -of csv=p=0 "$AUDIO" 2>/dev/null)
    if [ "$SR" != "16000" ] || [ "$CH" != "1" ]; then
        to_wav16k
    fi
fi

# Run inference; with --output-txt the text is expected in "<audio>.txt".
"$BIN" -m "$MODEL_PATH" -f "$AUDIO" --output-txt
TXT_FILE="${AUDIO%.wav}.wav.txt"

if [ -f "$TXT_FILE" ]; then
    if [ -n "${3:-}" ]; then
        cat "$TXT_FILE" > "$OUT"
    else
        # Write to the inherited stdout directly: re-opening /dev/stdout with
        # ">" would truncate a file that stdout has been redirected to.
        cat "$TXT_FILE"
    fi
    rm -f "$TXT_FILE" 2>/dev/null
    echo "✅ 转写完成 → $OUT"
else
    # No text file was produced; run once more so the result at least appears
    # on the terminal.
    "$BIN" -m "$MODEL_PATH" -f "$AUDIO"
fi
EOF

chmod +x /usr/local/bin/whisper-transcribe

# Final success line followed by a short usage cheat-sheet. The banner is a
# quoted here-document, so it is printed exactly as written.
log "✅ Whisper (C++版) 部署成功!"
cat << 'EOF'

📌 使用方法:
  whisper-transcribe base your_audio.mp3 output.txt

💡 提示:
  - 模型需放在 /opt/whisper/models/,如 ggml-base.bin
  - 中文推荐模型:base、small(不要用 .en 结尾的)
  - 支持 mp3/wav/m4a 等格式,自动转换
EOF

执行安装脚本安装 ./install_whisper.sh

如果编译时报 C++ 编译器版本过低的错误(CentOS 7 自带的 GCC 是 4.8.5)

增加一个修复脚本进行修复fix_centos7_repo.sh

cpp 复制代码
#!/bin/bash
set -e

# Colour escape sequences for the tagged output helpers; NC switches back to
# the terminal default.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# log MESSAGE — green [INFO] line.
log() {
    printf '%b\n' "${GREEN}[INFO]${NC} $1"
}

# warn MESSAGE — yellow [WARN] line.
warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}

# error MESSAGE — red [ERROR] line, then terminate with exit status 1.
error() {
    printf '%b\n' "${RED}[ERROR]${NC} $1"
    exit 1
}

# Root is required.
if [ "$EUID" -ne 0 ]; then
    error "请以 root 身份运行"
fi

# Look for the CentOS 7 marker in /etc/os-release; on any other system ask for
# a single-key confirmation and stop unless the answer is y or Y.
log "检测是否为 CentOS 7..."
if grep -q "CentOS Linux 7" /etc/os-release 2>/dev/null; then
    :
else
    warn "警告:未检测到 CentOS 7,脚本可能不适用。"
    read -p "是否继续?(y/N): " -n 1 -r
    echo
    case "$REPLY" in
        [Yy]) ;;
        *) exit 1 ;;
    esac
fi

# Move the existing repo definitions aside, then write Base/Updates/Extras and
# SCL definitions that point at the Aliyun mirror of the CentOS 7.9.2009 vault.
log "备份原有 repo 文件..."
mkdir -p /etc/yum.repos.d/backup
# --backup=numbered keeps earlier backups (as NAME.~N~) instead of overwriting
# them: on a second run, the CentOS-Base.repo generated below would otherwise
# replace the backup of the system's original file. A missing *.repo match is
# not an error, hence "|| true".
mv --backup=numbered /etc/yum.repos.d/*.repo /etc/yum.repos.d/backup/ 2>/dev/null || true

log "创建 CentOS 7 Vault Base 源(阿里云镜像)..."
# Quoted delimiter: $basearch is written literally into the repo file.
cat > /etc/yum.repos.d/CentOS-Base.repo << 'EOF'
[base]
name=CentOS-7 - Base
baseurl=https://mirrors.aliyun.com/centos-vault/7.9.2009/os/$basearch/
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/centos-vault/RPM-GPG-KEY-CentOS-7
enabled=1

[updates]
name=CentOS-7 - Updates
baseurl=https://mirrors.aliyun.com/centos-vault/7.9.2009/updates/$basearch/
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/centos-vault/RPM-GPG-KEY-CentOS-7
enabled=1

[extras]
name=CentOS-7 - Extras
baseurl=https://mirrors.aliyun.com/centos-vault/7.9.2009/extras/$basearch/
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/centos-vault/RPM-GPG-KEY-CentOS-7
enabled=1
EOF

log "创建 SCL 源(devtoolset),禁用 GPG 检查..."
# Only the "rh" collection is enabled; GPG checking is off for both entries.
cat > /etc/yum.repos.d/CentOS-SCL.repo << 'EOF'
[centos-sclo-rh]
name=CentOS-7 - SCLo rh
baseurl=https://mirrors.aliyun.com/centos-vault/7.9.2009/sclo/$basearch/rh/
gpgcheck=0
enabled=1

[centos-sclo-sclo]
name=CentOS-7 - SCLo sclo
baseurl=https://mirrors.aliyun.com/centos-vault/7.9.2009/sclo/$basearch/sclo/
gpgcheck=0
enabled=0
EOF

log "清理并重建 YUM 缓存..."
# Cleaning is best-effort because the cache directory is removed right after.
yum clean all >/dev/null 2>&1 || warn "yum clean all 失败,继续执行"
rm -rf /var/cache/yum
# Output is discarded, so report a failure explicitly; otherwise "set -e"
# would stop the script here without any message.
if ! yum makecache >/dev/null 2>&1; then
    error "yum makecache 失败,请检查网络或 /etc/yum.repos.d/ 下的源配置"
fi

# Install the devtoolset-9 C and C++ compilers.
log "安装 devtoolset-9(GCC 9.x)..."
devtoolset_pkgs=(devtoolset-9-gcc devtoolset-9-gcc-c++)
yum install -y "${devtoolset_pkgs[@]}"

# Load the toolset into this shell and make sure the gcc now on PATH reports a
# 9.x version on the first line of "gcc --version".
log "验证 GCC 版本..."
. /opt/rh/devtoolset-9/enable
gcc_version=$(gcc --version | head -n1)
log "GCC 版本: $gcc_version"

case "$gcc_version" in
    *" 9."*) ;;
    *) error "GCC 9 未正确安装" ;;
esac

# ==============================
# Generate the whisper build wrapper
# ==============================
# Writes build-whisper.sh into the directory this script was started from. The
# wrapper enables devtoolset-9 and then hands over to install_whisper.sh.
WHISPER_DIR="$(pwd)"
BUILD_SCRIPT="$WHISPER_DIR/build-whisper.sh"

log "生成自动构建脚本: $BUILD_SCRIPT"

# Quoted delimiter: the wrapper is written verbatim, nothing is expanded here.
cat > "$BUILD_SCRIPT" << 'EOF'
#!/bin/bash
# Enable devtoolset-9 automatically, then build whisper.cpp
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

if [ -f /opt/rh/devtoolset-9/enable ]; then
    source /opt/rh/devtoolset-9/enable
    # The first line of "gcc --version" looks like
    # "gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2)": field 3 is the version,
    # field 4 is the build date.
    echo "[INFO] 已启用 devtoolset-9 (GCC $(gcc --version | head -n1 | cut -d' ' -f3))"
else
    echo "[WARN] devtoolset-9 未找到,使用系统默认编译器"
fi

# Run the original install script
exec ./install_whisper.sh
EOF

chmod +x "$BUILD_SCRIPT"

# The wrapper runs ./install_whisper.sh from its own directory, so point out
# when that file is not there.
if [ ! -f "$WHISPER_DIR/install_whisper.sh" ]; then
    warn "未在 $WHISPER_DIR 下找到 install_whisper.sh,请将其与 build-whisper.sh 放在同一目录"
fi

log "✅ 修复完成!请使用以下命令构建 whisper:"
echo
echo "  cd $WHISPER_DIR"
echo "  ./build-whisper.sh"
echo
echo "💡 此脚本会自动启用 GCC 9,无需手动 source"

执行修复脚本./fix_centos7_repo.sh

再次安装./install_whisper.sh

安装成功后

下载模型(科学上网)
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin

将模型文件放到指定路径 /opt/whisper/models

测试:指定音频解析文本

cpp 复制代码
whisper-transcribe base /home/whisper/0d3b1eab-a6dd-4ef5-9560-4d7583124585.mp3  output.txt

开放API 访问,响应字符串

脚本:install_whisper_api.sh

cpp 复制代码
#!/bin/bash
# install_whisper_api.sh — build whisper.cpp's HTTP server and register it as
# the "whisper-api" systemd service.
#
# Environment:
#   WHISPER_SRC  whisper.cpp source tree
#                (default: /home/whisper/whisper.cpp-master)
set -e

RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m'
# log MESSAGE — print MESSAGE with a green [INFO] tag.
log() { echo -e "${GREEN}[INFO]${NC} $1"; }
# error MESSAGE — print MESSAGE with a red [ERROR] tag and exit with status 1.
error() { echo -e "${RED}[ERROR]${NC} $1"; exit 1; }

[ "$EUID" -ne 0 ] && error "请用 root 运行"
# Enable devtoolset-9 automatically when it is installed.
if [ -f /opt/rh/devtoolset-9/enable ]; then
    source /opt/rh/devtoolset-9/enable
    # The first line of "gcc --version" looks like
    # "gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2)": field 3 is the version,
    # field 4 is the build date.
    echo "[INFO] 已启用 devtoolset-9 (GCC $(gcc --version | head -n1 | cut -d' ' -f3))"
fi
WHISPER_SRC="${WHISPER_SRC:-/home/whisper/whisper.cpp-master}"
API_BIN="/opt/whisper/bin/whisper-api"   # installed name; chosen freely
PORT=6688

[ ! -d "$WHISPER_SRC" ] && error "未找到 whisper.cpp 源码目录: $WHISPER_SRC"

cd "$WHISPER_SRC"

# Patch the server example to answer in plain text, rebuild whisper.cpp and
# install build/bin/whisper-server as $API_BIN.
log "修改 server.cpp 为纯文本输出..."
# Note: the source file is still called server.cpp, but the binary built from
# it is named whisper-server.
SERVER_CPP="examples/server/server.cpp"
[ -f "$SERVER_CPP" ] || error "未找到 $SERVER_CPP"

# sed exits 0 whether or not the pattern matches, so check for the statement
# first and say so when it is absent, instead of claiming a patch that was
# never applied.
if grep -qF -- 'res.set_content(result.dump(), "application/json");' "$SERVER_CPP"; then
    sed -i 's/res\.set_content(result\.dump(), "application\/json");/res.set_content(text, "text\/plain; charset=utf-8");/' "$SERVER_CPP"
else
    log "未在 $SERVER_CPP 中找到待替换语句,跳过补丁(接口将保持默认输出格式)"
fi

log "清理旧构建..."
rm -rf build

log "配置 CMake(启用 SERVER + 内置依赖)..."
# NOTE(review): BUILD_SERVER and WHISPER_USE_LOCAL_LIBS are passed through as
# the author wrote them; confirm against the checked-out CMakeLists.txt that
# the project actually reads them.
cmake -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DBUILD_SERVER=ON \
  -DWHISPER_USE_LOCAL_LIBS=ON

log "编译 whisper-server(可能需要 1~3 分钟)..."
cmake --build build --config Release -j"$(nproc)"

# The artifact to look for is whisper-server, not "server".
if [ ! -f "build/bin/whisper-server" ]; then
    error "编译失败:未生成 build/bin/whisper-server"
fi

log "安装 API 服务到 /opt/whisper/bin/whisper-api..."
mkdir -p /opt/whisper/bin
cp build/bin/whisper-server "$API_BIN"
chmod +x "$API_BIN"

# Write the systemd unit for the API server and print how to start and test
# it. The here-document delimiter is unquoted on purpose: $API_BIN and $PORT
# are expanded while the unit file is written.
log "创建 systemd 服务..."
cat > /etc/systemd/system/whisper-api.service <<EOF
[Unit]
Description=Whisper.cpp Plain-Text API Server
After=network.target

[Service]
Type=simple
User=root
WorkingDirectory=/opt/whisper
ExecStart=$API_BIN -m /opt/whisper/models/ggml-base.bin --port $PORT
Restart=always
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload

log "✅ 部署成功!"

# The hints below use $PORT so they always match the port written to the unit.
echo
echo "📌 启动服务:"
echo "  systemctl start whisper-api"
echo "  systemctl enable whisper-api"
echo
echo "📌 手动测试:"
echo "  $API_BIN -m /opt/whisper/models/ggml-base.bin --port $PORT"
echo
echo "📌 API 调用示例:"
echo "  curl -X POST http://localhost:$PORT/inference -F \"file=@your_audio.mp3\""

执行脚本./install_whisper_api.sh

启动服务

cpp 复制代码
systemctl start whisper-api

测试:

cpp 复制代码
curl -X POST http://localhost:6688/inference    -F "file=@/home/whisper/0d3b1eab-a6dd-4ef5-9560-4d7583124585.mp3"

我发现这个模型的识别(转写)结果不够准确

换个ggml-small.bin模型
https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin

下载好之后放到:/opt/whisper/models/

修改 systemd 服务,使用新模型

cpp 复制代码
vi /etc/systemd/system/whisper-api.service

-m 参数模型改为:

cpp 复制代码
ExecStart=/opt/whisper/bin/whisper-api -m /opt/whisper/models/ggml-small.bin --port 6688

保存后重载并重启:

cpp 复制代码
systemctl daemon-reload
cpp 复制代码
systemctl restart whisper-api

我尝试强制指定语言为中文进行解析

cpp 复制代码
 curl -X POST "http://localhost:6688/inference?language=zh"   -F "file=@/home/whisper/0d3b1eab-a6dd-4ef5-9560-4d7583124585.mp3"

我发现怎么返回的是英文呢

cpp 复制代码
[root@localhost whisper]# curl -X POST "http://localhost:6688/inference?language=zh"   -F "file=@/home/whisper/0d3b1eab-a6dd-4ef5-9560-4d7583124585.mp3"
{"text":" and the image quality is high.\n"}[root@localhost whisper]# 

更改配置指定中文

cpp 复制代码
vi /etc/systemd/system/whisper-api.service
cpp 复制代码
ExecStart=/opt/whisper/bin/whisper-api -m /opt/whisper/models/ggml-small.bin --language zh --port 6688

再次保存重启测试

cpp 复制代码
systemctl daemon-reload
cpp 复制代码
systemctl restart whisper-api
cpp 复制代码
curl -X POST "http://localhost:6688/inference"   -F "file=@/home/whisper/0d3b1eab-a6dd-4ef5-9560-4d7583124585.mp3"

虽然有偏差,但最起码是简体中文了

配置nginx

cpp 复制代码
 # Reverse-proxy requests under /whisper/ to the whisper-api service on port 6688.
 location /whisper/ {
    # proxy_pass carries a URI ("/"), so nginx replaces the matched /whisper/
    # prefix with it: /whisper/inference is forwarded as /inference.
    proxy_pass http://172.26.104.237:6688/;  
    # Pass the original host, client address and scheme on to the upstream.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    # Long timeouts; presumably sized for large uploads and slow transcription — confirm.
    proxy_connect_timeout 360s;
    proxy_send_timeout 600s;
    proxy_read_timeout 600s;
}

重启后进行测试

cnf 复制代码
http://IP/whisper/inference

请求头 Content-Type 设置为 multipart/form-data
使用 POST 请求,以 form-data 方式传参:参数名为 file,值为要识别的音频文件

相关推荐
Trouvaille ~5 小时前
【LInux】进程程序替换与shell实现:从fork到exec的完整闭环
linux·运维·c语言·c++·ssh·进程替换·基础入门
橘颂TA5 小时前
【Linux】不允许你还不会——信号保存(3)
linux·服务器·网络·数据库
Neolnfra5 小时前
系统敏感安全文件路径
linux·windows·安全·web安全·网络安全·adb·系统安全
麒qiqi5 小时前
Linux 线程(POSIX)核心教程
linux·算法
再遇当年5 小时前
因为研究平台arm,RK3588交叉编译误把我笔记本X86平台的/x86_64-linux-gnu文件删除,导致联想拯救者笔记本中的ubuntu系统损坏
linux·arm开发·ros·gnu·交叉编译·x86
_OP_CHEN5 小时前
【从零开始的Qt开发指南】(十)Qt 常用控件之输入类控件全攻略:7 大控件从入门到实战,覆盖所有输入场景
开发语言·c++·qt·前端开发·qt常用控件·gui图形化界面·qt输入类控件
枫叶丹45 小时前
【Qt开发】Qt窗口(十) -> QInputDialog 输入对话框
c语言·开发语言·数据库·c++·qt
云青山水林5 小时前
算法竞赛从入门到跳楼(ACM-XCPC、蓝桥杯软件赛等)
c++·算法·蓝桥杯
李斯维5 小时前
第14 章 使用 shell:初始化文件
linux·bash·unix