环境:Windows 11、IntelliJ IDEA、Java 25、JavaFX 25
下载并安装 CMake。
下载并安装 Visual Studio 的 C++ 构建工具:安装 Visual Studio Community 2022(免费版),并在安装程序中勾选 “Desktop development with C++”(使用 C++ 的桌面开发)工作负载。
1. 以管理员身份打开 Visual Studio 的开发者命令提示符(Developer Command Prompt for VS 2022),切换到 whisper.cpp 源代码目录:
c
E:
cd E:\My_Dream\whisper.cpp
2. 创建并配置构建目录(这会在 whisper.cpp 文件夹内创建一个名为 build 的子文件夹)
c
cmake -B build
3.构建项目(使用 -j 可以并行编译;在 Windows 上默认使用所有可用核心)
c
cmake --build build --config Release -j
成功构建后,可执行文件(如 whisper-cli.exe、main.exe 等)将位于 build\bin\Release\ 目录下。

推荐手动下载模型!!!
- 访问 Hugging Face 仓库:https://huggingface.co/ggerganov/whisper.cpp
- 找到文件 ggml-small.bin(不是 ggml-small.en.bin,除非您只用英语)。
- 点击下载(或直接链接:https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin)
- 将下载的文件放到您的项目目录下:E:\My_Dream\whisper.cpp\models\ggml-small.bin
- 如果 models 文件夹不存在,先创建它:mkdir models
- 验证:在命令提示符中运行 dir models,确认看到 ggml-small.bin 且文件大小约 488 MB。
重新构建 whisper.cpp(静态链接,避免 DLL 兼容问题)
进入项目目录:
c
cd E:\My_Dream\whisper.cpp
删除旧构建目录(清理):
c
rmdir /s /q build
重新配置并构建(通过 -DBUILD_SHARED_LIBS=OFF 明确关闭共享库,改为静态链接):
c
cmake -B build -DBUILD_SHARED_LIBS=OFF
cmake --build build --config Release -j
第三步:运行测试
c
build\bin\Release\whisper-cli.exe -m models/ggml-small.bin -f samples/jfk.wav
预期输出
c
[00:00:00.000 --> 00:00:11.000] And so, my fellow Americans: ask not what your country can do for you---ask what you can do for your country.
项目添加依赖
xml
<!-- whisper-jni: JNI bindings for whisper.cpp -->
<dependency>
    <groupId>io.github.givimad</groupId>
    <artifactId>whisper-jni</artifactId>
    <version>1.7.1</version>
</dependency>
FXML 页面与控制器
xml
<?xml version="1.0" encoding="UTF-8"?>
<?import javafx.geometry.Insets?>
<?import javafx.scene.control.*?>
<?import javafx.scene.layout.*?>
<?import javafx.collections.FXCollections?>
<?import java.lang.String?>
<!--
  Whisper transcription form: an input grid (model, audio, language, options,
  output format, save location), a start button with a progress bar, and a
  read-only result area.
-->
<VBox spacing="15.0" alignment="TOP_CENTER" xmlns="http://javafx.com/javafx" xmlns:fx="http://javafx.com/fxml/1"
      fx:controller="com.flower.controller.WhisperTranscribeController"> <!-- Create the matching controller class yourself -->
    <padding>
        <Insets top="20" right="30" bottom="20" left="30"/>
    </padding>

    <Label text="本地语音转文字(Whisper)" style="-fx-font-size: 20px; -fx-font-weight: bold;"/>

    <GridPane hgap="10" vgap="12" alignment="CENTER">
        <columnConstraints>
            <ColumnConstraints hgrow="SOMETIMES" minWidth="120"/>
            <ColumnConstraints hgrow="ALWAYS"/>
        </columnConstraints>

        <!-- Row 0: model file path -->
        <Label text="模型文件:" GridPane.rowIndex="0" GridPane.columnIndex="0"/>
        <HBox spacing="8" alignment="CENTER_LEFT" GridPane.rowIndex="0" GridPane.columnIndex="1">
            <TextField fx:id="modelPathField" prefWidth="400" promptText="选择 ggml-*.bin 模型文件"/>
            <Button text="浏览..." onAction="#chooseModelFile"/>
        </HBox>

        <!-- Row 1: audio file path -->
        <Label text="音频文件:" GridPane.rowIndex="1" GridPane.columnIndex="0"/>
        <HBox spacing="8" alignment="CENTER_LEFT" GridPane.rowIndex="1" GridPane.columnIndex="1">
            <TextField fx:id="audioPathField" prefWidth="400" promptText="支持 wav、mp3 等(会自动转 16k 单声道)"/>
            <Button text="浏览..." onAction="#chooseAudioFile"/>
        </HBox>

        <!-- Row 2: spoken language ("auto" is the preselected value) -->
        <Label text="语言:" GridPane.rowIndex="2" GridPane.columnIndex="0"/>
        <ComboBox fx:id="languageCombo" prefWidth="200" GridPane.rowIndex="2" GridPane.columnIndex="1">
            <items>
                <FXCollections fx:factory="observableArrayList">
                    <String fx:value="auto"/>
                    <String fx:value="en"/>
                    <String fx:value="zh"/>
                    <String fx:value="ja"/>
                    <String fx:value="ko"/>
                    <String fx:value="fr"/>
                    <String fx:value="de"/>
                    <String fx:value="es"/>
                    <!-- More language codes can be added here -->
                </FXCollections>
            </items>
            <value><String fx:value="auto"/></value>
        </ComboBox>

        <!-- Row 3: translate the result into English -->
        <CheckBox fx:id="translateCheck" text="翻译成英语" GridPane.rowIndex="3" GridPane.columnIndex="1"/>

        <!-- Row 4: include timestamps in the output -->
        <CheckBox fx:id="timestampsCheck" text="输出带时间戳(字幕格式)" selected="true" GridPane.rowIndex="4" GridPane.columnIndex="1"/>

        <!-- Row 5: output file format -->
        <Label text="输出格式:" GridPane.rowIndex="5" GridPane.columnIndex="0"/>
        <ComboBox fx:id="outputFormatCombo" prefWidth="200" GridPane.rowIndex="5" GridPane.columnIndex="1">
            <items>
                <FXCollections fx:factory="observableArrayList">
                    <String fx:value="txt"/>
                    <String fx:value="srt"/>
                    <String fx:value="vtt"/>
                </FXCollections>
            </items>
            <value><String fx:value="txt"/></value>
        </ComboBox>

        <!-- Row 6: save location -->
        <Label text="保存位置:" GridPane.rowIndex="6" GridPane.columnIndex="0"/>
        <HBox spacing="8" alignment="CENTER_LEFT" GridPane.rowIndex="6" GridPane.columnIndex="1">
            <TextField fx:id="savePathField" prefWidth="400" promptText="选择保存文件或文件夹"/>
            <Button text="浏览..." onAction="#chooseSavePath"/>
        </HBox>
    </GridPane>

    <!-- Start button; the progress bar is hidden until the controller shows it -->
    <HBox spacing="20" alignment="CENTER">
        <Button text="开始转录" fx:id="startButton" style="-fx-font-size: 16px; -fx-padding: 10 20;" onAction="#startTranscription"/>
        <ProgressBar fx:id="progressBar" prefWidth="300" visible="false"/>
    </HBox>

    <Separator/>

    <!-- Read-only transcription output -->
    <Label text="转录结果:" style="-fx-font-size: 16px;"/>
    <TextArea fx:id="resultArea" prefHeight="300" wrapText="true" editable="false"/>
</VBox>
java
package com.flower.controller;
import io.github.givimad.whisperjni.WhisperContext;
import io.github.givimad.whisperjni.WhisperFullParams;
import io.github.givimad.whisperjni.WhisperJNI;
import javafx.application.Platform;
import javafx.concurrent.Task;
import javafx.fxml.FXML;
import javafx.scene.control.*;
import javafx.stage.FileChooser;
import javafx.stage.Stage;
import org.springframework.stereotype.Component;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ResourceBundle;
/**
 * JavaFX controller for the local Whisper speech-to-text form.
 *
 * <p>Lets the user pick a ggml model file, an audio file, a language, and an
 * output format, then runs the transcription through whisper-jni on a
 * background {@link Task} and shows (and optionally saves) the result.
 *
 * <p>All UI controls are read and written on the JavaFX Application Thread
 * only; the background task works on values captured before it starts.
 */
@Component
public class WhisperTranscribeController extends BaseFormController {

    /**
     * Milliseconds per whisper timestamp tick. whisper.cpp reports segment
     * start/end times in units of 10 ms (its own CLI multiplies by 10 to get
     * milliseconds), so raw values must be scaled before formatting.
     */
    private static final long WHISPER_TICK_MILLIS = 10L;

    @FXML private TextField modelPathField;
    @FXML private TextField audioPathField;
    @FXML private TextField savePathField;
    @FXML private ComboBox<String> languageCombo;
    @FXML private CheckBox translateCheck;
    @FXML private CheckBox timestampsCheck;
    @FXML private ComboBox<String> outputFormatCombo;
    @FXML private Button startButton;
    @FXML private ProgressBar progressBar;
    @FXML private TextArea resultArea;

    /** Owner window for file dialogs; may be {@code null} if never set. */
    private Stage stage;

    /** Native binding; stays {@code null} when the native library failed to load. */
    private WhisperJNI whisperJNI;

    /**
     * Selects the default combo-box values, loads the whisper-jni native
     * library, and pre-fills the default model path.
     *
     * @param url            unused
     * @param resourceBundle unused
     */
    @Override
    public void initialize(URL url, ResourceBundle resourceBundle) {
        // Default selections for language and output format.
        languageCombo.getSelectionModel().select("auto");
        outputFormatCombo.getSelectionModel().select("txt");

        // Load whisper-jni. A missing or incompatible native library surfaces as
        // UnsatisfiedLinkError (an Error, not an Exception), so catch that as well.
        try {
            WhisperJNI.loadLibrary();
            WhisperJNI loaded = new WhisperJNI();
            resultArea.appendText("Whisper JNI 加载成功\n");
            resultArea.appendText("系统信息:\n" + loaded.getSystemInfo() + "\n");
            // Publish only after the native calls above succeeded.
            whisperJNI = loaded;
        } catch (Exception | LinkageError e) {
            showAlert("错误", "无法加载 Whisper JNI 库: " + e.getMessage());
            e.printStackTrace();
        }

        // Default model path (adjust to the local installation).
        modelPathField.setText("E:/My_Dream/whisper.cpp/models/ggml-small.bin");
    }

    /**
     * Sets the window used as owner for the file dialogs.
     *
     * @param stage owner stage
     */
    public void setStage(Stage stage) {
        this.stage = stage;
    }

    /** Opens a file dialog for the {@code *.bin} model and stores the chosen path. */
    @FXML
    private void chooseModelFile() {
        File file = showFileChooser("选择 Whisper 模型文件", "Whisper 模型 (*.bin)", "*.bin");
        if (file != null) {
            modelPathField.setText(file.getAbsolutePath());
        }
    }

    /**
     * Opens a file dialog for the audio input and stores the chosen path.
     *
     * <p>Only {@code *.wav} is offered because the Java Sound API
     * ({@code javax.sound.sampled}) supports a limited set of formats by default.
     */
    @FXML
    private void chooseAudioFile() {
        File file = showFileChooser("选择音频文件", "音频文件", "*.wav");
        if (file != null) {
            audioPathField.setText(file.getAbsolutePath());
        }
    }

    /**
     * Opens a save dialog whose extension filter and suggested file name follow
     * the currently selected output format ({@code txt} when none is selected).
     */
    @FXML
    private void chooseSavePath() {
        FileChooser fileChooser = new FileChooser();
        fileChooser.setTitle("保存转录结果");
        String format = outputFormatCombo.getValue();
        String ext = format != null ? format : "txt";
        fileChooser.getExtensionFilters().add(new FileChooser.ExtensionFilter(
                "转录文件 (*." + ext + ")", "*." + ext));
        fileChooser.setInitialFileName("transcription." + ext);
        File file = fileChooser.showSaveDialog(stage);
        if (file != null) {
            savePathField.setText(file.getAbsolutePath());
        }
    }

    /**
     * Shows an open-file dialog with a single extension filter.
     *
     * @param title      dialog title
     * @param filterName description shown for the filter
     * @param extensions glob patterns such as {@code *.wav}
     * @return the chosen file, or {@code null} if the dialog was cancelled
     */
    private File showFileChooser(String title, String filterName, String... extensions) {
        FileChooser fileChooser = new FileChooser();
        fileChooser.setTitle(title);
        fileChooser.getExtensionFilters().add(new FileChooser.ExtensionFilter(filterName, extensions));
        return fileChooser.showOpenDialog(stage);
    }

    /**
     * Validates the inputs, snapshots the UI options, and runs the
     * transcription on a background thread. The result is appended to the
     * result area and, when a save path is given, written to that file.
     */
    @FXML
    private void startTranscription() {
        String modelPath = modelPathField.getText().trim();
        String audioPath = audioPathField.getText().trim();
        String savePath = savePathField.getText().trim();
        if (modelPath.isEmpty() || audioPath.isEmpty()) {
            showAlert("错误", "请先选择模型文件和音频文件!");
            return;
        }
        if (whisperJNI == null) {
            // The native library failed to load in initialize(); without this guard
            // the worker would die with a NullPointerException.
            showAlert("错误", "Whisper JNI 库未加载,无法开始转录!");
            return;
        }

        // Snapshot every UI value here, on the FX thread: controls must not be
        // touched from the background task.
        final String language = languageCombo.getValue();
        final boolean translate = translateCheck.isSelected();
        final boolean withTimestamps = timestampsCheck.isSelected();
        final String selectedFormat = outputFormatCombo.getValue();
        // switch on a null String throws; fall back to plain text like chooseSavePath().
        final String format = selectedFormat != null ? selectedFormat : "txt";

        startButton.setDisable(true);
        progressBar.setVisible(true);
        progressBar.setProgress(ProgressBar.INDETERMINATE_PROGRESS);
        resultArea.clear();
        resultArea.appendText("正在加载模型和处理音频,请稍等...\n");

        Task<String> task = new Task<>() {
            @Override
            protected String call() throws Exception {
                Path model = Paths.get(modelPath);
                try (WhisperContext ctx = whisperJNI.init(model)) {
                    // NOTE(review): init appears to return null when the model cannot be
                    // initialised — confirm against the whisper-jni version in use.
                    if (ctx == null) {
                        throw new IOException("无法加载模型文件: " + modelPath);
                    }
                    float[] samples = loadAudioSamples(audioPath);

                    WhisperFullParams params = new WhisperFullParams();
                    // "auto" is passed as null so that the language is not forced.
                    params.language = "auto".equals(language) ? null : language;
                    params.translate = translate;
                    params.printTimestamps = withTimestamps;

                    int result = whisperJNI.full(ctx, params, samples, samples.length);
                    if (result != 0) {
                        throw new RuntimeException("Whisper 转录失败,错误码: " + result);
                    }

                    int segments = whisperJNI.fullNSegments(ctx);
                    String transcript = toPlainText(segments, ctx, withTimestamps);

                    // Save while the context is still open: SRT/VTT re-read the segments.
                    if (!savePath.isEmpty()) {
                        String content = switch (format) {
                            case "srt" -> toSrt(segments, ctx);
                            case "vtt" -> toVtt(segments, ctx);
                            default -> transcript;
                        };
                        saveToFile(savePath, content);
                    }
                    return transcript;
                }
            }

            // Task invokes succeeded()/failed() on the FX Application Thread, so the
            // UI can be updated directly without Platform.runLater.
            @Override
            protected void succeeded() {
                resultArea.appendText("\n转录完成!\n\n");
                resultArea.appendText(getValue());
                if (!savePath.isEmpty()) {
                    resultArea.appendText("\n文件已保存至:\n" + savePath + "\n");
                }
                finishTask();
            }

            @Override
            protected void failed() {
                Throwable error = getException();
                // Some exceptions (e.g. NullPointerException) carry no message.
                String detail = error.getMessage() != null ? error.getMessage() : error.toString();
                resultArea.appendText("\n转录失败:\n" + detail + "\n");
                error.printStackTrace();
                finishTask();
            }
        };

        Thread worker = new Thread(task, "whisper-transcribe");
        // Daemon so an in-flight transcription does not keep the JVM alive after
        // the application window is closed.
        worker.setDaemon(true);
        worker.start();
    }

    /** Restores the idle UI state after a transcription ends (success or failure). */
    private void finishTask() {
        progressBar.setVisible(false);
        startButton.setDisable(false);
    }

    /**
     * Reads an audio file and converts it to 16 kHz mono float samples.
     *
     * <p>First asks Java Sound for a 32-bit PCM_FLOAT conversion; if that
     * conversion is rejected with {@link IllegalArgumentException}, falls back
     * to 16-bit signed PCM and scales each sample by 1/32768.
     *
     * @param audioPathStr path of the audio file
     * @return the decoded samples
     * @throws IOException                   if the file cannot be read
     * @throws UnsupportedAudioFileException if Java Sound does not recognise the file
     * @throws IllegalArgumentException      if the fallback conversion is rejected as well
     */
    private float[] loadAudioSamples(String audioPathStr) throws IOException, UnsupportedAudioFileException {
        File audioFile = new File(audioPathStr);
        try (AudioInputStream audioStream = AudioSystem.getAudioInputStream(audioFile)) {
            AudioFormat baseFormat = audioStream.getFormat();
            // Keep the source byte order so that only encoding/rate/channels are converted.
            AudioFormat targetFormat = new AudioFormat(
                    AudioFormat.Encoding.PCM_FLOAT,
                    16000.0f,
                    32,
                    1,
                    4,
                    16000.0f,
                    baseFormat.isBigEndian()
            );
            try (AudioInputStream convertedStream = AudioSystem.getAudioInputStream(targetFormat, audioStream)) {
                byte[] bytes = convertedStream.readAllBytes();
                ByteBuffer buffer = ByteBuffer.wrap(bytes);
                buffer.order(baseFormat.isBigEndian() ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN);
                float[] samples = new float[bytes.length / 4];
                buffer.asFloatBuffer().get(samples);
                return samples;
            } catch (IllegalArgumentException e) {
                // Fall back to 16-bit signed PCM.
                AudioFormat pcmFormat = new AudioFormat(
                        AudioFormat.Encoding.PCM_SIGNED,
                        16000.0f,
                        16,
                        1,
                        2,
                        16000.0f,
                        baseFormat.isBigEndian()
                );
                try (AudioInputStream pcmStream = AudioSystem.getAudioInputStream(pcmFormat, audioStream)) {
                    byte[] pcmBytes = pcmStream.readAllBytes();
                    float[] samples = new float[pcmBytes.length / 2];
                    ByteBuffer buffer = ByteBuffer.wrap(pcmBytes);
                    buffer.order(pcmFormat.isBigEndian() ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN);
                    for (int i = 0; i < samples.length; i++) {
                        samples[i] = buffer.getShort() / 32768.0f;
                    }
                    return samples;
                }
            }
        }
    }

    /** Returns the start time of segment {@code index} in milliseconds. */
    private long segmentStartMillis(WhisperContext ctx, int index) {
        return whisperJNI.fullGetSegmentTimestamp0(ctx, index) * WHISPER_TICK_MILLIS;
    }

    /** Returns the end time of segment {@code index} in milliseconds. */
    private long segmentEndMillis(WhisperContext ctx, int index) {
        return whisperJNI.fullGetSegmentTimestamp1(ctx, index) * WHISPER_TICK_MILLIS;
    }

    /**
     * Formats milliseconds as {@code HH:MM:SS,mmm} (SRT style).
     *
     * @param ms time in milliseconds
     * @return the formatted timestamp
     */
    private String formatTimestamp(long ms) {
        return formatTimestamp(ms, ',');
    }

    /**
     * Formats milliseconds as {@code HH:MM:SS<sep>mmm}.
     *
     * @param ms                time in milliseconds
     * @param fractionSeparator {@code ','} for SRT, {@code '.'} for WebVTT
     * @return the formatted timestamp
     */
    private String formatTimestamp(long ms, char fractionSeparator) {
        long hours = ms / 3600000;
        long minutes = (ms % 3600000) / 60000;
        long seconds = (ms % 60000) / 1000;
        long millis = ms % 1000;
        return String.format("%02d:%02d:%02d%c%03d", hours, minutes, seconds, fractionSeparator, millis);
    }

    /**
     * Builds the text shown in the result area (and saved as {@code txt}):
     * one block per segment, optionally preceded by a timing line.
     */
    private String toPlainText(int segments, WhisperContext ctx, boolean withTimestamps) {
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < segments; i++) {
            if (withTimestamps) {
                out.append(formatTimestamp(segmentStartMillis(ctx, i)))
                        .append(" --> ")
                        .append(formatTimestamp(segmentEndMillis(ctx, i)))
                        .append("\n");
            }
            out.append(whisperJNI.fullGetSegmentText(ctx, i).trim()).append("\n\n");
        }
        return out.toString();
    }

    /** Renders all segments as SubRip: numbered cues with comma-separated milliseconds. */
    private String toSrt(int segments, WhisperContext ctx) {
        StringBuilder srt = new StringBuilder();
        for (int i = 0; i < segments; i++) {
            srt.append(i + 1).append("\n");
            appendCue(srt, ctx, i, ',');
        }
        return srt.toString();
    }

    /**
     * Renders all segments as WebVTT. Cue timings are always written (a cue
     * without timings is not valid WebVTT) and use {@code '.'} before the
     * milliseconds, as the format requires.
     */
    private String toVtt(int segments, WhisperContext ctx) {
        StringBuilder vtt = new StringBuilder("WEBVTT\n\n");
        for (int i = 0; i < segments; i++) {
            appendCue(vtt, ctx, i, '.');
        }
        return vtt.toString();
    }

    /** Appends the timing line, the text, and a blank line for one segment. */
    private void appendCue(StringBuilder out, WhisperContext ctx, int index, char fractionSeparator) {
        out.append(formatTimestamp(segmentStartMillis(ctx, index), fractionSeparator))
                .append(" --> ")
                .append(formatTimestamp(segmentEndMillis(ctx, index), fractionSeparator))
                .append("\n");
        out.append(whisperJNI.fullGetSegmentText(ctx, index).trim()).append("\n\n");
    }

    /**
     * Writes {@code content} to {@code path} as UTF-8, replacing any existing file.
     *
     * @throws IOException if the file cannot be written
     */
    private void saveToFile(String path, String content) throws IOException {
        // Explicit charset so the output does not depend on the platform default.
        try (BufferedWriter writer = new BufferedWriter(
                new FileWriter(path, java.nio.charset.StandardCharsets.UTF_8))) {
            writer.write(content);
        }
    }

    /** Shows a blocking error dialog with the given title and message. */
    private void showAlert(String title, String message) {
        Alert alert = new Alert(Alert.AlertType.ERROR);
        alert.setTitle(title);
        alert.setHeaderText(null);
        alert.setContentText(message);
        alert.showAndWait();
    }

    /** No-op: this form currently has nothing to reset. */
    @Override
    public void resetForm() {
    }

    /**
     * Always returns {@code false}; no validation is implemented here.
     * NOTE(review): confirm how BaseFormController interprets this value.
     */
    @Override
    public boolean validateForm() {
        return false;
    }
}
