Java离线视频提取音频+音频提取文案

需引入javacv、vosk相关依赖。

至于javacv依赖,网上有很多缩减方案,下面被注释掉的部分就是一种可行的缩减方案。视频提取音频无需单独安装ffmpeg,只需引入依赖即可。而vosk需要先下载模型方可使用,大模型下载比较慢,可先用小模型跑通。

XML 复制代码
    

<properties>
    <!-- Source file encoding used by the build. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Referenced only by the commented-out reduced dependency set; the active javacv-platform dependency pins its own version (1.5.10). -->
    <javacv.version>1.5.6</javacv.version>
    <!-- Platform classifier used by the commented-out reduced dependency set. -->
    <system.windowsx64>windows-x86_64</system.windowsx64>
  </properties>

<!--  Commented-out alternative: a reduced JavaCV dependency set limited to the ${system.windowsx64} classifier. -->
<!--  NOTE(review): confirm that each native artifact version below was published for ${javacv.version} before enabling. -->
<!--  javacv+javacpp -->
<!--    <dependency>-->
<!--      <groupId>org.bytedeco</groupId>-->
<!--      <artifactId>javacv</artifactId>-->
<!--      <version>${javacv.version}</version>-->
<!--    </dependency>-->
<!--    <dependency>-->
<!--      <groupId>org.bytedeco</groupId>-->
<!--      <artifactId>javacpp-platform</artifactId>-->
<!--      <version>${javacv.version}</version>-->
<!--    </dependency>-->
<!--    &lt;!&ndash; Minimal ffmpeg dependency; requires the javacv + javacpp core libraries above &ndash;&gt;-->
<!--    <dependency>-->
<!--      <groupId>org.bytedeco</groupId>-->
<!--      <artifactId>ffmpeg</artifactId>-->
<!--      <version>4.4-${javacv.version}</version>-->
<!--      <classifier>${system.windowsx64}</classifier>-->
<!--    </dependency>-->
<!--&lt;!&ndash;     Minimal opencv dependency; requires the javacv + javacpp libraries above &ndash;&gt;-->
<!--        <dependency>-->
<!--          <groupId>org.bytedeco</groupId>-->
<!--          <artifactId>opencv</artifactId>-->
<!--          <version>4.5.1-${javacv.version}</version>-->
<!--          <classifier>${system.windowsx64}</classifier>-->
<!--        </dependency>-->
<!--        <dependency>-->
<!--          <groupId>org.bytedeco</groupId>-->
<!--          <artifactId>openblas</artifactId>-->
<!--          <version>0.3.13-${javacv.version}</version>-->
<!--          <classifier>${system.windowsx64}</classifier>-->
<!--        </dependency>-->
<!--    <dependency>-->
<!--      <groupId>org.bytedeco</groupId>-->
<!--      <artifactId>flycapture</artifactId>-->
<!--      <version>2.13.3.31-${javacv.version}</version>-->
<!--      <classifier>${system.windowsx64}</classifier>-->
<!--    </dependency>-->

  <dependencies>
    <!-- Extracts the audio track from video (provides the org.bytedeco.javacv / org.bytedeco.ffmpeg classes used by the extraction example). -->
    <dependency>
      <groupId>org.bytedeco</groupId>
      <artifactId>javacv-platform</artifactId>
      <version>1.5.10</version>
    </dependency>

    <!-- Reads audio file information. -->
    <dependency>
      <groupId>org</groupId>
      <artifactId>jaudiotagger</artifactId>
      <version>2.0.3</version>
    </dependency>
    <!-- NOTE(review): presumably needed by the vosk binding below to load its native library; confirm. -->
    <dependency>
      <groupId>net.java.dev.jna</groupId>
      <artifactId>jna</artifactId>
      <version>5.13.0</version>
    </dependency>
    <!-- Offline speech recognition (provides the org.vosk classes used by the transcription example). -->
    <dependency>
      <groupId>com.alphacephei</groupId>
      <artifactId>vosk</artifactId>
      <version>0.3.45</version>
    </dependency>
    <!-- JAVE2 (Java Audio Video Encoder) is a Java wrapper around the ffmpeg project. -->
    <dependency>
      <groupId>ws.schild</groupId>
      <artifactId>jave-core</artifactId>
      <version>3.1.1</version>
    </dependency>

    <!-- JSON parsing of the recognizer output (com.alibaba.fastjson.JSON). -->
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.83</version>
    </dependency>
  </dependencies>

视频提取音频

java 复制代码
package org.example;

import org.bytedeco.ffmpeg.global.avcodec;
import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.bytedeco.javacv.FFmpegFrameRecorder;
import org.bytedeco.javacv.Frame;


/**
 * Example: extracts the audio track of a media file into a separate audio file
 * using JavaCV's FFmpeg grabber and recorder.
 */
public class Test {

    /**
     * Copies all audio samples of {@code sourceFileName} into {@code audioUrl}.
     *
     * <p>The recorder is configured from the grabber: it uses the source's audio
     * channel count, the source's format name and the source's sample rate, and
     * video recording is disabled. No down-mixing or resampling to a fixed rate
     * is requested here.
     *
     * <p>NOTE(review): the format passed to the recorder is the <em>input</em>
     * container's format name, not one derived from the output file name —
     * confirm this yields the intended output container.
     *
     * <p>The native grabber and recorder are released even when grabbing or
     * recording fails part-way through.
     *
     * @param sourceFileName path of the media file to read
     * @param audioUrl       path of the audio file to write
     * @throws FFmpegFrameGrabber.Exception  if the source cannot be opened or read
     * @throws FFmpegFrameRecorder.Exception if the output cannot be opened or written
     */
    public static void extractVoice(String sourceFileName, String audioUrl) throws FFmpegFrameGrabber.Exception, FFmpegFrameRecorder.Exception {
        FFmpegFrameGrabber frameGrabber = new FFmpegFrameGrabber(sourceFileName);
        try {
            // The grabber must be started before its stream properties can be queried.
            frameGrabber.start();

            FFmpegFrameRecorder recorder = new FFmpegFrameRecorder(audioUrl, frameGrabber.getAudioChannels());
            try {
                recorder.setFormat(frameGrabber.getFormat());
                recorder.setSampleRate(frameGrabber.getSampleRate());
                // An explicit audio bitrate can be set here if needed, e.g. recorder.setAudioBitrate(128000).
                recorder.setTimestamp(frameGrabber.getTimestamp());
                recorder.setVideoCodec(avcodec.AV_CODEC_ID_NONE); // do not record video
                recorder.start();

                Frame frame;
                // grabSamples() returns null once the source is exhausted.
                while ((frame = frameGrabber.grabSamples()) != null) {
                    if (frame.samples != null) {
                        recorder.recordSamples(frame.sampleRate, frame.audioChannels, frame.samples);
                        // Keep the recorder's timestamp in step with the grabber's position.
                        recorder.setTimestamp(frameGrabber.getTimestamp());
                    }
                }
                // Only finish the output on the success path; on failure just free resources.
                recorder.stop();
            } finally {
                recorder.release();
            }
            frameGrabber.stop();
        } finally {
            frameGrabber.release();
        }
    }

    /**
     * Runs the extraction on hard-coded sample paths and prints the elapsed
     * time in milliseconds.
     *
     * @param args unused
     * @throws FFmpegFrameGrabber.Exception  if the source cannot be opened or read
     * @throws FFmpegFrameRecorder.Exception if the output cannot be opened or written
     */
    public static void main(String[] args) throws FFmpegFrameGrabber.Exception, FFmpegFrameRecorder.Exception {
        String videoFilePath = "I:\\workspace\\test.mp4"; // input video file
        String audioOutputPath = "I:\\workspace\\test_audio.wav"; // output audio file
        long startedAt = System.currentTimeMillis();
        extractVoice(videoFilePath, audioOutputPath);
        System.out.println(System.currentTimeMillis() - startedAt);
    }

}

音频提取文字

至于model可去此网站下载,解压使用。大模型下载较慢

VOSK Models:https://alphacephei.com/vosk/models

java 复制代码
package org.example;

import com.alibaba.fastjson.JSON;
import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;

import javax.sound.sampled.*;
import java.io.*;
import java.util.Optional;

/**
 * Example: transcribes a WAV file to text with a Vosk model.
 *
 * <p>{@link #main(String[])} also plays the audio through the default output
 * line while recognizing; {@link #main1(String[])} only recognizes.
 *
 * <p>{@code VoskResult} is not defined in this file; it is used as the target
 * type for the recognizer's JSON output and must expose {@code getText()}.
 */
public class Test3 {

    /** Directory of the unpacked Vosk model. */
    private static final String MODEL_PATH = "I:\\workspace\\vosk-model-small-cn-0.22";

    /** Audio file to transcribe. */
    private static final String AUDIO_PATH = "I:\\workspace\\test_audio.wav";

    /**
     * Sample rate handed to the recognizer.
     * NOTE(review): this should describe the audio actually fed to the
     * recognizer; 120000 is kept from the original example — confirm against
     * the real format of the input file.
     */
    private static final int RECOGNIZER_SAMPLE_RATE = 120000;

    /** Number of bytes read from the audio stream per iteration in {@link #main(String[])}. */
    private static final int CHUNK_SIZE = 1024;

    /** {@link #main(String[])} stops reading once more than this many bytes were consumed. */
    private static final int MAX_BYTES = 100000000;

    /**
     * Transcribes the audio file while playing it back, then prints the text.
     *
     * <p>Reading stops at end of stream or once more than {@link #MAX_BYTES}
     * bytes were consumed, whichever comes first. Failures during playback or
     * recognition are printed and whatever text was collected so far is still
     * written to standard output.
     *
     * <p>NOTE(review): the playback format is hard-coded (44.1 kHz, 16-bit,
     * stereo, little-endian) and is not derived from the file — confirm it
     * matches the input, otherwise playback speed will be wrong.
     *
     * @param args unused
     * @throws RuntimeException wrapping the cause if the model or the audio file cannot be opened
     */
    public static void main(String[] args) {
        StringBuilder result = new StringBuilder();
        LibVosk.setLogLevel(LogLevel.DEBUG);

        AudioFormat format = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 44100, 16, 2, 4, 44100, false);
        DataLine.Info dataLineInfo = new DataLine.Info(SourceDataLine.class, format);

        try (Model model = new Model(MODEL_PATH);
             InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream(AUDIO_PATH)));
             Recognizer recognizer = new Recognizer(model, RECOGNIZER_SAMPLE_RATE)) {
            // The output line is closed automatically, including when playback or recognition fails.
            try (SourceDataLine speakers = (SourceDataLine) AudioSystem.getLine(dataLineInfo)) {
                speakers.open(format);
                speakers.start();

                byte[] audioData = new byte[CHUNK_SIZE];
                int bytesRead = 0;
                int numBytesRead;
                // read() returns -1 at end of stream; stop there instead of passing -1 on as a length.
                while (bytesRead <= MAX_BYTES
                        && (numBytesRead = ais.read(audioData, 0, CHUNK_SIZE)) >= 0) {
                    bytesRead += numBytesRead;
                    speakers.write(audioData, 0, numBytesRead);
                    feed(recognizer, audioData, numBytesRead, result);
                }
                result.append(getResult(recognizer.getFinalResult()));
                // Let queued audio finish playing before the line is closed.
                speakers.drain();
            } catch (Exception e) {
                // Best effort: report the failure and still print what was recognized so far.
                e.printStackTrace();
            }
            System.out.println(result.toString());
        } catch (IOException | UnsupportedAudioFileException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Passes one chunk of audio to the recognizer and appends the text of the
     * JSON it reports for that chunk.
     *
     * @param recognizer recognizer receiving the audio
     * @param data       buffer holding the audio bytes
     * @param length     number of valid bytes in {@code data}
     * @param sink       builder collecting the recognized text
     */
    private static void feed(Recognizer recognizer, byte[] data, int length, StringBuilder sink) {
        if (recognizer.acceptWaveForm(data, length)) {
            sink.append(getResult(recognizer.getResult()));
        } else {
            // NOTE(review): partial results are appended as well; confirm whether
            // VoskResult maps any text for them, otherwise this appends "".
            sink.append(getResult(recognizer.getPartialResult()));
        }
    }

    /**
     * Extracts the recognized text from a recognizer JSON string.
     *
     * @param result JSON string produced by the recognizer
     * @return the parsed object's text, or an empty string when the parsed
     *         object or its text is {@code null}
     */
    private static String getResult(String result) {
        VoskResult parsed = JSON.parseObject(result, VoskResult.class);
        return Optional.ofNullable(parsed).map(VoskResult::getText).orElse("");
    }

    /**
     * Transcribes the audio file without playback and prints the text.
     *
     * @param argv unused
     * @throws IOException                   if the model or the audio file cannot be read
     * @throws UnsupportedAudioFileException if the audio file is not in a recognized format
     */
    public static void main1(String[] argv) throws IOException, UnsupportedAudioFileException {
        LibVosk.setLogLevel(LogLevel.DEBUG);
        StringBuilder result = new StringBuilder();
        try (Model model = new Model(MODEL_PATH);
             InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream(AUDIO_PATH)));
             Recognizer recognizer = new Recognizer(model, RECOGNIZER_SAMPLE_RATE)) {

            int nbytes;
            byte[] buffer = new byte[4096];
            while ((nbytes = ais.read(buffer)) >= 0) {
                feed(recognizer, buffer, nbytes, result);
            }
            result.append(getResult(recognizer.getFinalResult()));
        }
        System.out.println(result);
    }
}

感谢网上各位大佬能分享这些信息

测试可行,识别率没有做过对比、大模型也没有试过。这里也就提供一种可行的离线解决方案。

相关推荐
财经资讯数据_灵砚智能几秒前
基于全球经济类多源新闻的NLP情感分析与数据可视化(夜间-次晨)2026年6月7日
大数据·人工智能·python·ai·信息可视化·自然语言处理·灵砚智能
va学弟1 分钟前
Java 网络通信编程(9):从 BIO 到 NIO
java·运维·服务器·网络
凡人叶枫2 分钟前
Effective C++ 条款05:了解 C++ 默默编写并调用哪些函数
java·linux·开发语言·c++·effective c++·编程范式
Full Stack Developme3 分钟前
G1回收器的工作机制
java·jvm
FII工业富联科技服务5 分钟前
智慧园区统一运营平台技术架构解析:全景3D世界模型+视频AI+物联网闭环实践
大数据·人工智能·物联网·3d·ai·制造
砍材农夫6 分钟前
物联网实战:Spring Boot + Netty 搭建 MQTT平台 | 多协议适配与模块化设计
java·spring boot·后端·物联网·spring
云烟成雨TD9 分钟前
Spring AI 1.x 系列【41】接入高德 MCP 服务
java·人工智能·spring
xixixi7777711 分钟前
英伟达 Cosmos3 开源物理世界模型、国内具身智能评测标准落地、宇树冲刺人形机器人第一股|具身智能进入技术、标准、商业化三重爆发期
大数据·人工智能·ai·机器人·开源·英伟达·人形机器人
winlife_12 分钟前
全程用 AI 做一款商业级手游 · EP7 表现层与手感:从“能跑“到“摸起来爽“
java·开发语言·人工智能·unity·ai编程·游戏开发·mcp
千纸鹤の脉搏12 分钟前
多线程的初步使用
java·开发语言·学习·多线程