《QT实用小工具·五十七》基于QT的语音识别

1、概述
源码放在文章末尾

本文实现了简单的语音识别功能。语音识别需要做三件事情:

1.记录用户的语音文件到本地

2.对用户语音进行编码,可以使用flac或者speex

3.使用第三方语音识别API或者SDK对语音进行分析识别。目前的实现比较简单:使用flac工具对wav音频文件进行编码,支持Mac OSX和Win 7平台。Win 7下使用flac.exe,具体用法可以通过 flac.exe --help > help.txt 将帮助信息重定向到help.txt文件中,方便查阅;Mac OSX下安装flac.dmg安装包后即可使用flac命令。下面先看音频的录入,Qt集成了音频模块(QtMultimedia)。

项目部分代码如下所示:

头文件 speechInput.h(C++):
/*
 * Based on Qt Example
 * PCM2WAV is not mine, I found it in Google and modified it.
 */

#ifndef SPEECHINPUT
#define SPEECHINPUT

#include <QPixmap>
#include <QWidget>
#include <QObject>
#include <QPushButton>
#include <QByteArray>
//#include <Phonon/AudioOutput>
#include <QtMultimedia>
#include <QIODevice>
#include <QFile>

// QFile specialisation that stores PCM audio in a WAV (RIFF) container.
// open() writes a 44-byte header whose size fields are placeholders;
// close() goes back and fills those size fields in before closing the file.
class WavPcmFile : public QFile {
public:
        // name:   path of the WAV file to create.
        // format: audio format described by the WAV header.
        // parent: optional QObject parent.
        WavPcmFile(const QString & name, const QAudioFormat & format, QObject *parent = 0);
        // Opens the file truncated for read/write and writes the WAV header.
        // Returns false if the format is not supported or the file cannot be opened.
        bool open();
        // Writes the final chunk sizes into the header, then closes the file.
        void close();

private:
        // Emits the RIFF/fmt/data header at the start of the file.
        void writeHeader();
        // True for 8-bit unsigned, or >8-bit signed little-endian samples.
        bool hasSupportedFormat();
        // Audio format used to build the header.
        QAudioFormat format;
};

// Write-only QIODevice used as an audio sink: every buffer passed to
// writeData() is scanned for its peak amplitude (exposed through level()),
// appended to a WAV file, and announced with the update() signal.
class AudioInfo : public QIODevice
{
        Q_OBJECT
public:
        // format:   format of the PCM data that will be written to this device.
        // parent:   QObject parent.
        // filename: path of the WAV file the audio is recorded to.
        AudioInfo(const QAudioFormat &format, QObject *parent, const QString &filename = "./data/tmp/speechInput.wav");
        ~AudioInfo();

        // Opens the WAV file and then this device (write-only).
        void start();
        // Closes this device and then the WAV file.
        void stop();

        // Peak level of the most recently written buffer.
        qreal level() const { return m_level; }

        // Always returns 0: no data can be read from this device.
        qint64 readData(char *data, qint64 maxlen);
        // Updates the level, stores the data in the WAV file, emits update(); returns len.
        qint64 writeData(const char *data, qint64 len);

private:
        const QAudioFormat m_format;
        // Largest sample magnitude for m_format; 0 when the format is not handled.
        quint16 m_maxAmplitude;
        qreal m_level; // 0.0 <= m_level <= 1.0

        // Destination WAV file; created with this object as its QObject parent.
        WavPcmFile * m_file;

signals:
        // Emitted at the end of every writeData() call.
        void update();
};


// Push button that also acts as an audio level meter: it paints a
// background image and, on top of it, a horizontal bar whose length is
// proportional to the level passed to setLevel().
class RenderArea : public QPushButton
{
        Q_OBJECT

public:
        // explicit: a QWidget* must never be converted to a RenderArea implicitly.
        explicit RenderArea(QWidget *parent = 0);

        // Stores the new level and triggers a repaint of the widget.
        void setLevel(qreal value);

protected:
        // Draws the background image and the level bar.
        void paintEvent(QPaintEvent *event);

private:
        // Level currently displayed by the bar.
        qreal m_level;
        QPixmap m_pixmap;
};

#endif
对应的实现文件(C++):
/*
 * Based on Qt Example
 * PCM2WAV is not mine, I found it in Google and modified it.
 */

#include "speechInput.h"

#include <QtEndian>
#include <QDebug>
#include <QPainter>

// Binds the object to the file `name` and remembers the audio format that
// the WAV header will describe. Nothing is opened or written here.
WavPcmFile::WavPcmFile(const QString & name, const QAudioFormat & format_, QObject *parent_)
        : QFile(name, parent_)
        , format(format_)
{}

// Reports whether the stored format can be written as WAV PCM:
// either 8-bit unsigned samples, or wider signed little-endian samples.
bool WavPcmFile::hasSupportedFormat()
{
        const int bits = format.sampleSize();

        // 8-bit WAV data is unsigned.
        if (bits == 8 && format.sampleType() == QAudioFormat::UnSignedInt)
                return true;

        // Anything wider must be signed and stored little endian.
        if (bits > 8 && format.sampleType() == QAudioFormat::SignedInt)
                return format.byteOrder() == QAudioFormat::LittleEndian;

        return false;
}

// Creates (or truncates) the file and writes the WAV header.
// Returns false, with the error string set, when the audio format cannot
// be represented as WAV PCM; returns false as well when the underlying
// QFile cannot be opened.
bool WavPcmFile::open()
{
        if (!hasSupportedFormat()) {
                setErrorString("Wav PCM supports only 8-bit unsigned samples "
                        "or 16-bit (or more) signed samples (in little endian)");
                return false;
        }

        const bool opened = QFile::open(ReadWrite | Truncate);
        if (opened)
                writeHeader();
        return opened;
}

void WavPcmFile::writeHeader()
{
        QDataStream out(this);
        out.setByteOrder(QDataStream::LittleEndian);

        // RIFF chunk
        out.writeRawData("RIFF", 4);
        out << quint32(0); // Placeholder for the RIFF chunk size (filled by close())
        out.writeRawData("WAVE", 4);

        // Format description chunk
        out.writeRawData("fmt ", 4);
        out << quint32(16); // "fmt " chunk size (always 16 for PCM)
        out << quint16(1);  // data format (1 => PCM)
        out << quint16(format.channelCount());
        out << quint32(format.sampleRate());
        out << quint32(format.sampleRate() * format.channelCount()
                * format.sampleSize() / 8 ); // bytes per second
        out << quint16(format.channelCount() * format.sampleSize() / 8); // Block align
        out << quint16(format.sampleSize()); // Significant Bits Per Sample

        // Data chunk
        out.writeRawData("data", 4);
        out << quint32(0);  // Placeholder for the data chunk size (filled by close())

        Q_ASSERT(pos() == 44); // Must be 44 for WAV PCM
}

void WavPcmFile::close()
{
        // Fill the header size placeholders
        quint32 fileSize = size();

        QDataStream out(this);
        // RIFF chunk size
        seek(4);
        out << quint32(fileSize - 8);

        // data chunk size
        seek(40);
        out << quint32(fileSize - 44);

        QFile::close();
}

// Stores the audio format, derives the largest sample magnitude that
// format can produce (used to normalise the level), and creates the WAV
// file object that will receive the recorded data.
// m_maxAmplitude stays 0 for formats other than 8/16-bit signed/unsigned
// integers; writeData() then skips the level computation.
AudioInfo::AudioInfo(const QAudioFormat &format, QObject *parent, const QString &filename)
        : QIODevice(parent)
        , m_format(format)
        , m_maxAmplitude(0)
        , m_level(0.0)
{
        const int bits = m_format.sampleSize();
        const bool isUnsigned = m_format.sampleType() == QAudioFormat::UnSignedInt;
        const bool isSigned = m_format.sampleType() == QAudioFormat::SignedInt;

        if (bits == 8) {
                if (isUnsigned)
                        m_maxAmplitude = 255;
                else if (isSigned)
                        m_maxAmplitude = 127;
        } else if (bits == 16) {
                if (isUnsigned)
                        m_maxAmplitude = 65535;
                else if (isSigned)
                        m_maxAmplitude = 32767;
        }

        // The file is parented to this object, so QObject deletes it with us.
        m_file = new WavPcmFile(filename, format, this);
}

// Finalises the recording if the object is destroyed while the WAV file
// is still open. m_file is deleted afterwards by the QObject parent/child
// mechanism, but by then only the base-class close runs, which does not
// patch the WAV header sizes; calling WavPcmFile::close() here does.
AudioInfo::~AudioInfo()
{
        if (m_file->isOpen())
                m_file->close();
}

void AudioInfo::start()
{
        m_file->open();
        open(QIODevice::WriteOnly);
}

void AudioInfo::stop()
{
        close();
        m_file->close();
}

// This device is a pure sink: nothing can be read back, so 0 bytes are
// always reported.
qint64 AudioInfo::readData(char *data, qint64 maxlen)
{
        Q_UNUSED(data);
        Q_UNUSED(maxlen);
        return 0;
}

// Receives a buffer of PCM audio.
//  1. If the format is one we can measure (m_maxAmplitude != 0), finds the
//     largest sample magnitude in the buffer and stores it, normalised by
//     m_maxAmplitude, in m_level.
//  2. Appends the raw bytes to the WAV file.
//  3. Emits update() so listeners can refresh.
// Always reports the whole buffer (len) as consumed.
qint64 AudioInfo::writeData(const char *data, qint64 len)
{
        if (m_maxAmplitude != 0) {
                const int bits = m_format.sampleSize();
                const int channels = m_format.channelCount();
                const bool isUnsigned = m_format.sampleType() == QAudioFormat::UnSignedInt;
                const bool isSigned = m_format.sampleType() == QAudioFormat::SignedInt;
                const bool littleEndian = m_format.byteOrder() == QAudioFormat::LittleEndian;

                Q_ASSERT(bits % 8 == 0);
                const int bytesPerChannel = bits / 8;
                const int bytesPerFrame = channels * bytesPerChannel;
                Q_ASSERT(len % bytesPerFrame == 0);
                const int frameCount = len / bytesPerFrame;

                quint16 peak = 0;
                const unsigned char *cursor = reinterpret_cast<const unsigned char *>(data);

                for (int frame = 0; frame < frameCount; ++frame) {
                        for (int ch = 0; ch < channels; ++ch) {
                                quint16 sample = 0;

                                if (bits == 8) {
                                        if (isUnsigned)
                                                sample = *reinterpret_cast<const quint8*>(cursor);
                                        else if (isSigned)
                                                sample = qAbs(*reinterpret_cast<const qint8*>(cursor));
                                } else if (bits == 16) {
                                        if (isUnsigned) {
                                                if (littleEndian)
                                                        sample = qFromLittleEndian<quint16>(cursor);
                                                else
                                                        sample = qFromBigEndian<quint16>(cursor);
                                        } else if (isSigned) {
                                                if (littleEndian)
                                                        sample = qAbs(qFromLittleEndian<qint16>(cursor));
                                                else
                                                        sample = qAbs(qFromBigEndian<qint16>(cursor));
                                        }
                                }

                                peak = qMax(sample, peak);
                                cursor += bytesPerChannel;
                        }
                }

                // Clamp so the resulting level never exceeds 1.0.
                peak = qMin(peak, m_maxAmplitude);
                m_level = qreal(peak) / m_maxAmplitude;
        }

        m_file->write(data, len);

        emit update();
        return len;
}


// Builds the meter button with a zero level, a filled base-colour
// background and a minimum size of 80x30.
RenderArea::RenderArea(QWidget *parent)
        : QPushButton(parent)
        , m_level(0)
{
        setBackgroundRole(QPalette::Base);
        setAutoFillBackground(true);

        setMinimumHeight(30);
        setMinimumWidth(80);
}

// Paints the button: the background image stretched over the whole widget
// and, when the level is non-zero, a 10-pixel-high dark gray bar whose
// length grows with m_level.
void RenderArea::paintEvent(QPaintEvent * /* event */)
{
        // Load the background once and keep it in m_pixmap instead of
        // decoding the resource again on every paint (this handler runs for
        // each level change). A failed load leaves m_pixmap null, so it is
        // simply retried on the next paint.
        if (m_pixmap.isNull())
                m_pixmap.load(":/images/button_default.png");

        QPainter painter(this);
        painter.drawPixmap(this->rect(), m_pixmap.scaled(this->size()));

        if (m_level == 0.0)
                return;

        painter.setPen(Qt::darkGray);

        const QRect area = painter.viewport();
        const int pos = ((area.right()-20)-(area.left()+11))*m_level;

        // The horizontal extent is the same for every row of the bar.
        const int x1 = area.left()+11;
        int x2 = area.left()+20+pos;
        if (x2 < area.left()+10)
                x2 = area.left()+10;

        // Draw the bar as ten stacked one-pixel lines.
        for (int i = 0; i < 10; ++i) {
                const int y = area.top()+10+i;
                painter.drawLine(QPoint(x1+10, y+10),QPoint(x2+10, y+10));
        }
}

// Sets the level shown by the meter and schedules a repaint.
// update() is used rather than repaint(): it queues the paint on the event
// loop and merges bursts of requests, so a caller delivering levels at
// audio-buffer rate is not blocked by a synchronous paint on every call.
void RenderArea::setLevel(qreal value)
{
        m_level = value;
        update();
}

源码下载

相关推荐
cch891818 小时前
汇编与Java:底层与高层的编程对决
java·开发语言·汇编
荒川之神19 小时前
拉链表概念与基本设计
java·开发语言·数据库
chushiyunen19 小时前
python中的@Property和@Setter
java·开发语言·python
小樱花的樱花19 小时前
C++ new和delete用法详解
linux·开发语言·c++
froginwe1119 小时前
C 运算符
开发语言
fengfuyao98520 小时前
低数据极限下模型预测控制的非线性动力学的稀疏识别 MATLAB实现
开发语言·matlab
摇滚侠20 小时前
搭建前端开发环境 安装 nodejs 设置淘宝镜像 最简化最标准版本 不使用 NVM NVM 高版本无法安装低版本 nodejs
java·开发语言·node.js
t1987512820 小时前
MATLAB十字路口车辆通行情况模拟系统
开发语言·matlab
yyk的萌21 小时前
AI 应用开发工程师基础学习计划
开发语言·python·学习·ai·lua
Amumu1213821 小时前
Js:正则表达式(一)
开发语言·javascript·正则表达式