语音识别、翻译及语音合成

Speech recognition, translation and speech synthesis.

python 复制代码
# Speech recognition, translation and speech synthesis
# pip install SpeechRecognition gtts googletrans==3.1.0a0 pyaudio sounddevice soundfile keyboard requests numpy

import argparse
import logging
import os
import tempfile
import threading
import time
from datetime import datetime

import keyboard
import numpy as np
import pyaudio
import requests
import sounddevice as sd
import soundfile as sf
import speech_recognition as sr
from googletrans import Translator
from gtts import gTTS

class SpeechTranslationApp:
    """Capture speech, transcribe it, translate it and speak the translation.

    Each cycle of :meth:`run` obtains audio (from the microphone or from a
    ``sounddevice`` recording), transcribes it with
    ``Recognizer.recognize_google``, forwards the raw audio to
    ``ASR_API_URL``, translates the text with ``googletrans`` and plays the
    translation through ``gTTS``.
    """

    # Seconds to wait for the ASR endpoint before giving up, so that an
    # unreachable server cannot stall the recognition loop indefinitely.
    ASR_API_TIMEOUT = 10

    def __init__(self, source_language='en-US', target_language='zh-CN', input_type='microphone', save_transcript=False):
        """
        Initialize the Speech Translation Application

        :param source_language: Language of input speech
        :param target_language: Language to translate to
        :param input_type: Input method (microphone or system_sound)
        :param save_transcript: Whether to save speech recognition results
        """
        # Configure logging
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s: %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        self.logger = logging.getLogger(__name__)

        # Speech recognition setup
        self.recognizer = sr.Recognizer()
        self.translator = Translator()

        # Configuration
        self.source_language = source_language
        self.target_language = target_language
        self.input_type = input_type
        self.save_transcript = save_transcript

        # Audio parameters (RATE, CHANNELS and RECORD_SECONDS drive the
        # sounddevice recording used for the 'system_sound' input type)
        self.CHUNK = 1024
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 44100
        self.RECORD_SECONDS = 5

        # API endpoint (replace with the actual API URL from the GitHub project)
        self.ASR_API_URL = "https://api.example.com/asr"  # Update with actual API endpoint

    def recognize_speech(self):
        """
        Recognize speech from either microphone or system sound

        Failures are never raised to the caller; they are logged and
        reported through the ``error`` entry of the returned dictionary.

        :return: Dictionary with keys ``success`` (bool), ``text``
                 (recognized text or None) and ``error`` (message or None)
        """
        result = {
            'success': False,
            'text': None,
            'error': None
        }

        try:
            if self.input_type == "microphone":
                with sr.Microphone() as source:
                    self.logger.info(f"Listening (Language: {self.source_language})...")
                    audio = self.recognizer.listen(source, timeout=5, phrase_time_limit=5)
            elif self.input_type == "system_sound":
                audio = self._record_system_audio()
            else:
                raise ValueError("Invalid input type. Choose 'microphone' or 'system_sound'")

            # Recognize speech
            text = self.recognizer.recognize_google(audio, language=self.source_language)

            # Send to ASR API (best effort: it logs its own failures)
            self.send_to_asr_api(audio)

            result['success'] = True
            result['text'] = text
            self.logger.info(f"Speech Recognition Result: {text}")

            # Save transcript if enabled
            if self.save_transcript:
                self.save_speech_to_file(text)

        except sr.WaitTimeoutError:
            result['error'] = "Listening timed out. No speech detected."
            self.logger.warning(result['error'])
        except sr.UnknownValueError:
            result['error'] = "Could not understand the audio"
            self.logger.warning(result['error'])
        except sr.RequestError as e:
            result['error'] = f"Could not request results from Google Speech Recognition service; {e}"
            self.logger.error(result['error'])
        except Exception as e:
            # Boundary handler: the main loop must keep running whatever
            # a single recognition attempt throws.
            result['error'] = f"An unexpected error occurred: {e}"
            self.logger.error(result['error'])

        return result

    def _record_system_audio(self):
        """
        Record RECORD_SECONDS of audio with sounddevice and load it for recognition

        The samples are written to a uniquely named temporary WAV file,
        which is removed again even when writing or loading fails.

        :return: Audio data object produced by ``Recognizer.record``
        """
        # NOTE(review): sd.rec is called without a device argument, so it
        # uses sounddevice's default input -- confirm that default is a
        # loopback device if real system audio is expected.
        self.logger.info(f"Capturing system sound (Language: {self.source_language})...")
        recording = sd.rec(
            int(self.RECORD_SECONDS * self.RATE),
            samplerate=self.RATE,
            channels=self.CHANNELS
        )
        sd.wait()

        # mkstemp gives a collision-free path outside the working
        # directory; close the descriptor so soundfile can reopen the path.
        fd, temp_audio_file = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
        try:
            sf.write(temp_audio_file, recording, self.RATE)

            # Convert to speech_recognition format
            with sr.AudioFile(temp_audio_file) as source:
                return self.recognizer.record(source)
        finally:
            # Clean up temporary file on success and on failure alike
            os.remove(temp_audio_file)

    def send_to_asr_api(self, audio):
        """
        Send audio to ASR API for processing

        Best effort: every failure is logged and swallowed so that a
        problem with the endpoint never aborts speech recognition.

        :param audio: Audio data object exposing ``get_wav_data()``
        """
        try:
            # Convert audio to a format suitable for API upload
            audio_data = audio.get_wav_data()

            # Prepare files for upload
            files = {'audio': ('speech.wav', audio_data, 'audio/wav')}

            # Send to ASR API (replace with actual API call). The timeout
            # keeps a dead endpoint from blocking the caller forever.
            response = requests.post(
                self.ASR_API_URL,
                files=files,
                timeout=self.ASR_API_TIMEOUT
            )

            if response.status_code == 200:
                self.logger.info("Successfully sent audio to ASR API")
            else:
                self.logger.warning(f"ASR API request failed with status {response.status_code}")

        except Exception as e:
            self.logger.error(f"Error sending audio to ASR API: {e}")

    def save_speech_to_file(self, text):
        """
        Save recognized speech to a text file

        The file is written as UTF-8 to
        ``transcripts/speech_transcript_<YYYYmmdd_HHMMSS>.txt`` relative to
        the current working directory. Errors are logged, not raised.

        :param text: Recognized text
        """
        try:
            # Create transcripts directory if it doesn't exist
            os.makedirs('transcripts', exist_ok=True)

            # Generate filename with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"transcripts/speech_transcript_{timestamp}.txt"

            with open(filename, 'w', encoding='utf-8') as f:
                f.write(text)

            self.logger.info(f"Transcript saved to {filename}")

        except Exception as e:
            self.logger.error(f"Error saving transcript: {e}")

    def translate_text(self, text):
        """
        Translate text to target language

        :param text: Text to translate
        :return: Translated text, or None when translation fails
        """
        try:
            translation = self.translator.translate(text, dest=self.target_language)
            self.logger.info(f"Translation Result: {translation.text}")
            return translation.text
        except Exception as e:
            self.logger.error(f"Translation error: {e}")
            return None

    def speak_text(self, text):
        """
        Convert text to speech and play

        The synthesized audio is written to ``translation_output.mp3`` in
        the current working directory and deleted afterwards, including
        when synthesis or playback fails. Errors are logged, not raised.

        :param text: Text to convert to speech
        """
        output_file = "translation_output.mp3"
        try:
            tts = gTTS(text=text, lang=self.target_language)
            tts.save(output_file)

            # Cross-platform audio playback
            if os.name == 'nt':  # Windows
                os.system(f"start {output_file}")
            elif os.name == 'posix':  # macOS and Linux
                os.system(f"mpg123 {output_file}")

            # Grace period before the file is deleted.
            # NOTE(review): 'start' presumably returns before playback
            # ends, so clips longer than this may be cut short -- confirm.
            time.sleep(2)  # Give time for playback
        except Exception as e:
            self.logger.error(f"Text-to-speech error: {e}")
        finally:
            # Remove temporary file even if saving or playback failed, so
            # a partially written mp3 is not left behind.
            try:
                os.remove(output_file)
            except FileNotFoundError:
                pass  # nothing was written
            except OSError as e:
                self.logger.error(f"Text-to-speech error: {e}")

    def run(self):
        """
        Main application loop

        Repeats recognize -> translate -> speak until Esc is pressed or
        the process receives KeyboardInterrupt. A press of Esc at any
        point is remembered and ends the loop after the current cycle.
        """
        self.logger.info("Speech Translation App Started")

        print("Press 'Esc' to exit the application")

        # A cycle blocks for several seconds, so polling the key state
        # once per cycle would miss a normal key tap; a hotkey callback
        # records the press whenever it happens.
        stop_requested = threading.Event()
        esc_hotkey = None

        try:
            esc_hotkey = keyboard.add_hotkey('esc', stop_requested.set)

            while not stop_requested.is_set():
                # Recognize speech
                recognition_result = self.recognize_speech()

                if recognition_result['success']:
                    # Translate recognized text
                    translated_text = self.translate_text(recognition_result['text'])

                    if translated_text:
                        # Speak translated text
                        self.speak_text(translated_text)
                else:
                    # Log or handle unsuccessful recognition
                    if recognition_result['error']:
                        print(f"Recognition error: {recognition_result['error']}")

                # Pause to prevent excessive processing; returns early
                # as soon as Esc is pressed.
                stop_requested.wait(2)

        except KeyboardInterrupt:
            self.logger.info("Application stopped by user")
        finally:
            if esc_hotkey is not None:
                keyboard.remove_hotkey(esc_hotkey)
            print("Speech Translation App Closed")

def main():
    """
    Command-line entry point

    Reads the options from the process arguments, builds a
    SpeechTranslationApp configured from them and runs it until it exits.
    """
    # (flag, add_argument keyword settings) for every supported option
    option_specs = (
        ('--source_lang', {'default': 'en-US', 'help': 'Source language code'}),
        ('--target_lang', {'default': 'zh-CN', 'help': 'Target language code'}),
        ('--input', {
            'choices': ['microphone', 'system_sound'],
            'default': 'microphone',
            'help': 'Input method',
        }),
        ('--save_transcript', {'action': 'store_true', 'help': 'Save speech transcripts'}),
    )

    cli = argparse.ArgumentParser(description='Speech Translation Application')
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    options = cli.parse_args()

    translator_app = SpeechTranslationApp(
        source_language=options.source_lang,
        target_language=options.target_lang,
        input_type=options.input,
        save_transcript=options.save_transcript,
    )
    translator_app.run()

# Start the command-line application only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
相关推荐
2601_949146531 天前
Python语音通知API示例代码汇总:基于Requests库的语音接口调用实战
开发语言·python
去码头整点薯条981 天前
python第五次作业
linux·前端·python
有代理ip1 天前
Python 与 Golang 爬虫的隐藏优势
爬虫·python·golang
数研小生1 天前
1688商品列表API:高效触达批发电商海量商品数据的技术方案
大数据·python·算法·信息可视化·json
Coder_Boy_1 天前
基于SpringAI的在线考试系统-企业级教育考试系统核心架构(完善版)
开发语言·人工智能·spring boot·python·架构·领域驱动
铁蛋AI编程实战1 天前
AI调用人类服务入门与Python实现(30分钟搭建“AI+真人”协作系统)
开发语言·人工智能·python
zhougl9961 天前
Java 常见异常梳理
java·开发语言·python
sensen_kiss1 天前
Jupter Notebook 使用教程
大数据·人工智能·python·学习·数据分析
多恩Stone1 天前
【3D-AICG 系列-1】Trellis v1 和 Trellis v2 的区别和改进
人工智能·pytorch·python·算法·3d·aigc
狂奔蜗牛飙车1 天前
Python学习之路-Python3 迭代器与生成器学习详解
开发语言·python·学习·#python学习笔记·python迭代器生成器