接入 AI 模型学习如何与第三方 API 进行集成,处理自然语言生成任务,以及如何优化请求和响应管理来实现智能对话功能。
1. 聊天逻辑
- 接入Deepseek chat模型,本来想用OpenAI的,但是他家的不支持国内信用卡注册,无奈花费10块充值了deepseek,这也是目前唯一消费
- 输入:语音和文字,输出:文字 转音频播放
- 浏览器原生支持:`SpeechRecognition` 负责浏览器录音转文字;`SpeechSynthesisUtterance` 负责文字转音频播放
2. 接口设计
- 创建 DeepSeek API token,拿到 `DEEPSEEK_API_KEY`
- 设计接口 `/api/callGpt`,可以参考官方案例
ts
// /api/callGpt/route.ts
import { NextResponse } from 'next/server'
/**
 * POST /api/callGpt — forwards a single user message to the DeepSeek
 * chat-completions API and returns the assistant's reply as `{ text }`.
 *
 * Responses:
 *  - 200 `{ text: string }` on success (empty string if the payload shape is unexpected)
 *  - 400 `{ error }` when the request body is malformed or `message` is missing
 *  - 500 `{ error }` when the upstream DeepSeek call fails
 */
export async function POST(req: Request) {
  // Parse defensively: a malformed body is a client error (400),
  // not an unhandled exception that surfaces as a 500.
  let message: unknown
  try {
    ;({ message } = await req.json())
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }
  // Validate before forwarding — don't spend upstream quota on empty input.
  if (typeof message !== 'string' || message.trim() === '') {
    return NextResponse.json(
      { error: 'Missing "message" field' },
      { status: 400 },
    )
  }
  const deepseekRes = await fetch('https://api.deepseek.com/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${process.env.DEEPSEEK_API_KEY!}`,
    },
    body: JSON.stringify({
      model: 'deepseek-chat',
      // Single-shot response; the client plays the full reply back as speech.
      stream: false,
      messages: [
        {
          role: 'user',
          content: message,
        },
      ],
    }),
  })
  if (!deepseekRes.ok) {
    return NextResponse.json(
      { error: 'Failed to fetch from DeepSeek API' },
      { status: 500 },
    )
  }
  const result = await deepseekRes.json()
  // Optional chaining keeps an unexpected payload from throwing; fall back to ''.
  const text = result.choices?.[0]?.message?.content || ''
  return NextResponse.json({ text })
}
3. 前端设计
- `recognition.continuous = true`:说一句完整的话再结束,而不是瞬时响应
- `speak` 函数将文字转音频,浏览器兼容性一般,手机上放不出来
tsx
//aiTalk/page.tsx
'use client'
import MarkdownView from '@/components/MarkdownView'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { ChevronRight } from 'lucide-react'
import { useLocale, useTranslations } from 'next-intl'
import { useEffect, useRef, useState } from 'react'
/**
 * Voice/text chat page backed by /api/callGpt (DeepSeek).
 *
 * Voice mode: press-and-hold button → SpeechRecognition transcribes →
 * the transcript is sent to the API → the reply is rendered as markdown
 * and spoken aloud via SpeechSynthesis.
 * Text mode: plain input + send button for the same round trip.
 */
export default function PageAiTalk() {
  const t = useTranslations('PageAiTalk')
  const locale = useLocale()
  // Web Speech API handles: recognizer (speech → text) and the
  // currently playing utterance (text → speech).
  const recognitionRef = useRef<SpeechRecognition | null>(null)
  const synthRef = useRef<SpeechSynthesisUtterance | null>(null)
  const chatContainerRef = useRef<HTMLDivElement>(null)
  const [message, setMessage] = useState('')
  const [response, setResponse] = useState('')
  const [isLoading, setIsLoading] = useState(false)
  const [chatHistory, setChatHistory] = useState<
    { message: string; response: string }[]
  >([])
  const [inputMode, setInputMode] = useState<'voice' | 'text'>('voice')

  // Speaks `text` aloud, cancelling any in-flight utterance first.
  // NOTE(review): voice availability varies by browser/device — some
  // mobile browsers stay silent even when the API is present.
  const speak = (text: string) => {
    if (!window.speechSynthesis) return
    window.speechSynthesis.cancel()
    const utterance = new SpeechSynthesisUtterance(text)
    utterance.lang = locale === 'zh' ? 'zh-CN' : 'en-US'
    utterance.rate = 1.1
    utterance.pitch = 1
    // Prefer a voice matching the current locale when one is installed.
    const voices = speechSynthesis.getVoices()
    const matchedVoice = voices.find((v) =>
      locale === 'zh' ? v.lang.includes('zh') : v.lang.includes('en'),
    )
    if (matchedVoice) utterance.voice = matchedVoice
    synthRef.current = utterance
    window.speechSynthesis.speak(utterance)
  }

  // Sends `text` to the chat API, then renders, speaks, and archives the reply.
  const fetchChatGPT = async (text: string) => {
    setIsLoading(true)
    try {
      const res = await fetch('/api/callGpt', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ message: text }),
      })
      // Fix: an error response carries no `text` field — without this check
      // the page would render and speak "undefined".
      if (!res.ok) {
        throw new Error(`callGpt failed with status ${res.status}`)
      }
      const data = await res.json()
      const fullReply = data.text ?? ''
      setResponse(fullReply)
      speak(fullReply)
      setChatHistory((prev) => [
        ...prev,
        { message: text, response: fullReply },
      ])
      setMessage('')
    } catch (error) {
      console.error('Error:', error)
    } finally {
      setIsLoading(false)
    }
  }

  // Set up one SpeechRecognition instance per locale change.
  useEffect(() => {
    if (
      !('webkitSpeechRecognition' in window || 'SpeechRecognition' in window)
    ) {
      alert(t('browserNotSupported'))
      return
    }
    const SpeechRecognition =
      window.SpeechRecognition || window.webkitSpeechRecognition
    const recognition = new SpeechRecognition() as any
    recognition.lang = locale === 'zh' ? 'zh-CN' : 'en-US'
    // Only final results; keep listening until the user releases the button.
    recognition.interimResults = false
    recognition.continuous = true
    recognition.onstart = () => setMessage(`🎤 ${t('listening')}`)
    recognition.onresult = (event: any) => {
      // Fix: with `continuous = true` results accumulate within one session,
      // so read the newest result via `event.resultIndex` — `results[0]`
      // would repeat the first phrase if the user speaks more than once.
      const transcript = event.results[event.resultIndex][0].transcript
      console.log('识别到文本:', transcript)
      setMessage(transcript)
      fetchChatGPT(transcript)
    }
    recognition.onerror = (e: any) => {
      console.error('识别错误:', e)
      setMessage(`❌ ${t('recognitionError')}`)
    }
    recognition.onend = () => {
      console.log('识别结束')
      setMessage('')
    }
    recognitionRef.current = recognition
    // Stop listening when the component unmounts or the locale changes.
    return () => recognition.stop()
  }, [locale])

  // Keep the newest exchange in view.
  useEffect(() => {
    if (chatContainerRef.current) {
      chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight
    }
  }, [chatHistory])

  // Short hold delay so an accidental tap doesn't start recognition.
  const holdTimer = useRef<NodeJS.Timeout | null>(null)
  const handlePressStart = (e: any) => {
    e.preventDefault()
    holdTimer.current = setTimeout(() => {
      if (!recognitionRef.current) return
      console.log('开始识别')
      recognitionRef.current.start()
    }, 200)
  }
  const handlePressEnd = (e: any) => {
    e.preventDefault()
    clearTimeout(holdTimer.current as NodeJS.Timeout)
    if (!recognitionRef.current) return
    console.log('停止识别')
    recognitionRef.current.stop()
  }

  return (
    <div className="page-wrapper py-6">
      <div className="mx-auto max-w-[680px]">
        <h1 className="mb-4 text-center text-xl font-bold">🎙️ {t('title')}</h1>
        {/* Mode toggle: voice (push-to-talk) vs. plain text input */}
        <div className="mb-4 flex w-full justify-center">
          <Button
            variant="outline"
            onClick={() =>
              setInputMode((prev) => (prev === 'voice' ? 'text' : 'voice'))
            }
          >
            {t('currentMode')}: {inputMode === 'voice' ? t('voice') : t('text')}
            <ChevronRight className="size-4" />
          </Button>
        </div>
        {/* Scrollable chat transcript */}
        <div
          ref={chatContainerRef}
          className="md:[60vh] bg-muted mb-4 h-[calc(100vh-20rem)] overflow-y-auto rounded-lg p-4"
        >
          <ul className="space-y-3">
            {chatHistory.map((chat, index) => (
              <li key={index} className="bg-background rounded-lg p-3 shadow">
                <p className="font-semibold">
                  {t('yourMessage')}:
                  <span className="font-normal">{chat.message}</span>
                </p>
                <div className="">
                  <div className="font-semibold">AI:</div>
                  <div className="">
                    <MarkdownView content={chat.response} />
                  </div>
                </div>
              </li>
            ))}
          </ul>
          {/* Three-dot bouncing loader while awaiting the API */}
          {isLoading && (
            <div className="mt-4 flex items-center justify-center gap-1">
              <div className="inline-flex space-x-2">
                <div className="bg-muted-foreground size-1 animate-bounce rounded-full [animation-delay:-0.3s]"></div>
                <div className="bg-muted-foreground size-1 animate-bounce rounded-full [animation-delay:-0.15s]"></div>
                <div className="bg-muted-foreground size-1 animate-bounce rounded-full"></div>
              </div>
            </div>
          )}
        </div>
        <div className="flex w-full justify-center">
          {inputMode === 'voice' ? (
            <Button
              className="bg-foreground active:bg-foreground/80 w-full cursor-pointer select-none rounded-lg py-2 font-bold transition duration-200 md:w-[50%]"
              onMouseDown={handlePressStart}
              onMouseUp={handlePressEnd}
              onTouchStart={handlePressStart}
              onTouchEnd={handlePressEnd}
              size="lg"
            >
              {t('pressAndSpeak')}
            </Button>
          ) : (
            <div className="flex w-full items-center gap-2">
              <Input
                type="text"
                className="h-10 flex-1 px-4"
                placeholder={t('enterText')}
                value={message}
                onChange={(e) => setMessage(e.target.value)}
              />
              <Button
                className="cursor-pointer px-4 py-2"
                size="lg"
                onClick={() => {
                  if (message.trim()) {
                    fetchChatGPT(message.trim())
                  }
                }}
              >
                {t('send')}
              </Button>
            </div>
          )}
        </div>
      </div>
    </div>
  )
}