openai Realtime API (实时语音)

https://openai.com/index/introducing-the-realtime-api/

官方demo

https://github.com/openai/openai-realtime-console

官方demo使用到的插件

https://github.com/openai/openai-realtime-api-beta?tab=readme-ov-file

装包配置

修改yarn.lock 这个包是从github下载的

"@openai/realtime-api-beta@openai/openai-realtime-api-beta":

version "0.0.0"

resolved "https://codeload.github.com/openai/openai-realtime-api-beta/tar.gz/a5cb94824f625423858ebacb9f769226ca98945f"

dependencies:

ws "^8.18.0"

前端代码

javascript
import { RealtimeClient } from '@openai/realtime-api-beta'

nginx配置

RealtimeClient需要配置一个wss地址

wss和https使用相同的加密协议,不需要单独配置,直接配置一个转发就可以了

nginx
    # HTTPS server for the SPA. wss:// shares the TLS layer with https://,
    # so the WebSocket locations below only need a proxy_pass plus the
    # Upgrade/Connection headers — no separate certificate setup.
    server {
        listen       443 ssl; 
        server_name  chat.xutongbao.top;
        # Paid certificate
        ssl_certificate         /temp/ssl/chat.xutongbao.top/chat.xutongbao.top_cert_chain.pem;   # nginx ssl certificate file
        ssl_certificate_key     /temp/ssl/chat.xutongbao.top/chat.xutongbao.top_key.key;  # nginx ssl certificate private key

        # Free certificate (alternative)
        # ssl_certificate         /temp/ssl/cersign/chat.xutongbao.top/chat.xutongbao.top.crt;   # nginx ssl certificate file
        # ssl_certificate_key     /temp/ssl/cersign/chat.xutongbao.top/chat.xutongbao.top_rsa.key;  # nginx ssl certificate private key

        proxy_send_timeout 6000s;    # long send timeout for streaming responses
        proxy_read_timeout 6000s;    # long read timeout for streaming responses

        # Document root for the built frontend bundle
        location / {
            root    /temp/yuying;
            index  index.html index.htm;
            add_header Content-Security-Policy upgrade-insecure-requests;

        }

        # REST API reverse proxy (buffering disabled for streamed replies)
        location /api/ {
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header REMOTE-HOST $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-NginX-Proxy true;

            proxy_set_header Connection '';
            proxy_http_version 1.1;
            chunked_transfer_encoding off;
            proxy_buffering off;
            proxy_cache off;

            proxy_pass http://yuying-api.xutongbao.top;
        }

        location /socket.io/ {
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header REMOTE-HOST $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-NginX-Proxy true;
            proxy_pass http://127.0.0.1:84;

            # key WebSocket-upgrade configuration - start
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
            # key WebSocket-upgrade configuration - end
        }

        # Realtime relay endpoint used by RealtimeClient (the trailing slash
        # on proxy_pass strips the /ws prefix before forwarding).
        location /ws {
            proxy_pass http://52.247.xxx.xxx:86/;
            proxy_read_timeout              500;
            proxy_set_header                Host    $http_host;
            proxy_set_header                X-Real-IP          $remote_addr;
            proxy_set_header                X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_http_version 1.1;
            
            # headers required by the ws protocol
            proxy_set_header                Upgrade $http_upgrade;
            proxy_set_header                Connection "Upgrade";
        }

        location /ws-test {
            proxy_pass http://52.247.xxx.xxx:92/;
            proxy_read_timeout              500;
            proxy_set_header                Host    $http_host;
            proxy_set_header                X-Real-IP          $remote_addr;
            proxy_set_header                X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_http_version 1.1;
            
            # headers required by the ws protocol
            proxy_set_header                Upgrade $http_upgrade;
            proxy_set_header                Connection "Upgrade";
        }


        # Match requests starting with /sslCnd/ and forward them with the
        # extra "sslCnd" prefix stripped from the proxied path.
        location ^~/sslCnd/ {
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header REMOTE-HOST $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-NginX-Proxy true;
            proxy_pass http://cdn.xutongbao.top/;
        }           
    }   

建立连接时如何通过token确认用户身份

javascript
  // Composite credential sent in place of an OpenAI API key: the relay
  // server splits it on the literal string 'divide' to recover the user's
  // token and talkId for its own auth check.
  let apiKeyValue = `${localStorage.getItem(
    'token'
  )}divide${localStorage.getItem('talkId')}`
  const clientRef = useRef(
    new RealtimeClient(
      LOCAL_RELAY_SERVER_URL
        ? {
            // Relay mode: connect to our wss relay with the composite token.
            url: LOCAL_RELAY_SERVER_URL,
            apiKey: apiKeyValue,
            dangerouslyAllowAPIKeyInBrowser: true,
          }
        : {
            // Direct mode: the browser talks to OpenAI with a real API key.
            apiKey: apiKey,
            dangerouslyAllowAPIKeyInBrowser: true,
          }
    )
  )

前端完整代码

realtimePlus/pages/ConsolePage.js:

javascript
import { connect } from 'react-redux'
import { withRouter } from 'react-router-dom'
import { useEffect, useRef, useCallback, useState } from 'react'
import { RealtimeClient } from '@openai/realtime-api-beta'
import { WavRecorder, WavStreamPlayer } from '../lib/wavtools/index.js'
import { instructions } from '../utils/conversation_config.js'
import { WavRenderer } from '../utils/wav_renderer'
import { X, ArrowUp, ArrowDown } from 'react-feather'
import { Button, Dropdown, Input, Select } from 'antd'
import { SinglePageHeader, Icon } from '../../../../../../components/light'
import { isPC } from '../../../../../../utils/tools.js'
import { realTimeBaseURL } from '../../../../../../utils/config.js'
import { message as antdMessage } from 'antd'
import Api from '../../../../../../api/index.js'

import './ConsolePage.css'
import './index.css'
// wss relay endpoint; when set, all traffic goes through our relay server.
const LOCAL_RELAY_SERVER_URL = realTimeBaseURL //'wss://chat.xutongbao.top/ws'

const Option = Select.Option
let isPCFlag = isPC()
// Module-level flags (not React state): survive re-renders without causing them.
let isAddStart = false // set true after the first conversation.updated event
let addIdHistory = [] // ids of assistant items already persisted to history

function Index() {
  //#region configuration
  // Direct mode only: read a cached OpenAI key or prompt for one. Relay
  // mode leaves apiKey empty and authenticates with token+talkId instead.
  const apiKey = LOCAL_RELAY_SERVER_URL
    ? ''
    : localStorage.getItem('tmp::voice_api_key') ||
      prompt('OpenAI API Key') ||
      ''
  if (apiKey !== '') {
    localStorage.setItem('tmp::voice_api_key', apiKey)
  }

  // 24000 Hz matches the PCM rate used elsewhere in this file
  // (WavRecorder.decode is called with 24000 below).
  const wavRecorderRef = useRef(new WavRecorder({ sampleRate: 24000 }))
  const wavStreamPlayerRef = useRef(new WavStreamPlayer({ sampleRate: 24000 }))
  // Composite credential: relay splits on the literal 'divide' to recover
  // token and talkId.
  let apiKeyValue = `${localStorage.getItem(
    'token'
  )}divide${localStorage.getItem('talkId')}`
  const clientRef = useRef(
    new RealtimeClient(
      LOCAL_RELAY_SERVER_URL
        ? {
            url: LOCAL_RELAY_SERVER_URL,
            apiKey: apiKeyValue,
            dangerouslyAllowAPIKeyInBrowser: true,
          }
        : {
            apiKey: apiKey,
            dangerouslyAllowAPIKeyInBrowser: true,
          }
    )
  )

  const clientCanvasRef = useRef(null) // mic waveform canvas
  const serverCanvasRef = useRef(null) // playback waveform canvas
  const eventsScrollHeightRef = useRef(0)
  const eventsScrollRef = useRef(null)
  const startTimeRef = useRef(new Date().toISOString()) // session clock origin

  const [items, setItems] = useState([]) // conversation items rendered below
  const [realtimeEvents, setRealtimeEvents] = useState([]) // event log rows
  const [expandedEvents, setExpandedEvents] = useState({})
  const [isConnected, setIsConnected] = useState(false)
  const [canPushToTalk, setCanPushToTalk] = useState(true)
  const [isRecording, setIsRecording] = useState(false)
  const [message, setMessage] = useState('') // typed-input mode text
  const [messageType, setMessageType] = useState('none') // 'none' | 'server_vad' | 'input'
  //#endregion

  // Items for the top-right dropdown: a single Chrome download link.
  // NOTE: the local `items` here shadows the `items` state array.
  const getItems = () => {
    const items = [
      {
        key: 'chrome',
        label: (
          <>
            {/* eslint-disable-next-line */}
            <a
              href={`https://static.xutongbao.top/app/ChromeSetup.exe`}
              target="_blank"
            >
              下载chrome浏览器(推荐)
            </a>
          </>
        ),
        icon: <Icon name="chrome" className="m-realtime-menu-icon"></Icon>,
      },
    ]
    return items
  }

  //#region basics
  // Elapsed time since the session started (startTimeRef), rendered as
  // mm:ss.cc for the event log.
  const formatTime = useCallback((timestamp) => {
    const elapsedMs =
      new Date(timestamp).valueOf() - new Date(startTimeRef.current).valueOf()
    const centis = Math.floor(elapsedMs / 10) % 100
    const seconds = Math.floor(elapsedMs / 1000) % 60
    const minutes = Math.floor(elapsedMs / 60_000) % 60
    const two = (n) => String(n).padStart(2, '0')
    return `${two(minutes)}:${two(seconds)}.${two(centis)}`
  }, [])

  /**
   * Open the full audio session: mic capture, speaker output, and the
   * realtime connection. UI state is flipped optimistically before the
   * async connects.
   */
  const connectConversation = useCallback(async () => {
    const client = clientRef.current
    const wavRecorder = wavRecorderRef.current
    const wavStreamPlayer = wavStreamPlayerRef.current

    // Reset the session clock used by formatTime for the event log.
    startTimeRef.current = new Date().toISOString()
    setIsConnected(true)
    setRealtimeEvents([])
    setItems(client.conversation.getItems())

    try {
      // Connect to microphone (may fail, e.g. permission denied; log and
      // continue so the rest of the session still comes up)
      await wavRecorder.begin()
    } catch (error) {
      console.log(error)
    }

    // Connect to audio output
    await wavStreamPlayer.connect()

    // Connect to realtime API
    await client.connect()
    // let isAutoAsk = true
    // if (isAutoAsk) {
    // client.sendUserMessageContent([
    //   {
    //     type: `input_text`,
    //     text: `你好!`,
    //   },
    // ])

    // In server-VAD mode, start streaming mic audio immediately.
    if (client.getTurnDetectionType() === 'server_vad') {
      await wavRecorder.record((data) => client.appendInputAudio(data.mono))
    }
  }, [])

  // Typed-input mode: send the textarea content as a user text item and
  // clear the input box.
  const handleTest = () => {
    clientRef.current.sendUserMessageContent([
      {
        type: `input_text`,
        text: message,
      },
    ])
    setMessage('')
  }

  // Controlled-input handler for the typed-message textarea.
  const handleMessage = ({ target }) => setMessage(target.value)

  /**
   * Disconnect and reset conversation state.
   * Teardown order: realtime client first, then mic, then playback.
   * Items are intentionally kept on screen after disconnect.
   */
  const disconnectConversation = useCallback(async () => {
    setIsConnected(false)
    setRealtimeEvents([])
    // setItems([])

    const client = clientRef.current
    client.disconnect()

    const wavRecorder = wavRecorderRef.current
    await wavRecorder.end()

    const wavStreamPlayer = wavStreamPlayerRef.current
    await wavStreamPlayer.interrupt()
  }, [])

  // Remove a single conversation item via the realtime client.
  const deleteConversationItem = useCallback(async (id) => {
    clientRef.current.deleteItem(id)
  }, [])

  /**
   * In push-to-talk mode, start recording
   * .appendInputAudio() for each sample
   */
  const startRecording = async () => {
    setIsRecording(true)
    const client = clientRef.current
    const wavRecorder = wavRecorderRef.current
    const wavStreamPlayer = wavStreamPlayerRef.current
    // Barge-in: stop any audio currently playing and report how far
    // playback got so the server can cancel the rest of that response.
    const trackSampleOffset = await wavStreamPlayer.interrupt()
    if (trackSampleOffset?.trackId) {
      const { trackId, offset } = trackSampleOffset
      await client.cancelResponse(trackId, offset)
    }
    try {
      await wavRecorder.record((data) => client.appendInputAudio(data.mono))
    } catch (error) {
      // e.g. recorder not initialized — log and keep the UI usable
      console.log(error)
    }
  }

  /**
   * In push-to-talk mode, stop recording and ask the model to respond.
   * Each step is wrapped separately so a pause failure does not prevent
   * the response request (and vice versa).
   */
  const stopRecording = async () => {
    setIsRecording(false)
    const client = clientRef.current
    try {
      await wavRecorderRef.current.pause()
    } catch (pauseError) {
      console.log(pauseError)
    }
    try {
      client.createResponse()
    } catch (responseError) {
      console.log(responseError)
    }
  }

  /**
   * Switch between Manual <> VAD mode for communication.
   * 'none' = push-to-talk, 'server_vad' = hands-free, 'input' = typed text
   * (typed mode also maps to turn_detection: null).
   */
  const changeTurnEndType = async (messageType) => {
    setMessageType(messageType)
    let value
    if (messageType === 'server_vad') {
      value = 'server_vad'
    } else if (messageType === 'none' || messageType === 'input') {
      value = 'none'
    }
    const client = clientRef.current
    const wavRecorder = wavRecorderRef.current
    // Leaving VAD mode: stop the continuous mic stream first.
    if (value === 'none' && wavRecorder.getStatus() === 'recording') {
      await wavRecorder.pause()
    }
    client.updateSession({
      turn_detection: value === 'none' ? null : { type: 'server_vad' },
    })
    // Entering VAD mode while connected: start streaming mic audio now.
    if (value === 'server_vad' && client.isConnected()) {
      await wavRecorder.record((data) => client.appendInputAudio(data.mono))
    }
    // The push-to-talk button is only shown in manual mode.
    setCanPushToTalk(messageType === 'none')
  }

  /**
   * Load the most recent realtime chat history for this talk and replay it
   * into the RealtimeClient's conversation state so old turns render
   * alongside live ones.
   *
   * Fixes: the promise previously had no rejection handler (an API failure
   * became an unhandled rejection); dead commented-out sample data removed.
   */
  const handleSearch = () => {
    const params = {
      talkId: localStorage.getItem('talkId'),
      gptVersion: 'realtime',
      pageNum: 1,
      pageSize: 20,
      isGetNewest: true,
    }

    const client = clientRef.current
    Api.h5
      .chatSearch(params)
      .then((res) => {
        if (res.code === 200) {
          // Convert persisted rows into realtime.item shapes; messageType
          // '1' marks a user turn, anything else an assistant turn.
          const list = res.data.list.map((item) => ({
            id: item.uid,
            object: 'realtime.item',
            type: 'message',
            status: 'completed',
            role: item.messageType === '1' ? 'user' : 'assistant',
            content: [
              {
                type: item.messageType === '1' ? 'input_text' : 'text',
                text: item.message,
                transcript: item.message,
              },
            ],
            formatted: {
              audio: {},
              text: item.message,
              transcript: item.message,
            },
          }))
          setItems(list)
          // Feed each restored item through the client's event pipeline so
          // client.conversation stays in sync with what we render.
          list.forEach((item) => {
            client.conversation.processEvent({
              type: 'conversation.item.created',
              event_id: item.id,
              item: {
                ...item,
              },
            })
          })
          console.log('items', client.conversation.getItems())
        }
      })
      .catch((error) => console.log(error))
  }

  //#endregion

  //#region  useEffect
  /**
   * Auto-scroll the event logs
   */
  useEffect(() => {
    if (eventsScrollRef.current) {
      const eventsEl = eventsScrollRef.current
      const scrollHeight = eventsEl.scrollHeight
      // Only scroll if height has just changed — avoids re-pinning the
      // scroll position on renders that added no new log rows.
      if (scrollHeight !== eventsScrollHeightRef.current) {
        eventsEl.scrollTop = scrollHeight
        eventsScrollHeightRef.current = scrollHeight
      }
    }
  }, [realtimeEvents])

  /**
   * Auto-scroll the conversation logs
   * (every element marked with data-conversation-content is pinned to
   * its bottom whenever items change)
   */
  useEffect(() => {
    const conversationEls = [].slice.call(
      document.body.querySelectorAll('[data-conversation-content]')
    )
    for (const el of conversationEls) {
      const conversationEl = el
      conversationEl.scrollTop = conversationEl.scrollHeight
    }
  }, [items])

  /**
   * Set up render loops for the visualization canvas.
   * One requestAnimationFrame loop draws both the mic (client) and the
   * playback (server) frequency bars; the isLoaded flag ends the loop
   * after unmount.
   */
  useEffect(() => {
    let isLoaded = true

    const wavRecorder = wavRecorderRef.current
    const clientCanvas = clientCanvasRef.current
    let clientCtx = null

    const wavStreamPlayer = wavStreamPlayerRef.current
    const serverCanvas = serverCanvasRef.current
    let serverCtx = null

    const render = () => {
      if (isLoaded) {
        if (clientCanvas) {
          // Lazily size the canvas to its CSS box on first frame.
          if (!clientCanvas.width || !clientCanvas.height) {
            clientCanvas.width = clientCanvas.offsetWidth
            clientCanvas.height = clientCanvas.offsetHeight
          }
          clientCtx = clientCtx || clientCanvas.getContext('2d')
          if (clientCtx) {
            clientCtx.clearRect(0, 0, clientCanvas.width, clientCanvas.height)
            // Flat line (single zero) when not recording.
            const result = wavRecorder.recording
              ? wavRecorder.getFrequencies('voice')
              : { values: new Float32Array([0]) }
            WavRenderer.drawBars(
              clientCanvas,
              clientCtx,
              result.values,
              '#0099ff',
              10,
              0,
              8
            )
          }
        }
        if (serverCanvas) {
          if (!serverCanvas.width || !serverCanvas.height) {
            serverCanvas.width = serverCanvas.offsetWidth
            serverCanvas.height = serverCanvas.offsetHeight
          }
          serverCtx = serverCtx || serverCanvas.getContext('2d')
          if (serverCtx) {
            serverCtx.clearRect(0, 0, serverCanvas.width, serverCanvas.height)
            const result = wavStreamPlayer.analyser
              ? wavStreamPlayer.getFrequencies('voice')
              : { values: new Float32Array([0]) }
            WavRenderer.drawBars(
              serverCanvas,
              serverCtx,
              result.values,
              '#009900',
              10,
              0,
              8
            )
          }
        }
        window.requestAnimationFrame(render)
      }
    }
    render()

    return () => {
      isLoaded = false
    }
  }, [])

  /**
   * Core RealtimeClient and audio capture setup
   * Set all of our instructions, tools, events and more
   */
  useEffect(() => {
    // Get refs
    const wavStreamPlayer = wavStreamPlayerRef.current
    const client = clientRef.current

    // Set instructions
    client.updateSession({ instructions: instructions })
    // Set transcription, otherwise we don't get user transcriptions back
    client.updateSession({ input_audio_transcription: { model: 'whisper-1' } })

    // handle realtime events from client + server for event logging
    client.on('realtime.event', (realtimeEvent) => {
      // NOTE(review): code 400 looks like an auth/quota error injected by
      // the relay server — confirm against relay.js. We warn and tear the
      // session down.
      if (realtimeEvent.event.code === 400) {
        antdMessage.warning(realtimeEvent.event.message)
        disconnectConversation()
        return
      }
      setRealtimeEvents((realtimeEvents) => {
        const lastEvent = realtimeEvents[realtimeEvents.length - 1]
        if (lastEvent?.event.type === realtimeEvent.event.type) {
          // if we receive multiple events in a row, aggregate them for display purposes
          lastEvent.count = (lastEvent.count || 0) + 1
          return realtimeEvents.slice(0, -1).concat(lastEvent)
        } else {
          return realtimeEvents.concat(realtimeEvent)
        }
      })
    })
    client.on('error', (event) => console.error(event))
    // Barge-in from the server side: stop playback and cancel the response.
    client.on('conversation.interrupted', async () => {
      const trackSampleOffset = await wavStreamPlayer.interrupt()
      if (trackSampleOffset?.trackId) {
        const { trackId, offset } = trackSampleOffset
        await client.cancelResponse(trackId, offset)
      }
    })
    client.on('conversation.updated', async ({ item, delta }) => {
      const items = client.conversation.getItems()
      // Stream incremental audio chunks straight to the player.
      if (delta?.audio) {
        wavStreamPlayer.add16BitPCM(delta.audio, item.id)
      }
      // When an item finishes, decode its audio into a wav file for the
      // <audio> element rendered in the transcript.
      if (item.status === 'completed' && item.formatted.audio?.length) {
        const wavFile = await WavRecorder.decode(
          item.formatted.audio,
          24000,
          24000
        )
        item.formatted.file = wavFile
      }
      setItems(items)
      // Enables the history-persistence effect further down.
      isAddStart = true
    })

    setItems(client.conversation.getItems())
    // Load persisted history for this talk on mount.
    handleSearch()

    return () => {
      // cleanup; resets to defaults
      client.reset()
    }
    // eslint-disable-next-line
  }, [])

  /**
   * Persist each completed assistant turn (plus the user turn preceding
   * it) to the chat-history API exactly once, keyed by item id.
   *
   * Fixes: previously crashed with a TypeError when the list held a single
   * assistant item (`items[items.length - 2].formatted` on undefined);
   * the API promise also had no rejection handler.
   */
  useEffect(() => {
    if (!Array.isArray(items) || items.length === 0) return
    const lastItem = items[items.length - 1]
    const alreadySaved = addIdHistory.includes(lastItem?.id)
    if (
      lastItem?.status === 'completed' &&
      lastItem?.role === 'assistant' &&
      isAddStart === true &&
      !alreadySaved
    ) {
      addIdHistory.push(lastItem.id)
      // The item before the assistant reply is the user's turn; prefer its
      // audio transcript, fall back to typed text. Guarded: there may be
      // no preceding item.
      const prevFormatted = items[items.length - 2]?.formatted
      const message = prevFormatted?.transcript
        ? prevFormatted.transcript
        : prevFormatted?.text
      const robotMessage = lastItem.formatted?.transcript
      Api.h5
        .chatRealTimeAdd({
          talkId: localStorage.getItem('talkId'),
          name: localStorage.getItem('nickname'),
          message,
          robotMessage,
        })
        .then((res) => {
          // 40006: rejected by the backend — warn and end the session.
          if (res.code === 40006) {
            antdMessage.warning(res.message)
            disconnectConversation()
          }
        })
        .catch((error) => console.log(error))
    }

    // eslint-disable-next-line
  }, [items, isAddStart])
  //#endregion

  // UI: page header, event log with waveforms, conversation transcript,
  // and the control row (mode select / push-to-talk / text input / connect).
  return (
    <div className="m-realtime-wrap-box">
      <div className={`m-realtime-wrap-chat`}>
        <SinglePageHeader
          goBackPath="/ai/index/home/chatList"
          title="Realtime"
        ></SinglePageHeader>
        <div className="m-realtime-list" id="scrollableDiv">
          {window.platform === 'rn' ? null : (
            <Dropdown
              menu={{ items: getItems() }}
              className="m-realtime-dropdown"
              trigger={['click', 'hover']}
            >
              <Icon name="more" className="m-realtime-menu-btn"></Icon>
            </Dropdown>
          )}
          <div data-component="ConsolePage">
            <div className="content-main">
              <div className="content-logs">
                <div className="content-block events">
                  {/* Mic (client) and playback (server) frequency bars */}
                  <div className="visualization">
                    <div className="visualization-entry client">
                      <canvas ref={clientCanvasRef} />
                    </div>
                    <div className="visualization-entry server">
                      <canvas ref={serverCanvasRef} />
                    </div>
                  </div>
                  {/* Raw event log; audio payloads are trimmed for display */}
                  <div className="content-block-body" ref={eventsScrollRef}>
                    {!realtimeEvents.length && `等待连接...`}
                    {realtimeEvents.map((realtimeEvent, i) => {
                      const count = realtimeEvent.count
                      const event = { ...realtimeEvent.event }
                      if (event.type === 'input_audio_buffer.append') {
                        event.audio = `[trimmed: ${event.audio.length} bytes]`
                      } else if (event.type === 'response.audio.delta') {
                        event.delta = `[trimmed: ${event.delta.length} bytes]`
                      }
                      return (
                        <div className="event" key={event.event_id}>
                          <div className="event-timestamp">
                            {formatTime(realtimeEvent.time)}
                          </div>
                          <div className="event-details">
                            <div
                              className="event-summary"
                              onClick={() => {
                                // toggle event details
                                const id = event.event_id
                                const expanded = { ...expandedEvents }
                                if (expanded[id]) {
                                  delete expanded[id]
                                } else {
                                  expanded[id] = true
                                }
                                setExpandedEvents(expanded)
                              }}
                            >
                              <div
                                className={`event-source ${
                                  event.type === 'error'
                                    ? 'error'
                                    : realtimeEvent.source
                                }`}
                              >
                                {realtimeEvent.source === 'client' ? (
                                  <ArrowUp />
                                ) : (
                                  <ArrowDown />
                                )}
                                <span>
                                  {event.type === 'error'
                                    ? 'error!'
                                    : realtimeEvent.source}
                                </span>
                              </div>
                              <div className="event-type">
                                {event.type}
                                {count && ` (${count})`}
                              </div>
                            </div>
                            {!!expandedEvents[event.event_id] && (
                              <div className="event-payload">
                                {JSON.stringify(event, null, 2)}
                              </div>
                            )}
                          </div>
                        </div>
                      )
                    })}
                  </div>
                </div>
                {/* Conversation transcript */}
                <div className="content-block conversation">
                  <div className="content-block-body" data-conversation-content>
                    {!items.length && `等待连接...`}
                    {items.map((conversationItem, i) => {
                      return (
                        <div
                          className="conversation-item"
                          key={conversationItem.id}
                        >
                          <div
                            className={`speaker ${conversationItem.role || ''}`}
                          >
                            <div>
                              {(
                                conversationItem.role || conversationItem.type
                              ).replaceAll('_', ' ')}
                            </div>
                            <div
                              className="close"
                              onClick={() =>
                                deleteConversationItem(conversationItem.id)
                              }
                            >
                              <X />
                            </div>
                          </div>
                          <div className={`speaker-content`}>
                            {/* tool response */}
                            {conversationItem.type ===
                              'function_call_output' && (
                              <div>{conversationItem.formatted.output}</div>
                            )}
                            {/* tool call */}
                            {!!conversationItem.formatted.tool && (
                              <div>
                                {conversationItem.formatted.tool.name}(
                                {conversationItem.formatted.tool.arguments})
                              </div>
                            )}
                            {!conversationItem.formatted.tool &&
                              conversationItem.role === 'user' && (
                                <div className="m-realtime-message">
                                  {conversationItem.formatted.transcript ||
                                    (conversationItem.formatted.audio?.length
                                      ? '(awaiting transcript)'
                                      : conversationItem.formatted.text ||
                                        '(item sent)')}
                                </div>
                              )}
                            {!conversationItem.formatted.tool &&
                              conversationItem.role === 'assistant' && (
                                <div className="m-realtime-message">
                                  {conversationItem.formatted.transcript ||
                                    conversationItem.formatted.text ||
                                    '(truncated)'}
                                </div>
                              )}
                            {conversationItem.formatted.file && (
                              <audio
                                src={conversationItem.formatted.file.url}
                                controls
                              />
                            )}
                          </div>
                        </div>
                      )
                    })}
                  </div>
                </div>
                {/* Controls: mode select, push-to-talk, text input, connect */}
                <div className="content-actions">
                  <Select
                    value={messageType}
                    onChange={(value) => changeTurnEndType(value)}
                    placeholder="请选择"
                  >
                    <Option value="none">手动</Option>
                    <Option value="server_vad">自动</Option>
                    <Option value="input">打字</Option>
                  </Select>
                  <div className="spacer" />
                  {/* Push-to-talk uses mouse events on desktop, touch on mobile */}
                  {isConnected && canPushToTalk && (
                    <>
                      {isPCFlag ? (
                        <Button
                          type="primary"
                          label={
                            isRecording ? 'release to send' : 'push to talk'
                          }
                          disabled={!isConnected || !canPushToTalk}
                          onMouseDown={startRecording}
                          onMouseUp={stopRecording}
                          className={`m-realtime-recorad-btn ${
                            isRecording ? 'active' : ''
                          }`}
                        >
                          {isRecording ? '松开发送' : '按住说话'}
                        </Button>
                      ) : (
                        <Button
                          type="primary"
                          label={
                            isRecording ? 'release to send' : 'push to talk'
                          }
                          disabled={!isConnected || !canPushToTalk}
                          onTouchStart={startRecording}
                          onTouchEnd={stopRecording}
                          className={`m-realtime-recorad-btn ${
                            isRecording ? 'active' : ''
                          }`}
                        >
                          {isRecording ? '松开发送' : '按住说话'}
                        </Button>
                      )}
                    </>
                  )}
                  {isConnected && messageType === 'input' ? (
                    <div className="m-realtime-input-wrap">
                      <Input.TextArea
                        value={message}
                        onChange={(event) => handleMessage(event)}
                        placeholder="请输入"
                      ></Input.TextArea>
                      <Button
                        type="primary"
                        onClick={() => handleTest()}
                        className="m-realtime-send-btn"
                      >
                        发送
                      </Button>
                    </div>
                  ) : null}
                  <div className="spacer" />
                  <Button
                    type="primary"
                    danger={isConnected ? true : false}
                    onClick={
                      isConnected ? disconnectConversation : connectConversation
                    }
                  >
                    {isConnected ? '已连接' : '连接'}
                  </Button>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  )
}

// Select the pieces of the redux store this page consumes.
const mapStateToProps = (state) => ({
  collapsed: state.getIn(['light', 'collapsed']),
  isRNGotToken: state.getIn(['light', 'isRNGotToken']),
})

// Dispatch helpers: a keyed state setter plus a raw dispatch passthrough.
const mapDispatchToProps = (dispatch) => ({
  onSetState(key, value) {
    dispatch({ type: 'SET_LIGHT_STATE', key, value })
  },
  onDispatch(action) {
    dispatch(action)
  },
})

// Bind the component to the Redux store, wrapped with router props.
export default connect(mapStateToProps, mapDispatchToProps)(withRouter(Index))

后端通过请求头获取token

javascript 复制代码
  /**
   * Extract the client's credentials from the WebSocket upgrade request and
   * verify them against the backend auth endpoint.
   *
   * The browser RealtimeClient cannot set arbitrary headers, so the token is
   * smuggled inside the `Sec-WebSocket-Protocol` value, formatted roughly as:
   *   "realtime, openai-insecure-api-key.<token>divide<talkId>, openai-beta.realtime-v1"
   * NOTE(review): `rawHeaders` is a flat [name, value, ...] array; the value
   * found by `findIndex` is assumed to be the protocol header's value — confirm.
   *
   * @param {import('http').IncomingMessage} req - upgrade request from `ws`
   * @returns {Promise<import('axios').AxiosResponse>} backend auth response
   *   (expected shape: `data.code === 200` on success)
   */
  async handleUserAuth(req) {
    let index = req.rawHeaders.findIndex((item) =>
      item.includes('realtime, openai-insecure-api-key.')
    )
    let infoValue = ''
    if (index >= 0) {
      infoValue = req.rawHeaders[index]
    }
    // Strip the framing, leaving "<token>divide<talkId>".
    infoValue = infoValue.replace('realtime, openai-insecure-api-key.', '')
    infoValue = infoValue.replace(', openai-beta.realtime-v1', '')
    // "divide" is the ad-hoc separator between the token and the talk id.
    let infoValueArr = infoValue.split('divide')
    let realTimeAuthRes = await axios.post(
      `${baseURL}/api/light/chat/realTimeAuth`,
      {
        token: infoValueArr[0],
        talkId: infoValueArr[1],
        apiKey,
      }
    )
    return realTimeAuthRes
  }

后端完整代码

relay.js:

javascript 复制代码
const { WebSocketServer } = require('ws')
const axios = require('axios')

let baseURL = process.env.aliIPAddressWithPort
let apiKey = process.env.apiKeyOnServer

/**
 * WebSocket relay between browser clients and the OpenAI Realtime API.
 *
 * The browser connects here (wss), this server authenticates the user via the
 * backend, then opens its own connection to OpenAI using the server-side API
 * key and forwards events in both directions — the key never reaches the client.
 */
class RealtimeRelay {
  /**
   * @param {string} apiKey - OpenAI API key used for the upstream connection.
   */
  constructor(apiKey) {
    this.apiKey = apiKey
    this.sockets = new WeakMap()
    this.wss = null
  }

  /**
   * Start accepting browser WebSocket connections on the given port.
   * @param {number} port
   */
  listen(port) {
    this.wss = new WebSocketServer({ port })
    this.wss.on('connection', this.connectionHandler.bind(this))
    this.log(`Listening on ws://localhost:${port}`)
  }

  /**
   * Extract the user's token/talkId from the upgrade request headers and
   * verify them against the backend auth endpoint.
   *
   * The browser client cannot set custom headers, so credentials ride inside
   * the Sec-WebSocket-Protocol value, roughly:
   *   "realtime, openai-insecure-api-key.<token>divide<talkId>, openai-beta.realtime-v1"
   *
   * @param {import('http').IncomingMessage} req
   * @returns {Promise<import('axios').AxiosResponse>} auth response
   *   (expected: `data.code === 200` on success)
   */
  async handleUserAuth(req) {
    let index = req.rawHeaders.findIndex((item) =>
      item.includes('realtime, openai-insecure-api-key.')
    )
    let infoValue = ''
    if (index >= 0) {
      infoValue = req.rawHeaders[index]
    }
    // Strip the framing, leaving "<token>divide<talkId>".
    infoValue = infoValue.replace('realtime, openai-insecure-api-key.', '')
    infoValue = infoValue.replace(', openai-beta.realtime-v1', '')
    let infoValueArr = infoValue.split('divide')
    let realTimeAuthRes = await axios.post(
      `${baseURL}/api/light/chat/realTimeAuth`,
      {
        token: infoValueArr[0],
        talkId: infoValueArr[1],
        apiKey,
      }
    )
    return realTimeAuthRes
  }

  /**
   * Per-connection handler: authenticate the user, then bridge events between
   * the browser socket and a fresh OpenAI RealtimeClient.
   *
   * @param {import('ws').WebSocket} ws - the browser's socket
   * @param {import('http').IncomingMessage} req - the upgrade request
   */
  async connectionHandler(ws, req) {
    // Relay mode is only enabled for the Azure deployment.
    if (!global.isAzure) {
      return
    }

    // FIX: a failed auth request used to escape as an unhandled promise
    // rejection (which can crash the process in modern Node).
    let realTimeAuthRes
    try {
      realTimeAuthRes = await this.handleUserAuth(req)
    } catch (e) {
      this.log(`Auth request failed: ${e.message}`)
      ws.close()
      return
    }

    if (realTimeAuthRes.data.code !== 200) {
      // Tell the client why it was rejected, then release the socket.
      // FIX: previously the socket was left open after a failed auth.
      ws.send(
        JSON.stringify({
          ...realTimeAuthRes.data,
        })
      )
      ws.close()
      return
    }

    // The beta package is ESM-only, so load it lazily from CommonJS.
    let Realtime = await import('@openai/realtime-api-beta')
    const { RealtimeClient } = Realtime
    if (!req.url) {
      this.log('No URL provided, closing connection.')
      ws.close()
      return
    }

    const url = new URL(req.url, `http://${req.headers.host}`)
    const pathname = url.pathname

    // Only the root path is a valid relay endpoint.
    if (pathname !== '/') {
      this.log(`Invalid pathname: "${pathname}"`)
      ws.close()
      return
    }

    // Instantiate new upstream client with the server-side key.
    this.log(`Connecting with key "${this.apiKey.slice(0, 3)}..."`)
    const client = new RealtimeClient({ apiKey: this.apiKey })

    // Relay: OpenAI Realtime API Event -> Browser Event
    client.realtime.on('server.*', (event) => {
      this.log(`Relaying "${event.type}" to Client`)
      ws.send(JSON.stringify(event))
    })
    client.realtime.on('close', () => ws.close())

    // Relay: Browser Event -> OpenAI Realtime API Event.
    // Messages arriving before the OpenAI connection is up are queued.
    const messageQueue = []
    const messageHandler = (data) => {
      try {
        const event = JSON.parse(data)
        this.log(`Relaying "${event.type}" to OpenAI`)
        client.realtime.send(event.type, event)
      } catch (e) {
        // Malformed client payloads are logged and dropped, never fatal.
        console.error(e.message)
        this.log(`Error parsing event from client: ${data}`)
      }
    }
    ws.on('message', (data) => {
      if (!client.isConnected()) {
        messageQueue.push(data)
      } else {
        messageHandler(data)
      }
    })
    ws.on('close', () => client.disconnect())

    // Connect to OpenAI Realtime API
    try {
      this.log(`Connecting to OpenAI...`)
      await client.connect()
    } catch (e) {
      this.log(`Error connecting to OpenAI: ${e.message}`)
      ws.close()
      return
    }
    this.log(`Connected to OpenAI successfully!`)
    // Flush anything the browser sent while we were still connecting.
    while (messageQueue.length) {
      messageHandler(messageQueue.shift())
    }
  }

  // Intentionally a no-op; uncomment for debugging.
  // eslint-disable-next-line
  log(...args) {
    // console.log(`[RealtimeRelay]`, ...args)
  }
}

module.exports = {
  RealtimeRelay,
}

调用上面的代码:

javascript 复制代码
  // Boot the relay with the server-side OpenAI key; the browser never sees it.
  // NOTE(review): PORT is assumed to be defined by the hosting process — confirm.
  const relay = new RealtimeRelay(process.env.openaiToken)
  relay.listen(PORT)

人工智能学习网站

https://chat.xutongbao.top

相关推荐
摸着石头过河的石头3 小时前
Service Worker 深度解析:让你的 Web 应用离线也能飞
前端·javascript·性能优化
不爱吃糖的程序媛4 小时前
Electron 如何判断运行平台是鸿蒙系统(OpenHarmony)
javascript·electron·harmonyos
Hilaku4 小时前
我用AI重构了一段500行的屎山代码,这是我的Prompt和思考过程
前端·javascript·架构
Cxiaomu4 小时前
React Native App 自动检测版本更新完整实现指南
javascript·react native·react.js
掘金安东尼5 小时前
前端周刊第439期(2025年11月3日–11月9日)
前端·javascript·vue.js
起这个名字5 小时前
微前端应用通信使用和原理
前端·javascript·vue.js
鹏多多6 小时前
Web使用natapp进行内网穿透和预览本地页面
前端·javascript
钱端工程师6 小时前
uniapp封装uni.request请求,实现重复接口请求中断上次请求(防抖)
前端·javascript·uni-app
茶憶6 小时前
uni-app app移动端实现纵向滑块功能,并伴随自动播放
javascript·vue.js·uni-app·html·scss
茶憶6 小时前
uniapp移动端实现触摸滑动功能:上下滑动展开收起内容,左右滑动删除列表
前端·javascript·vue.js·uni-app