openai Realtime API (实时语音)

https://openai.com/index/introducing-the-realtime-api/

官方demo

https://github.com/openai/openai-realtime-console

官方demo使用到的插件

https://github.com/openai/openai-realtime-api-beta?tab=readme-ov-file

装包配置

修改yarn.lock 这个包是从github下载的

"@openai/realtime-api-beta@openai/openai-realtime-api-beta":

version "0.0.0"

resolved "https://codeload.github.com/openai/openai-realtime-api-beta/tar.gz/a5cb94824f625423858ebacb9f769226ca98945f"

dependencies:

ws "^8.18.0"

前端代码

javascript 复制代码
import { RealtimeClient } from '@openai/realtime-api-beta'

nginx配置

RealtimeClient需要配置一个wss地址

wss和https使用相同的加密协议,不需要单独配置,直接配置一个转发就可以了

bash 复制代码
    # https
    server {
        listen       443 ssl; 
        server_name  chat.xutongbao.top;
        # 付费
        ssl_certificate         /temp/ssl/chat.xutongbao.top/chat.xutongbao.top_cert_chain.pem;   # nginx的ssl证书文件
        ssl_certificate_key     /temp/ssl/chat.xutongbao.top/chat.xutongbao.top_key.key;  # nginx的ssl证书验证密码

        # 免费
        # ssl_certificate         /temp/ssl/cersign/chat.xutongbao.top/chat.xutongbao.top.crt;   # nginx的ssl证书文件
        # ssl_certificate_key     /temp/ssl/cersign/chat.xutongbao.top/chat.xutongbao.top_rsa.key;  # nginx的ssl证书验证密码

        proxy_send_timeout 6000s;    # 设置发送超时时间,
        proxy_read_timeout 6000s;    # 设置读取超时时间。

        #配置根目录
        location / {
            root    /temp/yuying;
            index  index.html index.htm;
            add_header Content-Security-Policy upgrade-insecure-requests;

        }

        location /api/ {
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header REMOTE-HOST $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-NginX-Proxy true;

            proxy_set_header Connection '';
            proxy_http_version 1.1;
            chunked_transfer_encoding off;
            proxy_buffering off;
            proxy_cache off;

            proxy_pass http://yuying-api.xutongbao.top;
        }

        location /socket.io/ {
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header REMOTE-HOST $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-NginX-Proxy true;
            proxy_pass http://127.0.0.1:84;

            # 关键配置 start
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
            # 关键配置 end
        }

        location /ws {
            proxy_pass http://52.247.xxx.xxx:86/;
            proxy_read_timeout              500;
            proxy_set_header                Host    $http_host;
            proxy_set_header                X-Real-IP          $remote_addr;
            proxy_set_header                X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_http_version 1.1;
            
            # ws 协议专用头
            proxy_set_header                Upgrade $http_upgrade;
            proxy_set_header                Connection "Upgrade";
        }

        location /ws-test {
            proxy_pass http://52.247.xxx.xxx:92/;
            proxy_read_timeout              500;
            proxy_set_header                Host    $http_host;
            proxy_set_header                X-Real-IP          $remote_addr;
            proxy_set_header                X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_http_version 1.1;
            
            # ws 协议专用头
            proxy_set_header                Upgrade $http_upgrade;
            proxy_set_header                Connection "Upgrade";
        }


        # 匹配sslCnd开头的请求,实际转发的请求去掉多余的sslCnd这三个字母
        location ^~/sslCnd/ {
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header REMOTE-HOST $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-NginX-Proxy true;
            proxy_pass http://cdn.xutongbao.top/;
        }           
    }   

建立连接时如何通过token确认用户身份

javascript 复制代码
  let apiKeyValue = `${localStorage.getItem(
    'token'
  )}divide${localStorage.getItem('talkId')}`
  const clientRef = useRef(
    new RealtimeClient(
      LOCAL_RELAY_SERVER_URL
        ? {
            url: LOCAL_RELAY_SERVER_URL,
            apiKey: apiKeyValue,
            dangerouslyAllowAPIKeyInBrowser: true,
          }
        : {
            apiKey: apiKey,
            dangerouslyAllowAPIKeyInBrowser: true,
          }
    )
  )

前端完整代码

realtimePlus/pages/ConsolePage.js:

javascript 复制代码
import { connect } from 'react-redux'
import { withRouter } from 'react-router-dom'
import { useEffect, useRef, useCallback, useState } from 'react'
import { RealtimeClient } from '@openai/realtime-api-beta'
import { WavRecorder, WavStreamPlayer } from '../lib/wavtools/index.js'
import { instructions } from '../utils/conversation_config.js'
import { WavRenderer } from '../utils/wav_renderer'
import { X, ArrowUp, ArrowDown } from 'react-feather'
import { Button, Dropdown, Input, Select } from 'antd'
import { SinglePageHeader, Icon } from '../../../../../../components/light'
import { isPC } from '../../../../../../utils/tools.js'
import { realTimeBaseURL } from '../../../../../../utils/config.js'
import { message as antdMessage } from 'antd'
import Api from '../../../../../../api/index.js'

import './ConsolePage.css'
import './index.css'
const LOCAL_RELAY_SERVER_URL = realTimeBaseURL //'wss://chat.xutongbao.top/ws'

const Option = Select.Option
let isPCFlag = isPC()
let isAddStart = false
let addIdHistory = []

function Index() {
  //#region 配置
  const apiKey = LOCAL_RELAY_SERVER_URL
    ? ''
    : localStorage.getItem('tmp::voice_api_key') ||
      prompt('OpenAI API Key') ||
      ''
  if (apiKey !== '') {
    localStorage.setItem('tmp::voice_api_key', apiKey)
  }

  const wavRecorderRef = useRef(new WavRecorder({ sampleRate: 24000 }))
  const wavStreamPlayerRef = useRef(new WavStreamPlayer({ sampleRate: 24000 }))
  let apiKeyValue = `${localStorage.getItem(
    'token'
  )}divide${localStorage.getItem('talkId')}`
  const clientRef = useRef(
    new RealtimeClient(
      LOCAL_RELAY_SERVER_URL
        ? {
            url: LOCAL_RELAY_SERVER_URL,
            apiKey: apiKeyValue,
            dangerouslyAllowAPIKeyInBrowser: true,
          }
        : {
            apiKey: apiKey,
            dangerouslyAllowAPIKeyInBrowser: true,
          }
    )
  )

  const clientCanvasRef = useRef(null)
  const serverCanvasRef = useRef(null)
  const eventsScrollHeightRef = useRef(0)
  const eventsScrollRef = useRef(null)
  const startTimeRef = useRef(new Date().toISOString())

  const [items, setItems] = useState([])
  const [realtimeEvents, setRealtimeEvents] = useState([])
  const [expandedEvents, setExpandedEvents] = useState({})
  const [isConnected, setIsConnected] = useState(false)
  const [canPushToTalk, setCanPushToTalk] = useState(true)
  const [isRecording, setIsRecording] = useState(false)
  const [message, setMessage] = useState('')
  const [messageType, setMessageType] = useState('none')
  //#endregion

  const getItems = () => {
    const items = [
      {
        key: 'chrome',
        label: (
          <>
            {/* eslint-disable-next-line */}
            <a
              href={`https://static.xutongbao.top/app/ChromeSetup.exe`}
              target="_blank"
            >
              下载chrome浏览器(推荐)
            </a>
          </>
        ),
        icon: <Icon name="chrome" className="m-realtime-menu-icon"></Icon>,
      },
    ]
    return items
  }

  //#region 基础
  const formatTime = useCallback((timestamp) => {
    const startTime = startTimeRef.current
    const t0 = new Date(startTime).valueOf()
    const t1 = new Date(timestamp).valueOf()
    const delta = t1 - t0
    const hs = Math.floor(delta / 10) % 100
    const s = Math.floor(delta / 1000) % 60
    const m = Math.floor(delta / 60_000) % 60
    const pad = (n) => {
      let s = n + ''
      while (s.length < 2) {
        s = '0' + s
      }
      return s
    }
    return `${pad(m)}:${pad(s)}.${pad(hs)}`
  }, [])

  const connectConversation = useCallback(async () => {
    const client = clientRef.current
    const wavRecorder = wavRecorderRef.current
    const wavStreamPlayer = wavStreamPlayerRef.current

    startTimeRef.current = new Date().toISOString()
    setIsConnected(true)
    setRealtimeEvents([])
    setItems(client.conversation.getItems())

    try {
      // Connect to microphone
      await wavRecorder.begin()
    } catch (error) {
      console.log(error)
    }

    // Connect to audio output
    await wavStreamPlayer.connect()

    // Connect to realtime API
    await client.connect()
    // let isAutoAsk = true
    // if (isAutoAsk) {
    // client.sendUserMessageContent([
    //   {
    //     type: `input_text`,
    //     text: `你好!`,
    //   },
    // ])

    if (client.getTurnDetectionType() === 'server_vad') {
      await wavRecorder.record((data) => client.appendInputAudio(data.mono))
    }
  }, [])

  const handleTest = () => {
    const client = clientRef.current
    client.sendUserMessageContent([
      {
        type: `input_text`,
        text: message,
      },
    ])
    setMessage('')
  }

  const handleMessage = (event) => {
    setMessage(event.target.value)
  }

  /**
   * Disconnect and reset conversation state
   */
  const disconnectConversation = useCallback(async () => {
    setIsConnected(false)
    setRealtimeEvents([])
    // setItems([])

    const client = clientRef.current
    client.disconnect()

    const wavRecorder = wavRecorderRef.current
    await wavRecorder.end()

    const wavStreamPlayer = wavStreamPlayerRef.current
    await wavStreamPlayer.interrupt()
  }, [])

  const deleteConversationItem = useCallback(async (id) => {
    const client = clientRef.current
    client.deleteItem(id)
  }, [])

  /**
   * In push-to-talk mode, start recording
   * .appendInputAudio() for each sample
   */
  const startRecording = async () => {
    setIsRecording(true)
    const client = clientRef.current
    const wavRecorder = wavRecorderRef.current
    const wavStreamPlayer = wavStreamPlayerRef.current
    const trackSampleOffset = await wavStreamPlayer.interrupt()
    if (trackSampleOffset?.trackId) {
      const { trackId, offset } = trackSampleOffset
      await client.cancelResponse(trackId, offset)
    }
    try {
      await wavRecorder.record((data) => client.appendInputAudio(data.mono))
    } catch (error) {
      console.log(error)
    }
  }

  /**
   * In push-to-talk mode, stop recording
   */
  const stopRecording = async () => {
    setIsRecording(false)
    const client = clientRef.current
    const wavRecorder = wavRecorderRef.current
    try {
      await wavRecorder.pause()
    } catch (error) {
      console.log(error)
    }
    try {
      client.createResponse()
    } catch (error) {
      console.log(error)
    }
  }

  /**
   * Switch between Manual <> VAD mode for communication
   */
  const changeTurnEndType = async (messageType) => {
    setMessageType(messageType)
    let value
    if (messageType === 'server_vad') {
      value = 'server_vad'
    } else if (messageType === 'none' || messageType === 'input') {
      value = 'none'
    }
    const client = clientRef.current
    const wavRecorder = wavRecorderRef.current
    if (value === 'none' && wavRecorder.getStatus() === 'recording') {
      await wavRecorder.pause()
    }
    client.updateSession({
      turn_detection: value === 'none' ? null : { type: 'server_vad' },
    })
    if (value === 'server_vad' && client.isConnected()) {
      await wavRecorder.record((data) => client.appendInputAudio(data.mono))
    }
    setCanPushToTalk(messageType === 'none')
  }

  const handleSearch = () => {
    let params = {
      talkId: localStorage.getItem('talkId'),
      gptVersion: 'realtime',
      pageNum: 1,
      pageSize: 20,
      isGetNewest: true,
    }

    const client = clientRef.current
    // client.conversation.processEvent({
    //   type: 'conversation.item.created',
    //   event_id: 'item_ARaEpHPCznsNlBGN5DGFp',
    //   item: {
    //     id: 'item_ARaEpHPCznsNlBGN5DGFp',
    //     object: 'realtime.item',
    //     type: 'message',
    //     status: 'completed',
    //     role: 'user',
    //     content: [{ type: 'input_text', text: '你好' }],
    //     formatted: { audio: {}, text: '你好', transcript: '' },
    //   }
    // })
    // let items = client.conversation.getItems()
    // console.log('items', items)

    Api.h5.chatSearch(params).then((res) => {
      // let list = [
      //   {
      //     id: 'item_ARaEpHPCznsNlBGN5DGFp',
      //     object: 'realtime.item',
      //     type: 'message',
      //     status: 'completed',
      //     role: 'user',
      //     content: [{ type: 'input_text', text: '你好' }],
      //     formatted: { audio: {}, text: '你好', transcript: '' },
      //   },
      //   {
      //     id: 'item_ARaEpLuspCKg6raB95pFr',
      //     object: 'realtime.item',
      //     type: 'message',
      //     status: 'in_progress',
      //     role: 'assistant',
      //     content: [{ type: 'audio', transcript: '你好!' }],
      //     formatted: { audio: {}, text: '', transcript: '你好!' },
      //   },
      // ]

      if (res.code === 200) {
        let list = res.data.list.map((item) => {
          return {
            id: item.uid,
            object: 'realtime.item',
            type: 'message',
            status: 'completed',
            role: item.messageType === '1' ? 'user' : 'assistant',
            content: [
              {
                type: item.messageType === '1' ? 'input_text' : 'text',
                text: item.message,
                transcript: item.message,
              },
            ],
            formatted: {
              audio: {},
              text: item.message,
              transcript: item.message,
            },
          }
        })
        setItems(list)
        list.forEach((item) => {
          client.conversation.processEvent({
            type: 'conversation.item.created',
            event_id: item.id,
            item: {
              ...item,
            },
          })
        })
        let items = client.conversation.getItems()
        console.log('items', items)
      }
    })
  }

  //#endregion

  //#region  useEffect
  /**
   * Auto-scroll the event logs
   */
  useEffect(() => {
    if (eventsScrollRef.current) {
      const eventsEl = eventsScrollRef.current
      const scrollHeight = eventsEl.scrollHeight
      // Only scroll if height has just changed
      if (scrollHeight !== eventsScrollHeightRef.current) {
        eventsEl.scrollTop = scrollHeight
        eventsScrollHeightRef.current = scrollHeight
      }
    }
  }, [realtimeEvents])

  /**
   * Auto-scroll the conversation logs
   */
  useEffect(() => {
    const conversationEls = [].slice.call(
      document.body.querySelectorAll('[data-conversation-content]')
    )
    for (const el of conversationEls) {
      const conversationEl = el
      conversationEl.scrollTop = conversationEl.scrollHeight
    }
  }, [items])

  /**
   * Set up render loops for the visualization canvas
   */
  useEffect(() => {
    let isLoaded = true

    const wavRecorder = wavRecorderRef.current
    const clientCanvas = clientCanvasRef.current
    let clientCtx = null

    const wavStreamPlayer = wavStreamPlayerRef.current
    const serverCanvas = serverCanvasRef.current
    let serverCtx = null

    const render = () => {
      if (isLoaded) {
        if (clientCanvas) {
          if (!clientCanvas.width || !clientCanvas.height) {
            clientCanvas.width = clientCanvas.offsetWidth
            clientCanvas.height = clientCanvas.offsetHeight
          }
          clientCtx = clientCtx || clientCanvas.getContext('2d')
          if (clientCtx) {
            clientCtx.clearRect(0, 0, clientCanvas.width, clientCanvas.height)
            const result = wavRecorder.recording
              ? wavRecorder.getFrequencies('voice')
              : { values: new Float32Array([0]) }
            WavRenderer.drawBars(
              clientCanvas,
              clientCtx,
              result.values,
              '#0099ff',
              10,
              0,
              8
            )
          }
        }
        if (serverCanvas) {
          if (!serverCanvas.width || !serverCanvas.height) {
            serverCanvas.width = serverCanvas.offsetWidth
            serverCanvas.height = serverCanvas.offsetHeight
          }
          serverCtx = serverCtx || serverCanvas.getContext('2d')
          if (serverCtx) {
            serverCtx.clearRect(0, 0, serverCanvas.width, serverCanvas.height)
            const result = wavStreamPlayer.analyser
              ? wavStreamPlayer.getFrequencies('voice')
              : { values: new Float32Array([0]) }
            WavRenderer.drawBars(
              serverCanvas,
              serverCtx,
              result.values,
              '#009900',
              10,
              0,
              8
            )
          }
        }
        window.requestAnimationFrame(render)
      }
    }
    render()

    return () => {
      isLoaded = false
    }
  }, [])

  /**
   * Core RealtimeClient and audio capture setup
   * Set all of our instructions, tools, events and more
   */
  useEffect(() => {
    // Get refs
    const wavStreamPlayer = wavStreamPlayerRef.current
    const client = clientRef.current

    // Set instructions
    client.updateSession({ instructions: instructions })
    // Set transcription, otherwise we don't get user transcriptions back
    client.updateSession({ input_audio_transcription: { model: 'whisper-1' } })

    // handle realtime events from client + server for event logging
    client.on('realtime.event', (realtimeEvent) => {
      if (realtimeEvent.event.code === 400) {
        antdMessage.warning(realtimeEvent.event.message)
        disconnectConversation()
        return
      }
      setRealtimeEvents((realtimeEvents) => {
        const lastEvent = realtimeEvents[realtimeEvents.length - 1]
        if (lastEvent?.event.type === realtimeEvent.event.type) {
          // if we receive multiple events in a row, aggregate them for display purposes
          lastEvent.count = (lastEvent.count || 0) + 1
          return realtimeEvents.slice(0, -1).concat(lastEvent)
        } else {
          return realtimeEvents.concat(realtimeEvent)
        }
      })
    })
    client.on('error', (event) => console.error(event))
    client.on('conversation.interrupted', async () => {
      const trackSampleOffset = await wavStreamPlayer.interrupt()
      if (trackSampleOffset?.trackId) {
        const { trackId, offset } = trackSampleOffset
        await client.cancelResponse(trackId, offset)
      }
    })
    client.on('conversation.updated', async ({ item, delta }) => {
      const items = client.conversation.getItems()
      if (delta?.audio) {
        wavStreamPlayer.add16BitPCM(delta.audio, item.id)
      }
      if (item.status === 'completed' && item.formatted.audio?.length) {
        const wavFile = await WavRecorder.decode(
          item.formatted.audio,
          24000,
          24000
        )
        item.formatted.file = wavFile
      }
      setItems(items)
      isAddStart = true
    })

    setItems(client.conversation.getItems())
    handleSearch()

    return () => {
      // cleanup; resets to defaults
      client.reset()
    }
    // eslint-disable-next-line
  }, [])

  useEffect(() => {
    if (Array.isArray(items) && items.length > 0) {
      let lastItem = items[items.length - 1]

      let addIdHistoryIndex = addIdHistory.findIndex(
        (item) => item === lastItem.id
      )
      if (
        lastItem?.status === 'completed' &&
        lastItem?.role === 'assistant' &&
        isAddStart === true &&
        addIdHistoryIndex < 0
      ) {
        addIdHistory.push(lastItem.id)
        let message = items[items.length - 2].formatted.transcript
          ? items[items.length - 2].formatted.transcript
          : items[items.length - 2].formatted.text
        let robotMessage = lastItem.formatted.transcript
        Api.h5
          .chatRealTimeAdd({
            talkId: localStorage.getItem('talkId'),
            name: localStorage.getItem('nickname'),
            message,
            robotMessage,
          })
          .then((res) => {
            if (res.code === 40006) {
              antdMessage.warning(res.message)
              disconnectConversation()
            }
          })
      }
    }

    // eslint-disable-next-line
  }, [items, isAddStart])
  //#endregion

  return (
    <div className="m-realtime-wrap-box">
      <div className={`m-realtime-wrap-chat`}>
        <SinglePageHeader
          goBackPath="/ai/index/home/chatList"
          title="Realtime"
        ></SinglePageHeader>
        <div className="m-realtime-list" id="scrollableDiv">
          {window.platform === 'rn' ? null : (
            <Dropdown
              menu={{ items: getItems() }}
              className="m-realtime-dropdown"
              trigger={['click', 'hover']}
            >
              <Icon name="more" className="m-realtime-menu-btn"></Icon>
            </Dropdown>
          )}
          <div data-component="ConsolePage">
            <div className="content-main">
              <div className="content-logs">
                <div className="content-block events">
                  <div className="visualization">
                    <div className="visualization-entry client">
                      <canvas ref={clientCanvasRef} />
                    </div>
                    <div className="visualization-entry server">
                      <canvas ref={serverCanvasRef} />
                    </div>
                  </div>
                  <div className="content-block-body" ref={eventsScrollRef}>
                    {!realtimeEvents.length && `等待连接...`}
                    {realtimeEvents.map((realtimeEvent, i) => {
                      const count = realtimeEvent.count
                      const event = { ...realtimeEvent.event }
                      if (event.type === 'input_audio_buffer.append') {
                        event.audio = `[trimmed: ${event.audio.length} bytes]`
                      } else if (event.type === 'response.audio.delta') {
                        event.delta = `[trimmed: ${event.delta.length} bytes]`
                      }
                      return (
                        <div className="event" key={event.event_id}>
                          <div className="event-timestamp">
                            {formatTime(realtimeEvent.time)}
                          </div>
                          <div className="event-details">
                            <div
                              className="event-summary"
                              onClick={() => {
                                // toggle event details
                                const id = event.event_id
                                const expanded = { ...expandedEvents }
                                if (expanded[id]) {
                                  delete expanded[id]
                                } else {
                                  expanded[id] = true
                                }
                                setExpandedEvents(expanded)
                              }}
                            >
                              <div
                                className={`event-source ${
                                  event.type === 'error'
                                    ? 'error'
                                    : realtimeEvent.source
                                }`}
                              >
                                {realtimeEvent.source === 'client' ? (
                                  <ArrowUp />
                                ) : (
                                  <ArrowDown />
                                )}
                                <span>
                                  {event.type === 'error'
                                    ? 'error!'
                                    : realtimeEvent.source}
                                </span>
                              </div>
                              <div className="event-type">
                                {event.type}
                                {count && ` (${count})`}
                              </div>
                            </div>
                            {!!expandedEvents[event.event_id] && (
                              <div className="event-payload">
                                {JSON.stringify(event, null, 2)}
                              </div>
                            )}
                          </div>
                        </div>
                      )
                    })}
                  </div>
                </div>
                <div className="content-block conversation">
                  <div className="content-block-body" data-conversation-content>
                    {!items.length && `等待连接...`}
                    {items.map((conversationItem, i) => {
                      return (
                        <div
                          className="conversation-item"
                          key={conversationItem.id}
                        >
                          <div
                            className={`speaker ${conversationItem.role || ''}`}
                          >
                            <div>
                              {(
                                conversationItem.role || conversationItem.type
                              ).replaceAll('_', ' ')}
                            </div>
                            <div
                              className="close"
                              onClick={() =>
                                deleteConversationItem(conversationItem.id)
                              }
                            >
                              <X />
                            </div>
                          </div>
                          <div className={`speaker-content`}>
                            {/* tool response */}
                            {conversationItem.type ===
                              'function_call_output' && (
                              <div>{conversationItem.formatted.output}</div>
                            )}
                            {/* tool call */}
                            {!!conversationItem.formatted.tool && (
                              <div>
                                {conversationItem.formatted.tool.name}(
                                {conversationItem.formatted.tool.arguments})
                              </div>
                            )}
                            {!conversationItem.formatted.tool &&
                              conversationItem.role === 'user' && (
                                <div className="m-realtime-message">
                                  {conversationItem.formatted.transcript ||
                                    (conversationItem.formatted.audio?.length
                                      ? '(awaiting transcript)'
                                      : conversationItem.formatted.text ||
                                        '(item sent)')}
                                </div>
                              )}
                            {!conversationItem.formatted.tool &&
                              conversationItem.role === 'assistant' && (
                                <div className="m-realtime-message">
                                  {conversationItem.formatted.transcript ||
                                    conversationItem.formatted.text ||
                                    '(truncated)'}
                                </div>
                              )}
                            {conversationItem.formatted.file && (
                              <audio
                                src={conversationItem.formatted.file.url}
                                controls
                              />
                            )}
                          </div>
                        </div>
                      )
                    })}
                  </div>
                </div>
                <div className="content-actions">
                  <Select
                    value={messageType}
                    onChange={(value) => changeTurnEndType(value)}
                    placeholder="请选择"
                  >
                    <Option value="none">手动</Option>
                    <Option value="server_vad">自动</Option>
                    <Option value="input">打字</Option>
                  </Select>
                  <div className="spacer" />
                  {isConnected && canPushToTalk && (
                    <>
                      {isPCFlag ? (
                        <Button
                          type="primary"
                          label={
                            isRecording ? 'release to send' : 'push to talk'
                          }
                          disabled={!isConnected || !canPushToTalk}
                          onMouseDown={startRecording}
                          onMouseUp={stopRecording}
                          className={`m-realtime-recorad-btn ${
                            isRecording ? 'active' : ''
                          }`}
                        >
                          {isRecording ? '松开发送' : '按住说话'}
                        </Button>
                      ) : (
                        <Button
                          type="primary"
                          label={
                            isRecording ? 'release to send' : 'push to talk'
                          }
                          disabled={!isConnected || !canPushToTalk}
                          onTouchStart={startRecording}
                          onTouchEnd={stopRecording}
                          className={`m-realtime-recorad-btn ${
                            isRecording ? 'active' : ''
                          }`}
                        >
                          {isRecording ? '松开发送' : '按住说话'}
                        </Button>
                      )}
                    </>
                  )}
                  {isConnected && messageType === 'input' ? (
                    <div className="m-realtime-input-wrap">
                      <Input.TextArea
                        value={message}
                        onChange={(event) => handleMessage(event)}
                        placeholder="请输入"
                      ></Input.TextArea>
                      <Button
                        type="primary"
                        onClick={() => handleTest()}
                        className="m-realtime-send-btn"
                      >
                        发送
                      </Button>
                    </div>
                  ) : null}
                  <div className="spacer" />
                  <Button
                    type="primary"
                    danger={isConnected ? true : false}
                    onClick={
                      isConnected ? disconnectConversation : connectConversation
                    }
                  >
                    {isConnected ? '已连接' : '连接'}
                  </Button>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  )
}

const mapStateToProps = (state) => {
  return {
    collapsed: state.getIn(['light', 'collapsed']),
    isRNGotToken: state.getIn(['light', 'isRNGotToken']),
  }
}

const mapDispatchToProps = (dispatch) => {
  return {
    onSetState(key, value) {
      dispatch({ type: 'SET_LIGHT_STATE', key, value })
    },
    onDispatch(action) {
      dispatch(action)
    },
  }
}

export default connect(mapStateToProps, mapDispatchToProps)(withRouter(Index))

后端通过请求头获取token

javascript 复制代码
  async handleUserAuth(req) {
    let index = req.rawHeaders.findIndex((item) =>
      item.includes('realtime, openai-insecure-api-key.')
    )
    let infoValue = ''
    if (index >= 0) {
      infoValue = req.rawHeaders[index]
    }
    infoValue = infoValue.replace('realtime, openai-insecure-api-key.', '')
    infoValue = infoValue.replace(', openai-beta.realtime-v1', '')
    let infoValueArr = infoValue.split('divide')
    let realTimeAuthRes = await axios.post(
      `${baseURL}/api/light/chat/realTimeAuth`,
      {
        token: infoValueArr[0],
        talkId: infoValueArr[1],
        apiKey,
      }
    )
    return realTimeAuthRes
  }

后端完整代码

relay.js:

javascript 复制代码
const { WebSocketServer } = require('ws')
const axios = require('axios')

let baseURL = process.env.aliIPAddressWithPort
let apiKey = process.env.apiKeyOnServer

class RealtimeRelay {
  constructor(apiKey) {
    this.apiKey = apiKey
    this.sockets = new WeakMap()
    this.wss = null
  }

  listen(port) {
    this.wss = new WebSocketServer({ port })
    this.wss.on('connection', this.connectionHandler.bind(this))
    this.log(`Listening on ws://localhost:${port}`)
  }

  async handleUserAuth(req) {
    let index = req.rawHeaders.findIndex((item) =>
      item.includes('realtime, openai-insecure-api-key.')
    )
    let infoValue = ''
    if (index >= 0) {
      infoValue = req.rawHeaders[index]
    }
    infoValue = infoValue.replace('realtime, openai-insecure-api-key.', '')
    infoValue = infoValue.replace(', openai-beta.realtime-v1', '')
    let infoValueArr = infoValue.split('divide')
    let realTimeAuthRes = await axios.post(
      `${baseURL}/api/light/chat/realTimeAuth`,
      {
        token: infoValueArr[0],
        talkId: infoValueArr[1],
        apiKey,
      }
    )
    return realTimeAuthRes
  }

  async connectionHandler(ws, req) {
    if (global.isAzure) {
      let realTimeAuthRes = await this.handleUserAuth(req)
      if (realTimeAuthRes.data.code === 200) {
        let Realtime = await import('@openai/realtime-api-beta')
        const { RealtimeClient } = Realtime
        if (!req.url) {
          this.log('No URL provided, closing connection.')
          ws.close()
          return
        }

        const url = new URL(req.url, `http://${req.headers.host}`)
        const pathname = url.pathname

        if (pathname !== '/') {
          this.log(`Invalid pathname: "${pathname}"`)
          ws.close()
          return
        }

        // Instantiate new client
        this.log(`Connecting with key "${this.apiKey.slice(0, 3)}..."`)
        const client = new RealtimeClient({ apiKey: this.apiKey })

        // Relay: OpenAI Realtime API Event -> Browser Event
        client.realtime.on('server.*', (event) => {
          this.log(`Relaying "${event.type}" to Client`)
          ws.send(JSON.stringify(event))
        })
        client.realtime.on('close', () => ws.close())

        // Relay: Browser Event -> OpenAI Realtime API Event
        // We need to queue data waiting for the OpenAI connection
        const messageQueue = []
        const messageHandler = (data) => {
          try {
            const event = JSON.parse(data)
            this.log(`Relaying "${event.type}" to OpenAI`)
            client.realtime.send(event.type, event)
          } catch (e) {
            console.error(e.message)
            this.log(`Error parsing event from client: ${data}`)
          }
        }
        ws.on('message', (data) => {
          if (!client.isConnected()) {
            messageQueue.push(data)
          } else {
            messageHandler(data)
          }
        })
        ws.on('close', () => client.disconnect())

        // Connect to OpenAI Realtime API
        try {
          this.log(`Connecting to OpenAI...`)
          await client.connect()
        } catch (e) {
          this.log(`Error connecting to OpenAI: ${e.message}`)
          ws.close()
          return
        }
        this.log(`Connected to OpenAI successfully!`)
        while (messageQueue.length) {
          messageHandler(messageQueue.shift())
        }
      } else {
        ws.send(
          JSON.stringify({
            ...realTimeAuthRes.data,
          })
        )
      }
    }
  }

  // eslint-disable-next-line
  log(...args) {
    // console.log(`[RealtimeRelay]`, ...args)
  }
}

module.exports = {
  RealtimeRelay,
}

调用上面的代码:

javascript 复制代码
  const relay = new RealtimeRelay(process.env.openaiToken)
  relay.listen(PORT)

人工智能学习网站

https://chat.xutongbao.top

相关推荐
@_猿来如此2 小时前
Web网页制作之JavaScript的应用
前端·javascript·css·html·html5
Excel_VBA表格จุ๊บ5 小时前
wps宏js接入AI功能和接入翻译功能
javascript·wps·js宏
豪宇刘6 小时前
JavaScript 延迟加载的方法
开发语言·javascript
摇光936 小时前
js迭代器模式
开发语言·javascript·迭代器模式
anyup_前端梦工厂8 小时前
了解 ES6 的变量特性:Var、Let、Const
开发语言·javascript·ecmascript
每天都要进步哦9 小时前
Node.js中的fs模块:文件与目录操作(写入、读取、复制、移动、删除、重命名等)
前端·javascript·node.js
布兰妮甜9 小时前
Three.js 渲染技术:打造逼真3D体验的幕后功臣
javascript·3d·three.js·幕后
仿生狮子9 小时前
CSS Layer、Tailwind 和 sass 如何共存?
javascript·css·vue.js
在路上`9 小时前
vue3使用AntV X6 (图可视化引擎)历程[二]
javascript·vue.js
我命由我1234510 小时前
CesiumJS 案例 P34:场景视图(3D 视图、2D 视图)
前端·javascript·3d·前端框架·html·html5·js