RAG(检索增强生成)落地:基于阿里云opensearch实现智能问答机器人与企业知识库

文章目录

一、环境准备

bash 复制代码
# Prepare the Python environment.
# A Python version is named explicitly: without any package spec, conda creates
# an empty environment with no interpreter or pip of its own, so the
# `pip install` commands below could install into a different environment.
conda create -n opensearch python=3.10
conda activate opensearch

# Install the required packages.
pip install alibabacloud_tea_util
pip install alibabacloud_opensearch_util
pip install alibabacloud_credentials

二、阿里云opensearch准备

1、产品文档

新手指引:三步搭建智能问答机器人

需要购买LLM智能问答版的实例。

准备好accesskey、secret、API Key

2、准备我们的数据

3、上传文件

三、对接

1、对接文本问答

py 复制代码(将以下代码保存为 BaseRequest.py,后面的问答示例会通过 from BaseRequest import Config, Client 导入它)
# -*- coding: utf-8 -*-

import time
from typing import Dict, Any

from Tea.core import TeaCore
from Tea.exceptions import TeaException, UnretryableException
from Tea.model import TeaModel
from Tea.request import TeaRequest
from alibabacloud_credentials import models as credential_models
from alibabacloud_credentials.client import Client as CredentialClient
from alibabacloud_opensearch_util.opensearch_util import OpensearchUtil
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient


class Config(TeaModel):
    """
    Environment settings handed to the OpenSearch ``Client``.

    Each constructor argument is stored unchanged on the instance under the
    same attribute name; no validation or defaulting happens here.

    :param endpoint: host name of the service.
    :param protocol: request protocol string.
    :param type: authentication type passed to the credential configuration.
    :param security_token: security token passed to the credential configuration.
    :param access_key_id: access key id passed to the credential configuration.
    :param access_key_secret: access key secret passed to the credential configuration.
    :param user_agent: user-agent value; defaults to an empty string.
    """

    def __init__(
            self,
            endpoint: str = None,
            protocol: str = None,
            type: str = None,
            security_token: str = None,
            access_key_id: str = None,
            access_key_secret: str = None,
            user_agent: str = "",
    ):
        # Connection-related settings.
        self.endpoint, self.protocol = endpoint, protocol
        self.user_agent = user_agent

        # Authentication-related settings.
        self.type = type
        self.access_key_id, self.access_key_secret = access_key_id, access_key_secret
        self.security_token = security_token


class Client:
    """
    OpenSearch client.

    Assembles the request parameters, signs the request and sends it,
    retrying according to the supplied runtime options.
    """
    _endpoint: str = None
    _protocol: str = None
    _user_agent: str = None
    _credential: CredentialClient = None

    def __init__(
            self,
            config: Config,
    ):
        """
        Build the credential client and remember the connection settings.

        :param config: Config holding endpoint, protocol, user agent and
            credential fields. NOTE: when ``config.type`` is empty it is set
            to ``'access_key'`` on the passed-in object.
        :raises TeaException: when ``config`` is unset.
        """
        if UtilClient.is_unset(config):
            raise TeaException({
                'name': 'ParameterMissing',
                'message': "'config' can not be unset"
            })
        if UtilClient.empty(config.type):
            config.type = 'access_key'
        credential_config = credential_models.Config(
            access_key_id=config.access_key_id,
            type=config.type,
            access_key_secret=config.access_key_secret,
            security_token=config.security_token
        )
        self._credential = CredentialClient(credential_config)
        self._endpoint = config.endpoint
        self._protocol = config.protocol
        self._user_agent = config.user_agent

    @staticmethod
    def _runtime_options(runtime: util_models.RuntimeOptions) -> Dict[str, Any]:
        """Translate RuntimeOptions into the option dict passed to TeaCore."""
        return {
            'timeouted': 'retry',
            'readTimeout': runtime.read_timeout,
            'connectTimeout': runtime.connect_timeout,
            'httpProxy': runtime.http_proxy,
            'httpsProxy': runtime.https_proxy,
            'noProxy': runtime.no_proxy,
            'maxIdleConns': runtime.max_idle_conns,
            'retry': {
                'retryable': runtime.autoretry,
                # Falls back to 3 attempts when max_attempts is not set.
                'maxAttempts': UtilClient.default_number(runtime.max_attempts, 3)
            },
            'backoff': {
                # Falls back to policy 'no' and period 1 when not set.
                'policy': UtilClient.default_string(runtime.backoff_policy, 'no'),
                'period': UtilClient.default_number(runtime.backoff_period, 1)
            },
            'ignoreSSL': runtime.ignore_ssl
        }

    def _build_request(
            self,
            method: str,
            pathname: str,
            query: Dict[str, Any],
            headers: Dict[str, str],
            body: Any,
    ) -> TeaRequest:
        """Create one signed TeaRequest from the call parameters and stored settings."""
        request = TeaRequest()
        access_key_id = self._credential.get_access_key_id()
        access_key_secret = self._credential.get_access_key_secret()
        security_token = self._credential.get_security_token()
        # Protocol defaults to 'HTTP' when none was configured.
        request.protocol = UtilClient.default_string(self._protocol, 'HTTP')
        request.method = method
        request.pathname = pathname
        # Default headers are merged with the caller-supplied ones.
        request.headers = TeaCore.merge({
            'user-agent': UtilClient.get_user_agent(self._user_agent),
            'Content-Type': 'application/json',
            'Date': OpensearchUtil.get_date(),
            'host': UtilClient.default_string(self._endpoint, 'opensearch-cn-hangzhou.aliyuncs.com'),
            'X-Opensearch-Nonce': UtilClient.get_nonce()
        }, headers)
        if not UtilClient.is_unset(query):
            request.query = UtilClient.stringify_map_value(query)
        if not UtilClient.is_unset(body):
            # The body is sent as a JSON string together with its MD5 header.
            req_body = UtilClient.to_jsonstring(body)
            request.headers['Content-MD5'] = OpensearchUtil.get_content_md5(req_body)
            request.body = req_body
        if not UtilClient.is_unset(security_token):
            request.headers["X-Opensearch-Security-Token"] = security_token
        # Signed last, once every other field is in place.
        # NOTE(review): presumably the signature covers the headers set above
        # -- confirm against OpensearchUtil.get_signature before reordering.
        request.headers['Authorization'] = OpensearchUtil.get_signature(request, access_key_id,
                                                                        access_key_secret)
        return request

    def _request(
            self,
            method: str,
            pathname: str,
            query: Dict[str, Any],
            headers: Dict[str, str],
            body: Any,
            runtime: util_models.RuntimeOptions,
    ) -> Dict[str, Any]:
        """
        Build, sign and send a request, retrying while TeaCore allows it.

        :param method: HTTP method
        :param pathname: request path
        :param query: query parameters; skipped when unset
        :param headers: extra headers merged into the default headers
        :param body: request body, serialized to JSON; skipped when unset
        :param runtime: util_models.RuntimeOptions
        :return: Dict[str, Any] with keys 'body' (parsed JSON map) and
            'headers' (response headers)
        :raises TeaException: on a 4xx/5xx response, or any other
            TeaException that TeaCore does not consider retryable
        :raises UnretryableException: when no further retry is allowed
        """
        runtime.validate()
        _runtime = self._runtime_options(runtime)
        _last_request = None
        _last_exception = None
        _now = time.time()
        _retry_times = 0
        while TeaCore.allow_retry(_runtime.get('retry'), _retry_times, _now):
            # Back off before every attempt except the first one.
            if _retry_times > 0:
                _backoff_time = TeaCore.get_backoff_time(_runtime.get('backoff'), _retry_times)
                if _backoff_time > 0:
                    TeaCore.sleep(_backoff_time)
            _retry_times = _retry_times + 1
            try:
                # A fresh request is built per attempt, so date, nonce and
                # signature are regenerated each time.
                _last_request = self._build_request(method, pathname, query, headers, body)
                _response = TeaCore.do_action(_last_request, _runtime)
                obj_str = UtilClient.read_as_string(_response.body)
                if UtilClient.is_4xx(_response.status_code) or UtilClient.is_5xx(_response.status_code):
                    raise TeaException({
                        'message': _response.status_message,
                        'data': obj_str,
                        'code': _response.status_code
                    })
                obj = UtilClient.parse_json(obj_str)
                res = UtilClient.assert_as_map(obj)
                return {
                    'body': res,
                    'headers': _response.headers
                }
            except TeaException as e:
                if TeaCore.is_retryable(e):
                    _last_exception = e
                    continue
                raise
        raise UnretryableException(_last_request, _last_exception)
py 复制代码
# -*- coding: utf-8 -*-

import time, os
from typing import Dict, Any

from Tea.exceptions import TeaException
from Tea.request import TeaRequest
from alibabacloud_tea_util import models as util_models
from BaseRequest import Config, Client


class LLMSearch:
    """Thin wrapper around ``Client`` for the knowledge-search action."""

    def __init__(self, config: Config):
        """
        Create the underlying client and the runtime options used for every call.

        :param config: Config forwarded to ``Client``.
        """
        self.Clients = Client(config=config)
        self.runtime = util_models.RuntimeOptions(
            connect_timeout=10000,
            read_timeout=90000,
            autoretry=False,
            ignore_ssl=False,
            max_idle_conns=50,
            max_attempts=3
        )
        self.header = {}

    def searchDoc(self, app_name: str, body: Dict, query_params: dict = None) -> Dict[str, Any]:
        """
        POST ``body`` to the knowledge-search action of ``app_name``.

        :param app_name: application name inserted into the request path.
        :param body: request payload.
        :param query_params: optional query parameters; an empty dict is sent
            when omitted.
        :return: the dict returned by ``Client._request``, or ``None`` when a
            TeaException was raised (the exception is printed, not re-raised).
        """
        # Use a fresh dict per call instead of a shared mutable default.
        query = {} if query_params is None else query_params
        try:
            return self.Clients._request(method="POST",
                                         pathname=f'/v3/openapi/apps/{app_name}/actions/knowledge-search',
                                         query=query, headers=self.header, body=body, runtime=self.runtime)
        except TeaException as e:
            # Best-effort: report the failure and let the caller check for None.
            print(e)
            return None


if __name__ == "__main__":
    # Unified request endpoint of the instance; the "http://" prefix must be removed.
    endpoint = "xxxxx-wm3.opensearch-cn-shanghai.aliyuncs.com"

    # Protocol to use: HTTPS or HTTP.
    endpoint_protocol = "HTTP"

    # User credentials.
    # Read the AccessKey ID and AccessKey Secret from environment variables so
    # that secrets are not committed with the script; the placeholders are
    # only used when the variables are not set.
    access_key_id = os.environ.get("ALIBABA_CLOUD_ACCESS_KEY_ID", "xxxxx")
    access_key_secret = os.environ.get("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "xxxxx")

    # Authentication type: "sts" or "access_key" (access_key is the default).
    # Use "sts" for RAM-STS authentication.
    auth_type = "access_key"

    # For RAM-STS authentication, set security_token; the STS credentials can
    # be obtained through Alibaba Cloud AssumeRole.
    security_token = "OS-xxxxx"

    # Common settings shared by all requests.
    # The type and security_token arguments should be omitted unless a
    # sub-account is used.
    Configs = Config(endpoint=endpoint, access_key_id=access_key_id, access_key_secret=access_key_secret,
                     security_token=security_token, type=auth_type, protocol=endpoint_protocol)

    # Create the OpenSearch client.
    # Replace app_name with the name of your LLM question-answering instance.
    ops = LLMSearch(Configs)
    app_name = "test"

    # --------------- Document search ---------------

    docQuery = {
        "question": {
            "text": "袜子怎么卖的",  # the user's question
            # "session": set a conversation session here to enable multi-turn dialogue
            "type": "TEXT"
        }
    }

    res1 = ops.searchDoc(app_name=app_name, body=docQuery)
    # searchDoc prints the error and returns None when the request fails.
    if res1 is None:
        raise SystemExit("knowledge-search request failed; see the error printed above")

    # Walk the response defensively: any level may be missing or empty.
    result = (res1.get('body') or {}).get('result') or {}
    data = result.get('data') or []
    if not data:
        raise SystemExit(f"knowledge-search returned no answer: {res1.get('body')}")

    res = data[0].get('answer')
    print(res)
相关推荐
寻月隐君7 分钟前
Python 数据结构与算法:课程笔记与实战解析
后端·python·github
红队it23 分钟前
【数据分析大屏】基于Django+Vue汽车销售数据分析可视化大屏(完整系统源码+数据库+开发笔记+详细部署教程+虚拟机分布式启动教程)✅
python·数据分析·spark·汽车·大屏端
蹦蹦跳跳真可爱58931 分钟前
Python----计算机视觉处理(opencv:图片灰度化)
人工智能·python·opencv·计算机视觉
codingPower1 小时前
IDEA接入阿里云百炼中免费的通义千问[2025版]
java·阿里云·intellij-idea
HelloGitHub1 小时前
经过 10 亿级性能验证的隐私计算开源利器
python·开源·github
一号言安2 小时前
牛客python蓝桥杯11-32(自用)
开发语言·python
梦丶晓羽2 小时前
自然语言处理:主题模型
人工智能·python·自然语言处理·lda·主题模型
weixin_525936332 小时前
Python数据分析之机器学习基础
python·机器学习·数据分析
apcipot_rain2 小时前
【密码学——基础理论与应用】李子臣编著 第三章 分组密码 课后习题
python·算法·密码学
慕丹2 小时前
项目工坊 | Python驱动淘宝信息爬虫
爬虫·python·selenium