dataelement / bisheng

Bisheng is an open LLM devops platform for next generation AI applications.

Home Page: https://bisheng.dataelem.com/

Does Bisheng support connecting to local models deployed with xinference?

linqingxu opened this issue · comments

I tried using customLLMChat to connect to a chatglm2-6b model deployed with xinference, but question answering fails with an error. Are local models deployed with xinference not supported?
(screenshot)
Q&A:
(screenshot)
webUI Q&A:
(screenshot)

If it is called through the API, that is supported.
The error here is Method Not Allowed; CustomLLM passes its parameters via POST by default. First verify that the API responds correctly with a tool such as Postman, then wire it up through CustomLLM.

The URL probably needs to end with something like v1/chat/completions.
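
A minimal Python equivalent of that Postman check, assuming xinference serves its OpenAI-compatible API on http://localhost:9997 and the model is registered as chatglm2-6b (both are placeholders; substitute your own host and model name):

import requests

# POST to the OpenAI-compatible chat endpoint exposed by xinference.
# A 405 Method Not Allowed here usually means the path or HTTP method is wrong.
resp = requests.post(
    "http://localhost:9997/v1/chat/completions",
    json={
        "model": "chatglm2-6b",
        "messages": [{"role": "user", "content": "Hello"}],
    },
    timeout=60,
)
print(resp.status_code)
print(resp.json())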

Solved. If a model service deployed with xinference goes through customLLMChat, validate_environment appends "infer" to the URL, so requests to the model service fail. I implemented it by adding a new xinference component instead.
(screenshot)

Could you share how you changed it? I also removed "infer", but it didn't seem to help.

Reference: https://dataelem.feishu.cn/wiki/NcOqw2QpzicHvqkmaUqcJjNpnEf, Example 2. The component below wraps langchain's ChatOpenAI and points it at the OpenAI-compatible endpoint served by xinference:

from typing import Optional, Union
from langchain.llms import BaseLLM
from langchain_community.chat_models.openai import ChatOpenAI

from bisheng import CustomComponent
from bisheng.field_typing import BaseLanguageModel, NestedDict
import openai
import httpx

class ChatOpenAIComponent(CustomComponent):
    display_name = "ChatOpenAI-cus"
    description = "OpenAI Chat large language models API."

    def build_config(self):
        return {
            "max_tokens": {
                "display_name": "Max Tokens",
                "field_type": "int",
                "advanced": False,
                "required": False,
            },
            "model_kwargs": {
                "display_name": "Model Kwargs",
                "field_type": "NestedDict",
                "advanced": True,
                "required": False,
            },
            "model_name": {
                "display_name": "Model Name",
                "field_type": "str",
                "advanced": False,
                "required": False,
                "options": [
                    "gpt-4-turbo-preview",
                    "gpt-4-0125-preview",
                    "gpt-4-1106-preview",
                    "gpt-4-vision-preview",
                    "gpt-3.5-turbo-0125",
                    "gpt-3.5-turbo-1106",
                    "qwen1.5-chat",
                ],
            },
            "openai_api_base": {
                "display_name": "OpenAI API Base",
                "field_type": "str",
                "advanced": False,
                "required": False,
                "info": (
                    "The base URL of the OpenAI API. Defaults to https://api.openai.com/v1.\n\n"
                    "You can change this to use other APIs like JinaChat, LocalAI and Prem."
                ),
            },
            "openai_api_key": {
                "display_name": "OpenAI API Key",
                "field_type": "str",
                "advanced": False,
                "required": False,
                "password": True,
            },
            "openai_proxy": {
                "display_name": "OpenAI Proxy",
                "field_type": "str",
                "advanced": False,
                "required": False,
                "password": True,
            },
            "temperature": {
                "display_name": "Temperature",
                "field_type": "float",
                "advanced": False,
                "required": False,
                "value": 0.7,
            },
        }

    def build(
        self,
        max_tokens: Optional[int] = 512,
        model_kwargs: NestedDict = {},
        model_name: str = "qwen1.5-chat",
        openai_api_base: Optional[str] = "http://192.168.1.74:9997/v1",
        openai_api_key: Optional[str] = "not used actually",
        temperature: float = 0.7,
        openai_proxy: Optional[str] = None,  # kept for config parity; not used below
    ) -> Union[BaseLanguageModel, BaseLLM]:
        if not openai_api_base:
            openai_api_base = "http://192.168.1.74:9997/v1"
        # Build explicit sync and async OpenAI clients against the xinference
        # OpenAI-compatible endpoint, so requests never go to api.openai.com.
        client_param = {"api_key": openai_api_key, "base_url": openai_api_base}
        client = openai.OpenAI(**client_param).chat.completions
        client_async = openai.AsyncOpenAI(**client_param).chat.completions
        return ChatOpenAI(
            max_tokens=max_tokens,
            model_kwargs=model_kwargs,
            model=model_name,
            base_url=openai_api_base,
            api_key=openai_api_key,
            temperature=temperature,
            streaming=True,
            client=client,
            async_client=client_async,
        )
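
For a quick sanity check outside Bisheng, the same wiring can be exercised directly with langchain's ChatOpenAI, assuming the xinference address and model name used above (substitute your own deployment):

from langchain_community.chat_models.openai import ChatOpenAI

# Point ChatOpenAI at the xinference OpenAI-compatible endpoint and run one query.
llm = ChatOpenAI(
    model="qwen1.5-chat",
    base_url="http://192.168.1.74:9997/v1",
    api_key="not used actually",
    temperature=0.7,
)
print(llm.invoke("Hello, who are you?").content)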