Implement huggingface provider to support running open source models
matankley opened this issue
- Implement HuggingfaceLLM that inherits from BaseLLM (a rough sketch follows this list)
- Implement HuggingfaceOperator that inherits from BaseOperator
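A rough sketch of what the LLM side could look like on top of the `transformers` pipeline API. The class body and `predict` signature below are assumptions for illustration; declarai's actual `BaseLLM` interface may differ:

```python
# Hypothetical sketch only; declarai's real BaseLLM interface may differ.
from transformers import pipeline


class HuggingfaceLLM:  # would inherit from declarai's BaseLLM
    def __init__(self, model: str, **generation_kwargs):
        # Loading the model is the expensive, local step discussed below.
        self._pipe = pipeline("text-generation", model=model)
        self._generation_kwargs = generation_kwargs

    def predict(self, prompt: str) -> str:
        # The pipeline returns a list of dicts; "generated_text" contains the
        # prompt plus the completion unless return_full_text=False is passed.
        out = self._pipe(prompt, **self._generation_kwargs)
        return out[0]["generated_text"]
```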
I will take this up. @matankley, assign it to me
Of course! Let me know how it goes and how I can help!
Almost done with the implementation. During testing, I observed that a HuggingFace LLM, unlike the OpenAI models, lives on the local system and needs to be initialized every time a function runs. Should we think about an architecture where we first deploy it in an optimized manner and then call it up like OpenAI? Any thoughts @matankley
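For example (just a sketch; the `get_pipeline` helper is hypothetical and not part of declarai), caching the loaded pipeline at module level would make only the first call pay the initialization cost:

```python
# Sketch: cache the heavy model load so repeated task calls reuse one instance.
from functools import lru_cache

from transformers import pipeline


@lru_cache(maxsize=None)
def get_pipeline(model_name: str):
    # The first call downloads and loads the model; later calls hit the cache.
    return pipeline("text-generation", model=model_name)
```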
@Tushar-ml One way for users to make use of open-source models is through the llama-cpp-python server. Basically, it's a FastAPI server structured in an OpenAI-compatible manner. One way I like to use it is to deploy a server instance on Google Colab and then tunnel it through to an ngrok public URL.
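For reference, a sketch of that setup; the model filename is a placeholder, and `pyngrok` is just one way to open the tunnel from inside the notebook:

```python
# One-time shell setup, per the llama-cpp-python docs:
#   pip install 'llama-cpp-python[server]' pyngrok
#   python -m llama_cpp.server --model ./openhermes-2-mistral-7b.Q4_K_M.gguf
from pyngrok import ngrok

# llama_cpp.server listens on port 8000 by default; expose it publicly.
tunnel = ngrok.connect(8000)
print(tunnel.public_url)  # use this (plus /v1) as openai.api_base
```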
Currently, it's not compatible with declarai:
```python
import openai

from declarai import Declarai

# Ngrok public URL
openai.api_base = "https://7ffb-34-90-201-189.ngrok-free.app/v1"

gpt_35 = Declarai.openai(
    openai_token="fake_key",
    model="openhermes-2-mistral-7b",
)


@gpt_35.task
def rank_by_severity(message: str) -> int:
    """
    Rank the severity of the provided message by its urgency.
    Urgency is ranked on a scale of 1-5, with 5 being the most urgent.
    :param message: The message to rank
    :return: The urgency of the message
    """


rank_by_severity(message="The server is down!")
```
Error
```
---------------------------------------------------------------------------
APIError                                  Traceback (most recent call last)
<ipython-input-31-c7c93575494a> in <cell line: 20>()
     18     """
     19
---> 20 rank_by_severity(message="The server is down!")

9 frames
/usr/local/lib/python3.10/dist-packages/declarai/task.py in __call__(self, llm_params, **kwargs)
    176
    177         self._call_kwargs = kwargs
--> 178         return self._exec_middlewares(kwargs)
    179
    180

/usr/local/lib/python3.10/dist-packages/declarai/task.py in _exec_middlewares(self, kwargs)
    154         if exec_with_middlewares:
    155             return exec_with_middlewares()
--> 156         return self._exec(kwargs)
    157
    158     def __call__(

/usr/local/lib/python3.10/dist-packages/declarai/task.py in _exec(self, kwargs)
    144             return self.llm_stream_response
    145         else:
--> 146             self.llm_response = self.operator.predict(**kwargs)
    147             return self.operator.parse_output(self.llm_response.response)
    148

/usr/local/lib/python3.10/dist-packages/declarai/operators/operator.py in predict(self, llm_params, **kwargs)
    107         llm_params["stream"] = self.streaming  # streaming should be the last param
    108         # provided params during execution should override the ones provided during initialization
--> 109         return self.llm.predict(**self.compile(**kwargs), **llm_params)
    110
    111     def parse_output(self, output: str) -> Any:

/usr/local/lib/python3.10/dist-packages/declarai/operators/openai_operators/openai_llm.py in predict(self, messages, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty, stream)
    101             stream = self.stream
    102         openai_messages = [{"role": m.role, "content": m.message} for m in messages]
--> 103         res = self.openai.ChatCompletion.create(
    104             model=model or self.model,
    105             messages=openai_messages,

/usr/local/lib/python3.10/dist-packages/openai/api_resources/chat_completion.py in create(cls, *args, **kwargs)
     23         while True:
     24             try:
---> 25                 return super().create(*args, **kwargs)
     26             except TryAgain as e:
     27                 if timeout is not None and time.time() > start + timeout:

/usr/local/lib/python3.10/dist-packages/openai/api_resources/abstract/engine_api_resource.py in create(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)
    153         response, _, api_key = requestor.request(
    154             "post",
--> 155             url,
    156             params=params,
    157             headers=headers,

/usr/local/lib/python3.10/dist-packages/openai/api_requestor.py in request(self, method, url, params, headers, files, stream, request_id, request_timeout)
    297         )
    298         resp, got_stream = self._interpret_response(result, stream)
--> 299         return resp, got_stream, self.api_key
    300
    301     @overload

/usr/local/lib/python3.10/dist-packages/openai/api_requestor.py in _interpret_response(self, result, stream)
    708
    709     async def _interpret_async_response(
--> 710         self, result: aiohttp.ClientResponse, stream: bool
    711     ) -> Tuple[Union[OpenAIResponse, AsyncGenerator[OpenAIResponse, None]], bool]:
    712         """Returns the response(s) and a bool indicating whether it is a stream."""

/usr/local/lib/python3.10/dist-packages/openai/api_requestor.py in _interpret_response_line(self, rbody, rcode, rheaders, stream)
    773         user_set_session = openai.aiosession.get()
    774         if user_set_session:
--> 775             yield user_set_session
    776         else:
    777             async with aiohttp.ClientSession() as session:

APIError: [{'type': 'bool_type', 'loc': ('body', 'stream'), 'msg': 'Input should be a valid boolean', 'input': None, 'url': 'https://errors.pydantic.dev/2.4/v/bool_type'}] {"error":{"message":"[{'type': 'bool_type', 'loc': ('body', 'stream'), 'msg': 'Input should be a valid boolean', 'input': None, 'url': 'https://errors.pydantic.dev/2.4/v/bool_type'}]","type":"internal_server_error","param":null,"code":null}} 500 {'error': {'message': "[{'type': 'bool_type', 'loc': ('body', 'stream'), 'msg': 'Input should be a valid boolean', 'input': None, 'url': 'https://errors.pydantic.dev/2.4/v/bool_type'}]", 'type': 'internal_server_error', 'param': None, 'code': None}} {'Content-Length': '241', 'Content-Type': 'application/json', 'Date': 'Wed, 18 Oct 2023 05:46:20 GMT', 'Ngrok-Trace-Id': 'bd8b4b68bc02669aeeb4eeda1585223c', 'Server': 'uvicorn', 'X-Request-Id': '381e26ff1852403e8f14f4ef302b17dc'}
```
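The traceback points at the likely root cause: in `operators/operator.py`, `predict` sets `llm_params["stream"] = self.streaming`, and when streaming was never configured the request goes out with `stream: null`, which the server's Pydantic schema rejects (`bool_type`). The real OpenAI endpoint tolerates this, but the llama-cpp-python server does not. A hypothetical sketch (not the actual declarai patch) of the kind of fix needed, coercing the flag to a real boolean before the request is built:

```python
# Hypothetical sketch, not the actual declarai patch: ensure the outgoing
# "stream" flag is a real boolean even when streaming was never configured.
def normalize_llm_params(llm_params: dict, streaming) -> dict:
    params = dict(llm_params)
    params["stream"] = bool(streaming)  # None -> False
    return params


print(normalize_llm_params({}, None))  # {'stream': False}
```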