Implement huggingface provider to support running open source models
matankley opened this issue
- Implement HuggingfaceLLM that inherits from BaseLLM (a rough sketch follows this list)
- Implement HuggingfaceOperator that inherits from BaseOperator
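A rough sketch of what the LLM side could look like on top of the `transformers` pipeline API. The class body and `predict` signature below are assumptions for illustration; declarai's actual `BaseLLM` interface may differ:

```python
# Hypothetical sketch only; declarai's real BaseLLM interface may differ.
from transformers import pipeline


class HuggingfaceLLM:  # would inherit from declarai's BaseLLM
    def __init__(self, model: str, **generation_kwargs):
        # Loading the model is the expensive, local step discussed below.
        self._pipe = pipeline("text-generation", model=model)
        self._generation_kwargs = generation_kwargs

    def predict(self, prompt: str) -> str:
        # The pipeline returns a list of dicts; "generated_text" contains the
        # prompt plus the completion unless return_full_text=False is passed.
        out = self._pipe(prompt, **self._generation_kwargs)
        return out[0]["generated_text"]
```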
I will take this up. @matankley, assign it to me
Of course! Let me know how it goes and how I can help!
Almost done with the implementation. During testing, I observed that a HuggingFace LLM, unlike the OpenAI models, lives on the local system and needs to be initialized every time a function runs. Should we think about an architecture where we first deploy it in an optimized manner and then call it up like OpenAI? Any thoughts @matankley
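For example (just a sketch; the `get_pipeline` helper is hypothetical and not part of declarai), caching the loaded pipeline at module level would make only the first call pay the initialization cost:

```python
# Sketch: cache the heavy model load so repeated task calls reuse one instance.
from functools import lru_cache

from transformers import pipeline


@lru_cache(maxsize=None)
def get_pipeline(model_name: str):
    # The first call downloads and loads the model; later calls hit the cache.
    return pipeline("text-generation", model=model_name)
```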
@Tushar-ml One way for users to make use of open-source models is through the llama-cpp-python server. Basically, it's a FastAPI server structured in an OpenAI-compatible manner. One way I like to use it is to deploy a server instance on Google Colab and then tunnel it through to an ngrok public URL.
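For reference, a sketch of that setup; the model filename is a placeholder, and `pyngrok` is just one way to open the tunnel from inside the notebook:

```python
# One-time shell setup, per the llama-cpp-python docs:
#   pip install 'llama-cpp-python[server]' pyngrok
#   python -m llama_cpp.server --model ./openhermes-2-mistral-7b.Q4_K_M.gguf
from pyngrok import ngrok

# llama_cpp.server listens on port 8000 by default; expose it publicly.
tunnel = ngrok.connect(8000)
print(tunnel.public_url)  # use this (plus /v1) as openai.api_base
```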
Currently, it's not compatible with declarai:
```python
import openai

from declarai import Declarai

# Ngrok public URL
openai.api_base = "https://7ffb-34-90-201-189.ngrok-free.app/v1"

gpt_35 = Declarai.openai(
    openai_token="fake_key",
    model="openhermes-2-mistral-7b",
)


@gpt_35.task
def rank_by_severity(message: str) -> int:
    """
    Rank the severity of the provided message by its urgency.
    Urgency is ranked on a scale of 1-5, with 5 being the most urgent.
    :param message: The message to rank
    :return: The urgency of the message
    """


rank_by_severity(message="The server is down!")
```
Error
```
---------------------------------------------------------------------------
APIError                                  Traceback (most recent call last)
<ipython-input-31-c7c93575494a> in <cell line: 20>()
     18     """
     19
---> 20 rank_by_severity(message="The server is down!")

9 frames
/usr/local/lib/python3.10/dist-packages/declarai/task.py in __call__(self, llm_params, **kwargs)
    176
    177         self._call_kwargs = kwargs
--> 178         return self._exec_middlewares(kwargs)
    179
    180

/usr/local/lib/python3.10/dist-packages/declarai/task.py in _exec_middlewares(self, kwargs)
    154         if exec_with_middlewares:
    155             return exec_with_middlewares()
--> 156         return self._exec(kwargs)
    157
    158     def __call__(

/usr/local/lib/python3.10/dist-packages/declarai/task.py in _exec(self, kwargs)
    144             return self.llm_stream_response
    145         else:
--> 146             self.llm_response = self.operator.predict(**kwargs)
    147             return self.operator.parse_output(self.llm_response.response)
    148

/usr/local/lib/python3.10/dist-packages/declarai/operators/operator.py in predict(self, llm_params, **kwargs)
    107         llm_params["stream"] = self.streaming  # streaming should be the last param
    108         # provided params during execution should override the ones provided during initialization
--> 109         return self.llm.predict(**self.compile(**kwargs), **llm_params)
    110
    111     def parse_output(self, output: str) -> Any:

/usr/local/lib/python3.10/dist-packages/declarai/operators/openai_operators/openai_llm.py in predict(self, messages, model, temperature, max_tokens, top_p, frequency_penalty, presence_penalty, stream)
    101             stream = self.stream
    102         openai_messages = [{"role": m.role, "content": m.message} for m in messages]
--> 103         res = self.openai.ChatCompletion.create(
    104             model=model or self.model,
    105             messages=openai_messages,

/usr/local/lib/python3.10/dist-packages/openai/api_resources/chat_completion.py in create(cls, *args, **kwargs)
     23         while True:
     24             try:
---> 25                 return super().create(*args, **kwargs)
     26             except TryAgain as e:
     27                 if timeout is not None and time.time() > start + timeout:

/usr/local/lib/python3.10/dist-packages/openai/api_resources/abstract/engine_api_resource.py in create(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)
    153         response, _, api_key = requestor.request(
    154             "post",
--> 155             url,
    156             params=params,
    157             headers=headers,

/usr/local/lib/python3.10/dist-packages/openai/api_requestor.py in request(self, method, url, params, headers, files, stream, request_id, request_timeout)
    297         )
    298         resp, got_stream = self._interpret_response(result, stream)
--> 299         return resp, got_stream, self.api_key
    300
    301     @overload

/usr/local/lib/python3.10/dist-packages/openai/api_requestor.py in _interpret_response(self, result, stream)
    708
    709     async def _interpret_async_response(
--> 710         self, result: aiohttp.ClientResponse, stream: bool
    711     ) -> Tuple[Union[OpenAIResponse, AsyncGenerator[OpenAIResponse, None]], bool]:
    712         """Returns the response(s) and a bool indicating whether it is a stream."""

/usr/local/lib/python3.10/dist-packages/openai/api_requestor.py in _interpret_response_line(self, rbody, rcode, rheaders, stream)
    773         user_set_session = openai.aiosession.get()
    774         if user_set_session:
--> 775             yield user_set_session
    776         else:
    777             async with aiohttp.ClientSession() as session:

APIError: [{'type': 'bool_type', 'loc': ('body', 'stream'), 'msg': 'Input should be a valid boolean', 'input': None, 'url': 'https://errors.pydantic.dev/2.4/v/bool_type'}] {"error":{"message":"[{'type': 'bool_type', 'loc': ('body', 'stream'), 'msg': 'Input should be a valid boolean', 'input': None, 'url': 'https://errors.pydantic.dev/2.4/v/bool_type'}]","type":"internal_server_error","param":null,"code":null}} 500 {'error': {'message': "[{'type': 'bool_type', 'loc': ('body', 'stream'), 'msg': 'Input should be a valid boolean', 'input': None, 'url': 'https://errors.pydantic.dev/2.4/v/bool_type'}]", 'type': 'internal_server_error', 'param': None, 'code': None}} {'Content-Length': '241', 'Content-Type': 'application/json', 'Date': 'Wed, 18 Oct 2023 05:46:20 GMT', 'Ngrok-Trace-Id': 'bd8b4b68bc02669aeeb4eeda1585223c', 'Server': 'uvicorn', 'X-Request-Id': '381e26ff1852403e8f14f4ef302b17dc'}
```
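The traceback points at the likely root cause: in `operators/operator.py`, `predict` sets `llm_params["stream"] = self.streaming`, and when streaming was never configured the request goes out with `stream: null`, which the server's Pydantic schema rejects (`bool_type`). The real OpenAI endpoint tolerates this, but the llama-cpp-python server does not. A hypothetical sketch (not the actual declarai patch) of the kind of fix needed, coercing the flag to a real boolean before the request is built:

```python
# Hypothetical sketch, not the actual declarai patch: ensure the outgoing
# "stream" flag is a real boolean even when streaming was never configured.
def normalize_llm_params(llm_params: dict, streaming) -> dict:
    params = dict(llm_params)
    params["stream"] = bool(streaming)  # None -> False
    return params


print(normalize_llm_params({}, None))  # {'stream': False}
```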