Integrating Groq LPU Inference Engine API with Verba
bakongi opened this issue · comments
bakongi commented
Hi all!
Here are instructions on how to integrate the Groq API with Verba.
pip install groq
- Create "GroqGenerator.py" in the goldenverba/components/generation folder, and paste this code:
import os

from dotenv import load_dotenv
from groq import AsyncGroq
from collections.abc import Iterator

from goldenverba.components.interfaces import Generator

# Load variables from a .env file into the process environment (no-op if absent).
load_dotenv()

# API key read once at import time; None when the variable is not set.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
class GroqGenerator(Generator):
    """
    Generator backed by Groq's LPU Inference Engine chat-completion API.

    Streams responses asynchronously via the `groq` AsyncGroq client.
    Requires the GROQ_API_KEY environment variable; the model is selected
    with GROQ_MODEL (default: llama3-8b-8192).
    """

    def __init__(self):
        super().__init__()
        self.name = "GroqGenerator"
        self.description = "Generator using Groq's LPU INFERENCE ENGINE"
        self.requires_library = ["groq"]
        self.requires_env = ["GROQ_API_KEY"]
        self.streamable = True
        self.model_name = os.getenv("GROQ_MODEL", "llama3-8b-8192")
        self.context_window = 8192
        # Initialize the asynchronous Groq client.
        self.client = AsyncGroq(api_key=GROQ_API_KEY)
        # Known models and their token limits.
        self.models = {
            "gemma-7b-it": {"tokens": 8192, "developer": "Google"},
            "llama3-70b-8192": {"tokens": 8192, "developer": "Meta"},
            "llama3-8b-8192": {"tokens": 8192, "developer": "Meta"},
            "mixtral-8x7b-32768": {"tokens": 32768, "developer": "Mistral"},
        }
        # FIX: the original indexed self.models directly, which raised KeyError
        # for any GROQ_MODEL not listed above (e.g. llama2-70b-4096, which the
        # instructions themselves mention). Fall back to 8192 tokens for
        # unknown models instead of crashing at startup.
        self.max_tokens = self.models.get(self.model_name, {"tokens": 8192})["tokens"]

    async def generate_stream(
        self,
        queries: list[str],
        context: list[str],
        # FIX: was annotated `dict`, but the default path builds a list and
        # prepare_messages expects list[dict].
        conversation: list[dict] = None,
    ):
        """Stream response chunks from the Groq chat-completions API.

        @parameter queries: list[str] - User queries the generator should answer.
        @parameter context: list[str] - Context passages relevant to the queries.
        @parameter conversation: list[dict] - Previous conversation messages
            (role/content dicts). Defaults to an empty list when None.
        @returns: AsyncIterator[dict] - Yields dicts with a 'message' key holding
            the generated text fragment and a 'finish_reason' key that is ''
            while generation is ongoing and e.g. 'stop' once it has finished.
        """
        if conversation is None:
            conversation = []
        messages = self.prepare_messages(queries, context, conversation)
        try:
            chat_completion = await self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=self.max_tokens,
                # Near-zero temperature for (almost) deterministic output.
                temperature=0.000000001,
                stop=None,
                stream=True,
            )
            async for chunk in chat_completion:
                finish_reason = chunk.choices[0].finish_reason
                if chunk.choices[0].delta.content:
                    yield {
                        "message": chunk.choices[0].delta.content,
                        "finish_reason": "",
                    }
                else:
                    # Empty delta: surface the finish reason (e.g. 'stop').
                    yield {
                        "message": "",
                        "finish_reason": finish_reason,
                    }
        except Exception as e:
            print(f"An error occurred: {str(e)}")
            raise e

    def prepare_messages(
        self, queries: list[str], context: list[str], conversation: list[dict]
    ) -> list[dict]:  # FIX: was annotated dict[str, str]; a list of dicts is returned
        """Build the message list for a Retrieval Augmented Generation chat call.

        @parameter queries: A list of strings representing the user queries to be answered.
        @parameter context: A list of strings representing the context information provided for the queries.
        @parameter conversation: A list of previous conversation messages that include the role and content.
        @returns A list of message dictionaries: an initial system message, the
            previous conversation messages, and the new user query encapsulated
            with the provided context.
        """
        messages = [
            {
                "role": "system",
                # FIX: corrected broken grammar in the original prompt
                # ("answer user queries only their provided context").
                "content": "You are a Retrieval Augmented Generation chatbot. Please answer user queries using only their provided context. If the provided documentation does not provide enough information, say so. If the answer requires code examples encapsulate them with ```programming-language-name ```. Don't do pseudo-code.",
            }
        ]
        for message in conversation:
            messages.append(message)
        query = " ".join(queries)
        user_context = " ".join(context)
        messages.append(
            {
                "role": "user",
                "content": f"Please answer this query: '{query}' with this provided context: {user_context}",
            }
        )
        return messages
- Modify "manager.py":
from goldenverba.components.generation.ClaudeGenerator import ClaudeGenerator
from goldenverba.components.generation.GroqGenerator import GroqGenerator
class GeneratorManager:
    # Registry of all available generators, keyed by their class name.
    # NOTE(review): GPT4Generator, GPT3Generator, CohereGenerator and
    # Llama2Generator must already be imported elsewhere in this module —
    # only the Claude and Groq imports are shown in this snippet.
    def __init__(self):
        self.generators: dict[str, Generator] = {
            "ClaudeGenerator": ClaudeGenerator(),
            "GPT4Generator": GPT4Generator(),
            "GPT3Generator": GPT3Generator(),
            "CohereGenerator": CohereGenerator(),
            "Llama2Generator": Llama2Generator(),
            "GroqGenerator": GroqGenerator(),
        }
We added the import line `from goldenverba.components.generation.GroqGenerator import GroqGenerator`
and the dictionary entry `"GroqGenerator": GroqGenerator(),`.
- Modify "verba_manager.py" add:
# Check Groq ENV KEY and, if present, initialize the synchronous Groq client.
try:
    import groq

    groq_key = os.environ.get("GROQ_API_KEY", "")
    if groq_key != "":
        self.environment_variables["GROQ_API_KEY"] = True
        self.client = groq.Groq(api_key=groq_key)
    else:
        # FIX: the original raised a ValueError here purely so the except
        # block below could catch and print it, and it set the flag to
        # False twice. Report the missing key directly instead of using an
        # exception for control flow; the printed message is unchanged.
        self.environment_variables["GROQ_API_KEY"] = False
        print("Error initializing Groq API: GROQ_API_KEY environment variable is not set.")
except Exception as e:
    # Any import or client-construction failure disables the integration.
    self.environment_variables["GROQ_API_KEY"] = False
    print(f"Error initializing Groq API: {e}")
somewhere at 200+th line
and
# Record whether the optional `groq` client library is importable.
try:
    import groq  # noqa: F401
except Exception:
    self.installed_libraries["groq"] = False
else:
    self.installed_libraries["groq"] = True
somewhere at 300+th line
- Add these variables to your environment or .env file:
# GROQ
GROQ_API_KEY=<your_groq_api_key>
GROQ_MODEL=llama3-70b-8192
GROQ_MODEL can be:
gemma-7b-it
llama2-70b-4096
llama3-70b-8192
llama3-8b-8192
mixtral-8x7b-32768
(Note: any model you set here must also be present in the `self.models` dictionary in GroqGenerator.py — as written, `llama2-70b-4096` is not listed there.)
Edward commented
Thanks! Feel free to create a PR