weaviate / Verba

Retrieval Augmented Generation (RAG) chatbot powered by Weaviate

Integrating Groq LPU Inference Engine API with Verba

bakongi opened this issue

Hi all!

Here are instructions for integrating the Groq API with Verba.

Obtain an API key from Groq, then:

  1. pip install groq
  2. Create "GroqGenerator.py" in the goldenverba/components/generation folder and paste this code:
import os
from dotenv import load_dotenv
from groq import AsyncGroq

from collections.abc import Iterator
from goldenverba.components.interfaces import Generator

load_dotenv()

GROQ_API_KEY = os.getenv("GROQ_API_KEY")

class GroqGenerator(Generator):
    """
    Groq Generator.
    """

    def __init__(self):
        super().__init__()
        self.name = "GroqGenerator"
        self.description = "Generator using Groq's LPU Inference Engine"
        self.requires_library = ["groq"]
        self.requires_env = ["GROQ_API_KEY"]
        self.streamable = True
        self.model_name = os.getenv("GROQ_MODEL", "llama3-8b-8192")
        self.context_window = 8192

        # Initialize Groq client
        self.client = AsyncGroq(api_key=GROQ_API_KEY)

        # Define model details (context window sizes in tokens)
        self.models = {
            "gemma-7b-it": {"tokens": 8192, "developer": "Google"},
            "llama2-70b-4096": {"tokens": 4096, "developer": "Meta"},
            "llama3-70b-8192": {"tokens": 8192, "developer": "Meta"},
            "llama3-8b-8192": {"tokens": 8192, "developer": "Meta"},
            "mixtral-8x7b-32768": {"tokens": 32768, "developer": "Mistral"},
        }

        # Look up the selected model's token limit; fall back to 8192 if the model is unknown
        self.max_tokens = self.models.get(self.model_name, {"tokens": 8192})["tokens"]


    async def generate_stream(self, queries: list[str], context: list[str], conversation: list[dict] = None):
        """
        Generate a stream of response dictionaries based on a list of queries, a list of contexts, and includes conversational context. This function uses an asynchronous API to generate responses incrementally, suitable for streaming applications where responses are expected to be delivered in real-time as they are generated.

        @parameter queries: list[str] - A list of user queries to which the generator should respond.
        @parameter context: list[str] - Contextual information relevant to each query, which helps the model to generate more accurate and relevant responses.
        @parameter conversation: dict - A dictionary representing the conversational history and context. This helps in maintaining the flow and relevance of the conversation.
        
        @returns: Iterator[dict] - An iterator that yields dictionaries representing parts of the generated response. Each dictionary includes a 'message' key with the generated text, and a 'finish_reason' key that indicates whether the generation has completed ('stop') or if it is still ongoing ('').
        """
        if conversation is None:
            conversation = []
        messages = self.prepare_messages(queries, context, conversation)

        try:
            chat_completion = await self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=self.max_tokens,
                temperature=0.000000001,  # near-zero temperature for (near-)deterministic answers
                stop=None,
                stream=True
            )

            async for chunk in chat_completion:
                finish_reason = chunk.choices[0].finish_reason
                if chunk.choices[0].delta.content:
                    yield {
                        "message": chunk.choices[0].delta.content,
                        "finish_reason": ""
                    }
                else:
                    yield {
                        "message": "",
                        "finish_reason": finish_reason,
                    }

        except Exception as e:
            print(f"An error occurred: {str(e)}")
            raise e

    def prepare_messages(self, queries: list[str], context: list[str], conversation: list[dict]) -> list[dict[str, str]]:
        """
        Prepares a list of messages formatted for a Retrieval Augmented Generation chatbot system, including system instructions, previous conversation, and a new user query with context.

        @parameter queries: A list of strings representing the user queries to be answered.
        @parameter context: A list of strings representing the context information provided for the queries.
        @parameter conversation: A list of previous conversation messages that include the role and content.

        @returns A list of message dictionaries formatted for the chatbot. This includes an initial system message, the previous conversation messages, and the new user query encapsulated with the provided context.
        """
        messages = [
            {
                "role": "system",
                "content": "You are a Retrieval Augmented Generation chatbot. Please answer user queries only their provided context. If the provided documentation does not provide enough information, say so. If the answer requires code examples encapsulate them with ```programming-language-name ```. Don't do pseudo-code.",
            }
        ]

        for message in conversation:
            messages.append(message)

        query = " ".join(queries)
        user_context = " ".join(context)

        messages.append(
            {
                "role": "user",
                "content": f"Please answer this query: '{query}' with this provided context: {user_context}",
            }
        )

        return messages
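
To sanity-check the new generator outside of Verba, a minimal sketch like the one below streams a response directly from it (the file name and test strings are illustrative, and it assumes GROQ_API_KEY is already set in your environment or .env):

# smoke_test_groq.py - illustrative standalone check, not part of Verba
import asyncio

from goldenverba.components.generation.GroqGenerator import GroqGenerator

async def main():
    generator = GroqGenerator()
    # Consume the async generator chunk by chunk, as Verba's frontend would
    async for chunk in generator.generate_stream(
        queries=["What is Verba?"],
        context=["Verba is a RAG chatbot powered by Weaviate."],
        conversation=[],
    ):
        print(chunk["message"], end="", flush=True)

if __name__ == "__main__":
    asyncio.run(main())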
  1. Modify "manager.py":
from goldenverba.components.generation.ClaudeGenerator import ClaudeGenerator
from goldenverba.components.generation.GroqGenerator import GroqGenerator

class GeneratorManager:
    def __init__(self):
        self.generators: dict[str, Generator] = {
            "ClaudeGenerator": ClaudeGenerator(),
            "GPT4Generator": GPT4Generator(),
            "GPT3Generator": GPT3Generator(),
            "CohereGenerator": CohereGenerator(),
            "Llama2Generator": Llama2Generator(),
            "GroqGenerator": GroqGenerator(),     
        }

The changes are the new import, from goldenverba.components.generation.GroqGenerator import GroqGenerator, and the "GroqGenerator": GroqGenerator(), entry in the generators dict.

  1. Modify "verba_manager.py" add:
        # Check Groq ENV KEY
        try:
            import groq

            groq_key = os.environ.get("GROQ_API_KEY", "")

            if groq_key != "":
                self.environment_variables["GROQ_API_KEY"] = True
                self.client = groq.Groq(api_key=groq_key)
            else:
                self.environment_variables["GROQ_API_KEY"] = False
                raise ValueError("GROQ_API_KEY environment variable is not set.")

        except Exception as e:
            self.environment_variables["GROQ_API_KEY"] = False
            print(f"Error initializing Groq API: {e}")

somewhere around line 200, next to the other environment-variable checks,

and

        try:
            import groq

            self.installed_libraries["groq"] = True
        except Exception:
            self.installed_libraries["groq"] = False

somewhere around line 300, next to the other library checks.

  5. Add these variables to your environment or .env file:
# GROQ
GROQ_API_KEY=<your_groq_api_key>

GROQ_MODEL=llama3-70b-8192 

GROQ_MODEL can be:

gemma-7b-it
llama2-70b-4096
llama3-70b-8192
llama3-8b-8192
mixtral-8x7b-32768
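
To confirm which of these models your key can actually use, the groq client also exposes a model-listing call. A quick sketch (this assumes the SDK's OpenAI-style models.list() interface, which current groq releases provide):

# list_models.py - illustrative check of the models available to your key
import os

from groq import Groq

client = Groq(api_key=os.environ["GROQ_API_KEY"])

# models.list() returns the models this API key is allowed to call
for model in client.models.list().data:
    print(model.id)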

Thanks! Feel free to create a PR.