The ChatWithGithub app reloads the repo into the vector DB each time a new question is asked
Vadiml1024 opened this issue · comments
The repo I'm working with is pretty big... it takes 15-20 minutes to load it into ChromaDB.
Not very usable.
Looking at the Streamlit docs, I now understand that it reruns the module from the beginning whenever the user enters data on the screen. So I tried to avoid rerunning the database loading:
import tempfile
from embedchain import App
from embedchain.loaders.github import GithubLoader
import streamlit as st
@st.cache_resource
def get_loader():
    """Build the GitHub loader used to fetch repository contents.

    Decorated with ``st.cache_resource`` so the loader is constructed once
    and reused, instead of being rebuilt on every Streamlit rerun of this
    script.

    Returns:
        A ``GithubLoader`` configured with the token taken from the
        ``GITHUB_TOKEN`` environment variable.

    Raises:
        RuntimeError: If ``GITHUB_TOKEN`` is unset or empty.
    """
    # Function-scope import so this block does not depend on the file's
    # import section already providing ``os``.
    import os

    # The access token must come from the environment: a token written into
    # the source is leaked to everyone who can read the file or the issue.
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        raise RuntimeError(
            "Set the GITHUB_TOKEN environment variable to a GitHub "
            "personal access token before starting the app."
        )
    return GithubLoader(config={"token": token})


# Module-level loader shared by the functions below.
loader = get_loader()
# Define the embedchain_bot function
@st.cache_resource
def embedchain_bot(db_path):
    """Create the Embedchain ``App`` backed by a Chroma store at ``db_path``.

    The ``@`` in front of ``st.cache_resource`` matters: written as a bare
    expression it is evaluated and discarded, so the function is not cached
    and a new ``App`` is built on every Streamlit rerun.

    Args:
        db_path: Directory passed to the Chroma vector database as ``dir``.

    Returns:
        The ``App`` built by ``App.from_config`` with an Ollama LLM, an
        Ollama embedder and a Chroma vector database.
    """
    return App.from_config(
        config={
            "llm": {
                "provider": "ollama",
                "config": {
                    "model": "llama3:instruct",
                    "max_tokens": 250,
                    "temperature": 0.5,
                    "stream": True,
                    "base_url": "http://localhost:11434",
                },
            },
            "vectordb": {"provider": "chroma", "config": {"dir": db_path}},
            "embedder": {
                "provider": "ollama",
                "config": {
                    "model": "llama3:instruct",
                    "base_url": "http://localhost:11434",
                },
            },
        }
    )
@st.cache_resource
def load_repo(git_repo):
    """Add a GitHub repository to the knowledge base and return the app.

    The decorator must be applied with ``@``; as a bare ``st.cache_resource``
    expression it has no effect, and the repository is ingested again on
    every Streamlit rerun (i.e. for every question asked). With the decorator
    applied, the body is meant to run once per distinct ``git_repo`` value.

    Uses the module-level ``app`` and ``loader`` objects.

    Args:
        git_repo: Repository identifier as entered by the user; it is
            embedded into the ``repo:<git_repo> type:repo`` source string.

    Returns:
        The module-level ``app`` after the repository has been added.
    """
    # Add the repo to the knowledge base
    print(f"Adding {git_repo} to knowledge base!")
    app.add(f"repo:{git_repo} type:repo", data_type="github", loader=loader)
    # st.success(f"Added {git_repo} to knowledge base!")
    return app
@st.cache_data
def make_db_path():
    """Create a temporary directory for the Chroma database.

    Returns:
        The path of the directory created by ``tempfile.mkdtemp``; the
        directory name ends with ``chroma``.
    """
    chroma_dir = tempfile.mkdtemp(suffix="chroma")
    print(f"Created Chroma DB at {chroma_dir}")
    return chroma_dir
# Page header
st.title("Chat with GitHub Repository 💬")
st.caption("This app allows you to chat with a GitHub Repo using Llama-3 running with Ollama")

# Set up the Embedchain app on top of a Chroma directory
db_path = make_db_path()
app = embedchain_bot(db_path)

# Repository to index, as typed by the user
git_repo = st.text_input("Enter the GitHub Repo", type="default")
if git_repo:
    # Ingest the repository into the knowledge base
    app = load_repo(git_repo)

# Question box; the answer is rendered only once a question has been entered
prompt = st.text_input("Ask any question about the GitHub Repo")
if prompt:
    st.write(app.chat(prompt))
Unfortunately, it does not help much.
Maybe you have an idea.
Solved in this PR: #13
Thanks for the contribution @Vadiml1024. I will review it soon and merge.
Merged the PR.