This repository provides a set of ROS 2 packages to integrate llama.cpp into ROS 2. By using the llama_ros packages, you can easily incorporate the powerful optimization capabilities of llama.cpp into your ROS 2 projects by running GGUF-based LLMs and VLMs.
- chatbot_ros → A chatbot integrated into ROS 2 that uses whisper_ros to listen to people's speech and llama_ros to generate responses. The chatbot is controlled by a state machine created with YASMIN.
- explainable_ros → A ROS 2 tool to explain the behavior of a robot. Using the LangChain integration, logs are stored in a vector database. Then, RAG is applied to retrieve relevant logs for user questions, which are answered with llama_ros.
$ cd ~/ros2_ws/src
$ git clone --recurse-submodules https://github.com/mgonzs13/llama_ros.git
$ pip3 install -r llama_ros/requirements.txt
$ cd ~/ros2_ws
$ colcon build
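Then, source the workspace so the new packages are visible to ROS 2 (a standard ROS 2 step; adjust the path if your workspace lives elsewhere):
$ source ~/ros2_ws/install/setup.bash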
To run llama_ros with CUDA, you have to install the CUDA Toolkit and uncomment the following lines in the CMakeLists.txt of the llama_ros package:
option(LLAMA_CUDA "llama: use CUDA" ON)
add_compile_definitions(GGML_USE_CUDA)
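After uncommenting them, rebuild the workspace so llama.cpp is recompiled with CUDA enabled; clearing the CMake cache (a standard colcon option) helps the new flags take effect:
$ cd ~/ros2_ws
$ colcon build --cmake-clean-cache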
First of all, you need to create a launch file to use llama_ros or llava_ros. This launch file will contain the main parameters to download the model from HuggingFace and configure it. Take a look at the following examples and the predefined launch files.
llama_ros launch example (LLM):
from launch import LaunchDescription
from llama_bringup.utils import create_llama_launch
def generate_launch_description():
    return LaunchDescription([
        create_llama_launch(
            n_ctx=2048,  # context of the LLM in tokens
            n_batch=8,  # batch size in tokens
            n_gpu_layers=0,  # layers to load in GPU
            n_threads=1,  # threads
            n_predict=2048,  # max tokens, -1 == inf
            model_repo="TheBloke/Marcoroni-7B-v3-GGUF",  # Hugging Face repo
            model_filename="marcoroni-7b-v3.Q4_K_M.gguf",  # model file in repo
            prefix="\n\n### Instruction:\n",  # prefix to add at the start of the prompt
            suffix="\n\n### Response:\n",  # suffix to add at the end of the prompt
            stopping_words=["### Instruction:\n"],  # stopping words
            system_prompt_type="alpaca"  # system prompt type
        )
    ])
$ ros2 launch llama_bringup marcoroni.launch.py
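Once the launch file is running, you can check that the llama_ros interfaces are up with the regular ROS 2 CLI (names may differ if you remap them):
$ ros2 service list | grep llama
$ ros2 action list | grep llama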
llava_ros launch example (VLM):
from launch import LaunchDescription
from llama_bringup.utils import create_llama_launch
def generate_launch_description():
    return LaunchDescription([
        create_llama_launch(
            use_llava=True,  # enable llava
            embedding=False,  # disable embeddings
            n_ctx=8192,  # context of the LLM in tokens, use a large context size to fit images
            n_batch=512,  # batch size in tokens
            n_gpu_layers=33,  # layers to load in GPU
            n_threads=1,  # threads
            n_predict=8192,  # max tokens, -1 == inf
            model_repo="cjpais/llava-1.6-mistral-7b-gguf",  # Hugging Face repo
            model_filename="llava-v1.6-mistral-7b.Q4_K_M.gguf",  # model file in repo
            mmproj_repo="cjpais/llava-1.6-mistral-7b-gguf",  # Hugging Face repo
            mmproj_filename="mmproj-model-f16.gguf",  # mmproj file in repo
            prefix="[INST]",  # prefix to add at the start of the prompt
            suffix="[/INST]",  # suffix to add at the end of the prompt
            stopping_words=["[INST]"],  # stopping words
            system_prompt_type="mistral"  # system prompt type
        )
    ])
$ ros2 launch llama_bringup llava.launch.py
Both llama_ros and llava_ros provide ROS 2 interfaces to access the main functionalities of the models. Here you have some examples of how to use them inside ROS 2 nodes. Moreover, take a look at the llama_client_node.py and llava_client_node.py examples.
Tokenize example, calling the /llama/tokenize service:
from rclpy.node import Node
from llama_msgs.srv import Tokenize
class ExampleNode(Node):

    def __init__(self) -> None:
        super().__init__("example_node")

        # create the client
        self.srv_client = self.create_client(Tokenize, "/llama/tokenize")

        # create the request
        req = Tokenize.Request()
        req.prompt = "Example text"

        # call the tokenize service
        self.srv_client.wait_for_service()
        res = self.srv_client.call(req)
        tokens = res.tokens
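The snippet above uses the blocking call(), which only returns while the node is being spun (for example by an executor in another thread). As a non-blocking alternative, a minimal sketch using call_async from a standalone script could look like this (node name and prompt are illustrative):
import rclpy
from rclpy.node import Node
from llama_msgs.srv import Tokenize


class AsyncTokenizeNode(Node):

    def __init__(self) -> None:
        super().__init__("async_tokenize_node")
        self.srv_client = self.create_client(Tokenize, "/llama/tokenize")


def main():
    rclpy.init()
    node = AsyncTokenizeNode()

    # build the request
    req = Tokenize.Request()
    req.prompt = "Example text"

    # send the request asynchronously and spin until the future completes
    node.srv_client.wait_for_service()
    future = node.srv_client.call_async(req)
    rclpy.spin_until_future_complete(node, future)
    tokens = future.result().tokens
    print(tokens)

    node.destroy_node()
    rclpy.shutdown()


if __name__ == "__main__":
    main()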
Embeddings example, calling the /llama/generate_embeddings service:
from rclpy.node import Node
from llama_msgs.srv import Embeddings
class ExampleNode(Node):

    def __init__(self) -> None:
        super().__init__("example_node")

        # create the client
        self.srv_client = self.create_client(Embeddings, "/llama/generate_embeddings")

        # create the request
        req = Embeddings.Request()
        req.prompt = "Example text"
        req.normalize = True

        # call the embedding service
        self.srv_client.wait_for_service()
        res = self.srv_client.call(req)
        embeddings = res.embeddings
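As a small usage sketch (not part of llama_ros), two embedding vectors returned by the service can be compared with a plain cosine similarity, for instance with NumPy; here emb_a and emb_b stand for the res.embeddings lists obtained from two different prompts:
import numpy as np


def cosine_similarity(emb_a, emb_b):
    # emb_a and emb_b are the res.embeddings lists returned by the service
    a = np.asarray(emb_a, dtype=np.float32)
    b = np.asarray(emb_b, dtype=np.float32)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))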
Generate response example, calling the /llama/generate_response action:
import rclpy
from rclpy.node import Node
from rclpy.action import ActionClient
from llama_msgs.action import GenerateResponse
class ExampleNode(Node):

    def __init__(self) -> None:
        super().__init__("example_node")

        # create the client
        self.action_client = ActionClient(
            self, GenerateResponse, "/llama/generate_response")

        # create the goal and set the sampling config
        goal = GenerateResponse.Goal()
        goal.prompt = self.prompt
        goal.sampling_config.temp = 0.2

        # wait for the server and send the goal
        self.action_client.wait_for_server()
        send_goal_future = self.action_client.send_goal_async(goal)

        # wait for the server
        rclpy.spin_until_future_complete(self, send_goal_future)
        get_result_future = send_goal_future.result().get_result_async()

        # wait again and take the result
        rclpy.spin_until_future_complete(self, get_result_future)
        result: GenerateResponse.Result = get_result_future.result().result
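ROS 2 actions can also deliver feedback while the goal is running. If you want to handle partial output as it is produced, the goal in the example above can be sent with a feedback callback; the concrete feedback fields are defined in llama_msgs, so this sketch just prints the raw feedback message:
# inside the example node above, replacing the send-goal step
def feedback_cb(feedback_msg):
    # the concrete feedback fields are defined in llama_msgs;
    # print the raw feedback message to inspect them
    print(feedback_msg.feedback)

self.action_client.wait_for_server()
send_goal_future = self.action_client.send_goal_async(
    goal, feedback_callback=feedback_cb)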
llava example, attaching an image to the /llama/generate_response goal:
import cv2
from cv_bridge import CvBridge
import rclpy
from rclpy.node import Node
from rclpy.action import ActionClient
from llama_msgs.action import GenerateResponse
class ExampleNode(Node):

    def __init__(self) -> None:
        super().__init__("example_node")

        # create a cv bridge for the image
        self.cv_bridge = CvBridge()

        # create the client
        self.action_client = ActionClient(
            self, GenerateResponse, "/llama/generate_response")

        # create the goal and set the sampling config
        goal = GenerateResponse.Goal()
        goal.prompt = self.prompt
        goal.sampling_config.temp = 0.2

        # add your image to the goal
        image = cv2.imread("/path/to/your/image", cv2.IMREAD_COLOR)
        goal.image = self.cv_bridge.cv2_to_imgmsg(image)

        # wait for the server and send the goal
        self.action_client.wait_for_server()
        send_goal_future = self.action_client.send_goal_async(goal)

        # wait for the server
        rclpy.spin_until_future_complete(self, send_goal_future)
        get_result_future = send_goal_future.result().get_result_async()

        # wait again and take the result
        rclpy.spin_until_future_complete(self, get_result_future)
        result: GenerateResponse.Result = get_result_future.result().result
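Instead of loading the image from disk, the sensor_msgs/Image can come straight from a camera topic. A minimal sketch of that pattern follows; the /camera/image_raw topic name and the prompt are assumptions, adjust them to your robot:
import rclpy
from rclpy.node import Node
from rclpy.action import ActionClient
from sensor_msgs.msg import Image
from llama_msgs.action import GenerateResponse


class CameraLlavaNode(Node):

    def __init__(self) -> None:
        super().__init__("camera_llava_node")

        # create the client
        self.action_client = ActionClient(
            self, GenerateResponse, "/llama/generate_response")

        # keep the latest camera image; the topic name is an assumption
        self.last_image = None
        self.create_subscription(Image, "/camera/image_raw", self.image_cb, 10)

    def image_cb(self, msg: Image) -> None:
        self.last_image = msg

    def describe_last_image(self) -> None:
        if self.last_image is None:
            return

        # build the goal with the latest sensor_msgs/Image
        goal = GenerateResponse.Goal()
        goal.prompt = "Describe the image"
        goal.image = self.last_image

        # send the goal asynchronously
        self.action_client.wait_for_server()
        self.action_client.send_goal_async(goal)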
There is a llama_ros integration for LangChain. Thus, prompt engineering techniques can be applied. Here is an example of how to use it.
llama_ros LLM in LangChain:
import rclpy
from llama_ros.langchain import LlamaROS
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
rclpy.init()
# create the llama_ros llm for langchain
llm = LlamaROS()
# create a prompt template
prompt_template = "tell me a joke about {topic}"
prompt = PromptTemplate(
    input_variables=["topic"],
    template=prompt_template
)
# create a chain with the llm and the prompt template
chain = prompt | llm | StrOutputParser()
# run the chain
text = chain.invoke({"topic": "bears"})
rclpy.shutdown()
llama_ros embeddings in LangChain, with a Chroma vector database:
import rclpy
from langchain_community.vectorstores import Chroma
from llama_ros.langchain import LlamaROSEmbeddings
rclpy.init()
# create the llama_ros embeddings for langchain
embeddings = LlamaROSEmbeddings()
# create a vector database and assign it
db = Chroma(embedding_function=embeddings)
# create the retriever
retriever = db.as_retriever(search_kwargs={"k": 5})
# add your texts
db.add_texts(texts=["your_texts"])
# retrieve documents
documents = retriever.get_relevant_documents("your_query")
rclpy.shutdown()
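Both pieces can be combined into a small RAG chain that retrieves the stored texts and passes them as context to the llama_ros LLM. A minimal sketch using LangChain's runnable composition (the prompt wording and the k value are arbitrary choices):
import rclpy
from langchain_community.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from llama_ros.langchain import LlamaROS, LlamaROSEmbeddings

rclpy.init()

# llama_ros LLM and embeddings for langchain
llm = LlamaROS()
embeddings = LlamaROSEmbeddings()

# vector database with some example texts
db = Chroma(embedding_function=embeddings)
db.add_texts(texts=["your_texts"])
retriever = db.as_retriever(search_kwargs={"k": 3})

# simple RAG prompt; the wording is just an example
prompt = PromptTemplate.from_template(
    "Answer using only this context:\n{context}\n\nQuestion: {question}"
)


def format_docs(docs):
    # join the retrieved documents into a single context string
    return "\n\n".join(doc.page_content for doc in docs)


# retrieve -> format -> prompt -> llm -> plain text
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

answer = chain.invoke("your_query")
rclpy.shutdown()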
$ ros2 launch llama_bringup marcoroni.launch.py
$ ros2 run llama_ros llama_demo_node --ros-args -p prompt:="your prompt"
Demo video: llama_ros_gpu_new_1.mp4
$ ros2 launch llama_bringup llava.launch.py
$ ros2 run llama_ros llava_demo_node --ros-args -p prompt:="your prompt" -p image_url:="url of the image" -p use_image:="whether to send the image"