Automatically add all models
simonw opened this issue · comments
The model list is hard-coded at the moment:
llm-anyscale-endpoints/llm_anyscale_endpoints.py
Lines 4 to 13 in 083fd3a
Looks like there's an undocumented API endpoint at /v1/models
that could be used to get the models dynamically instead.
To try that API:
export ANYSCALE=$(cat "$(llm keys path)" | jq '."anyscale-endpoints"' -r)
Then:
curl 'https://api.endpoints.anyscale.com/v1/models' -H "Authorization: Bearer $ANYSCALE" | jq
Output:
{
"data": [
{
"id": "meta-llama/Llama-2-7b-chat-hf",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "text-generation",
"max_total_tokens": 4096,
"input_modality": "text",
"model_url": null,
"model_description": null,
"generation": {
"prompt_format": {
"system": "<<SYS>>\n{instruction}\n<</SYS>>\n\n",
"assistant": " {instruction} </s><s>",
"trailing_assistant": "",
"user": "[INST] {system}{instruction} [/INST]",
"default_system_message": "",
"system_in_user": true,
"add_system_tags_even_if_message_is_empty": false,
"strip_whitespace": true
},
"generate_kwargs": {},
"stopping_sequences": []
}
}
}
},
{
"id": "meta-llama/Llama-2-13b-chat-hf",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "text-generation",
"max_total_tokens": 4096,
"input_modality": "text",
"model_url": null,
"model_description": null,
"generation": {
"prompt_format": {
"system": "<<SYS>>\n{instruction}\n<</SYS>>\n\n",
"assistant": " {instruction} </s><s>",
"trailing_assistant": "",
"user": "[INST] {system}{instruction} [/INST]",
"default_system_message": "",
"system_in_user": true,
"add_system_tags_even_if_message_is_empty": false,
"strip_whitespace": true
},
"generate_kwargs": {},
"stopping_sequences": []
}
}
}
},
{
"id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "text-generation",
"max_total_tokens": 32768,
"input_modality": "text",
"model_url": null,
"model_description": null,
"generation": {
"prompt_format": {
"system": "{instruction} + ",
"assistant": "{instruction}</s> ",
"trailing_assistant": "",
"user": "[INST] {system}{instruction} [/INST]",
"default_system_message": "Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.",
"system_in_user": true,
"add_system_tags_even_if_message_is_empty": false,
"strip_whitespace": true
},
"generate_kwargs": {},
"stopping_sequences": []
}
}
}
},
{
"id": "mistralai/Mistral-7B-Instruct-v0.1",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "text-generation",
"max_total_tokens": 16384,
"input_modality": "text",
"model_url": null,
"model_description": null,
"generation": {
"prompt_format": {
"system": "{instruction} + ",
"assistant": "{instruction}</s> ",
"trailing_assistant": "",
"user": "[INST] {system}{instruction} [/INST]",
"default_system_message": "Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.",
"system_in_user": true,
"add_system_tags_even_if_message_is_empty": false,
"strip_whitespace": true
},
"generate_kwargs": {},
"stopping_sequences": []
}
}
}
},
{
"id": "meta-llama/Llama-2-70b-chat-hf",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "text-generation",
"max_total_tokens": 4096,
"input_modality": "text",
"model_url": null,
"model_description": null,
"generation": {
"prompt_format": {
"system": "<<SYS>>\n{instruction}\n<</SYS>>\n\n",
"assistant": " {instruction} </s><s>",
"trailing_assistant": "",
"user": "[INST] {system}{instruction} [/INST]",
"default_system_message": "",
"system_in_user": true,
"add_system_tags_even_if_message_is_empty": false,
"strip_whitespace": true
},
"generate_kwargs": {},
"stopping_sequences": []
}
}
}
},
{
"id": "thenlper/gte-large",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "embedding",
"max_total_tokens": 512,
"model_url": null,
"model_description": null
}
}
},
{
"id": "BAAI/bge-large-en-v1.5",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "embedding",
"max_total_tokens": 512,
"model_url": null,
"model_description": null
}
}
},
{
"id": "codellama/CodeLlama-70b-Instruct-hf",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "text-generation",
"max_total_tokens": 4096,
"input_modality": "text",
"model_url": null,
"model_description": null,
"generation": {
"prompt_format": {
"system": "Source: system\n\n {instruction} <step> ",
"assistant": "Source: assistant\n\n {instruction} <step> ",
"trailing_assistant": "Source: assistant\nDestination: user\n\n ",
"user": "Source: user\n\n {instruction} <step> ",
"default_system_message": "",
"system_in_user": false,
"add_system_tags_even_if_message_is_empty": true,
"strip_whitespace": true
},
"generate_kwargs": {},
"stopping_sequences": [
"<step>"
]
}
}
}
},
{
"id": "mistralai/Mixtral-8x22B-Instruct-v0.1",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "text-generation",
"max_total_tokens": 65536,
"input_modality": "text",
"model_url": null,
"model_description": null,
"generation": {
"prompt_format": {
"system": "{instruction}\n\n ",
"assistant": "{instruction}</s> ",
"trailing_assistant": "",
"user": "[INST] {system}{instruction} [/INST]",
"default_system_message": "Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.",
"system_in_user": true,
"add_system_tags_even_if_message_is_empty": false,
"strip_whitespace": true
},
"generate_kwargs": {},
"stopping_sequences": []
}
}
}
},
{
"id": "mlabonne/NeuralHermes-2.5-Mistral-7B",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "text-generation",
"max_total_tokens": 16384,
"input_modality": "text",
"model_url": null,
"model_description": null,
"generation": {
"prompt_format": {
"system": "<|im_start|>system\n{instruction}<|im_end|>\n",
"assistant": "<|im_start|>assistant\n{instruction}<|im_end|>\n",
"trailing_assistant": "<|im_start|>assistant\n",
"user": "<|im_start|>user\n{instruction}<|im_end|>\n",
"default_system_message": "Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.",
"system_in_user": false,
"add_system_tags_even_if_message_is_empty": false,
"strip_whitespace": true
},
"generate_kwargs": {},
"stopping_sequences": []
}
}
}
},
{
"id": "google/gemma-7b-it",
"object": "model",
"owned_by": "organization-owner",
"permission": [],
"rayllm_metadata": {
"engine_config": {
"model_type": "text-generation",
"max_total_tokens": 8192,
"input_modality": "text",
"model_url": null,
"model_description": null,
"generation": {
"prompt_format": {
"system": "{instruction}\n",
"assistant": "<start_of_turn>model\n{instruction}<end_of_turn>\n",
"trailing_assistant": "<start_of_turn>model\n",
"user": "<start_of_turn>user\n{system}{instruction}<end_of_turn>\n",
"default_system_message": "",
"system_in_user": true,
"add_system_tags_even_if_message_is_empty": false,
"strip_whitespace": true
},
"generate_kwargs": {},
"stopping_sequences": []
}
}
}
}
],
"object": "list"
}
Some of those are embedding models though. Filtering for the ones with "model_type": "text-generation"
is probably the right thing to do, though the lack of documentation makes me nervous.
How should I cache this information? I don't want LLM making an HTTP call to that API endpoint every time anything within LLM decides to get a list of models.
I'm going to cache this permanently the first time it's accessed. If you want to refresh the cached list of models I'll add a custom command for that, maybe:
llm anyscale-endpoints refresh
Actually I'll bake the original list of models into the tool. Running refresh
will attempt to update it, but might fail (if the undocumented API has changed), at which point it will show an error but the baked-in models will continue to work.