Make the system content a variable
lucafirefox opened this issue · comments
I am using keybert for a topic extraction task and I need to make the system content a variable so I can change it according to my task. Below I show what I mean.
Current implementation:
class OpenAI(BaseLLM):
[...]
def __init__(self,
client,
model: str = "gpt-3.5-turbo-instruct",
prompt: str = None,
generator_kwargs: Mapping[str, Any] = {},
delay_in_seconds: float = None,
exponential_backoff: bool = False,
chat: bool = False,
verbose: bool = False
):
self.client = client
self.model = model
if prompt is None:
self.prompt = DEFAULT_CHAT_PROMPT if chat else DEFAULT_PROMPT
else:
self.prompt = prompt
self.default_prompt_ = DEFAULT_CHAT_PROMPT if chat else DEFAULT_PROMPT
self.delay_in_seconds = delay_in_seconds
self.exponential_backoff = exponential_backoff
self.chat = chat
self.verbose = verbose
self.generator_kwargs = generator_kwargs
if self.generator_kwargs.get("model"):
self.model = generator_kwargs.get("model")
if self.generator_kwargs.get("prompt"):
del self.generator_kwargs["prompt"]
if not self.generator_kwargs.get("stop") and not chat:
self.generator_kwargs["stop"] = "\n"
def extract_keywords(self, documents: List[str], candidate_keywords: List[List[str]] = None):
[..]
# Use a chat model
if self.chat:
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
[..]
Proposed implementation:
class OpenAI(BaseLLM):
[...]
def __init__(self,
client,
model: str = "gpt-3.5-turbo-instruct",
prompt: str = None,
system_content: str = "You are a helpful assistant.",
generator_kwargs: Mapping[str, Any] = {},
delay_in_seconds: float = None,
exponential_backoff: bool = False,
chat: bool = False,
verbose: bool = False
):
self.client = client
self.model = model
if prompt is None:
self.prompt = DEFAULT_CHAT_PROMPT if chat else DEFAULT_PROMPT
else:
self.prompt = prompt
self.system_content = system_content
self.default_prompt_ = DEFAULT_CHAT_PROMPT if chat else DEFAULT_PROMPT
self.delay_in_seconds = delay_in_seconds
self.exponential_backoff = exponential_backoff
self.chat = chat
self.verbose = verbose
self.generator_kwargs = generator_kwargs
if self.generator_kwargs.get("model"):
self.model = generator_kwargs.get("model")
if self.generator_kwargs.get("prompt"):
del self.generator_kwargs["prompt"]
if not self.generator_kwargs.get("stop") and not chat:
self.generator_kwargs["stop"] = "\n"
def extract_keywords(self, documents: List[str], candidate_keywords: List[List[str]] = None):
[..]
# Use a chat model
if self.chat:
messages = [
{"role": "system", "content": self.system_content},
{"role": "user", "content": prompt}
]
[..]
It implies replacing the hard-coded string "You are a helpful assistant." with a variable coming from `__init__` whose default value is the existing content. Backward compatibility is guaranteed.
This logic can be applied both on keybert/llm/_openai.py
and keybert/llm/_litellm.py
files.
That seems more than reasonable. Makes you wonder why I didn't do that when I created this... 😅 That said, if you want to work on this, that would be great! If not, I'll put it on my (very long) to-do list.
No problem, PR done. Thanks for the support.