MaartenGr / KeyBERT

Minimal keyword extraction with BERT

Home Page:https://MaartenGr.github.io/KeyBERT/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

LangChain integration produces an error when following the instructions in the source code

maximoguerrero opened this issue · comments

Name: langchain
Version: 0.1.1

Following the directions listed in the source code yields the error below.

```python
from keybert.llm import LangChain
from keybert import KeyLLM

# Create your LLM
llm = LangChain(chain)

# Load it in KeyLLM
kw_model = KeyLLM(llm)

# Extract keywords
document = "The website mentions that it only takes a couple of days to deliver but I still have not received mine."
keywords = kw_model.extract_keywords(document)
```

Error:
`

AttributeError Traceback (most recent call last)
Cell In[35], line 12
10 # Extract keywords
11 document = "The website mentions that it only takes a couple of days to deliver but I still have not received mine."
---> 12 keywords = kw_model.extract_keywords(document)

File ~/miniconda3/lib/python3.11/site-packages/keybert/_llm.py:126, in KeyLLM.extract_keywords(self, docs, check_vocab, candidate_keywords, threshold, embeddings)
123 keywords = [in_cluster_keywords[index] for index in range(len(docs))]
124 else:
125 # Extract keywords using a Large Language Model (LLM)
--> 126 keywords = self.llm.extract_keywords(docs, candidate_keywords)
128 # Only extract keywords that appear in the input document
129 if check_vocab:

File ~/miniconda3/lib/python3.11/site-packages/keybert/llm/_langchain.py:100, in LangChain.extract_keywords(self, documents, candidate_keywords)
98 prompt = prompt.replace("[CANDIDATES]", ", ".join(candidates))
99 input_document = Document(page_content=document)
--> 100 keywords = self.chain.run(input_documents=input_document, question=self.prompt).strip()
101 keywords = [keyword.strip() for keyword in keywords.split(",")]
102 all_keywords.append(keywords)

File ~/miniconda3/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:145, in deprecated..deprecate..warning_emitting_wrapper(*args, **kwargs)
143 warned = True
144 emit_warning()
--> 145 return wrapped(*args, **kwargs)

File ~/miniconda3/lib/python3.11/site-packages/langchain/chains/base.py:543, in Chain.run(self, callbacks, tags, metadata, *args, **kwargs)
538 return self(args[0], callbacks=callbacks, tags=tags, metadata=metadata)[
539 _output_key
540 ]
542 if kwargs and not args:
--> 543 return self(kwargs, callbacks=callbacks, tags=tags, metadata=metadata)[
544 _output_key
545 ]
547 if not kwargs and not args:
548 raise ValueError(
549 "run supported with either positional arguments or keyword arguments,"
550 " but none were provided."
551 )

File ~/miniconda3/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:145, in deprecated..deprecate..warning_emitting_wrapper(*args, **kwargs)
143 warned = True
144 emit_warning()
--> 145 return wrapped(*args, **kwargs)

File ~/miniconda3/lib/python3.11/site-packages/langchain/chains/base.py:363, in Chain.call(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)
331 """Execute the chain.
332
333 Args:
(...)
354 Chain.output_keys.
355 """
356 config = {
357 "callbacks": callbacks,
358 "tags": tags,
359 "metadata": metadata,
360 "run_name": run_name,
361 }
--> 363 return self.invoke(
364 inputs,
365 cast(RunnableConfig, {k: v for k, v in config.items() if v is not None}),
366 return_only_outputs=return_only_outputs,
367 include_run_info=include_run_info,
368 )

File ~/miniconda3/lib/python3.11/site-packages/langchain/chains/base.py:162, in Chain.invoke(self, input, config, **kwargs)
160 except BaseException as e:
161 run_manager.on_chain_error(e)
--> 162 raise e
163 run_manager.on_chain_end(outputs)
164 final_outputs: Dict[str, Any] = self.prep_outputs(
165 inputs, outputs, return_only_outputs
166 )

File ~/miniconda3/lib/python3.11/site-packages/langchain/chains/base.py:156, in Chain.invoke(self, input, config, **kwargs)
149 run_manager = callback_manager.on_chain_start(
150 dumpd(self),
151 inputs,
152 name=run_name,
153 )
154 try:
155 outputs = (
--> 156 self._call(inputs, run_manager=run_manager)
157 if new_arg_supported
158 else self._call(inputs)
159 )
160 except BaseException as e:
161 run_manager.on_chain_error(e)

File ~/miniconda3/lib/python3.11/site-packages/langchain/chains/combine_documents/base.py:136, in BaseCombineDocumentsChain._call(self, inputs, run_manager)
134 # Other keys are assumed to be needed for LLM prediction
135 other_keys = {k: v for k, v in inputs.items() if k != self.input_key}
--> 136 output, extra_return_dict = self.combine_docs(
137 docs, callbacks=_run_manager.get_child(), **other_keys
138 )
139 extra_return_dict[self.output_key] = output
140 return extra_return_dict

File ~/miniconda3/lib/python3.11/site-packages/langchain/chains/combine_documents/stuff.py:242, in StuffDocumentsChain.combine_docs(self, docs, callbacks, **kwargs)
228 def combine_docs(
229 self, docs: List[Document], callbacks: Callbacks = None, **kwargs: Any
230 ) -> Tuple[str, dict]:
231 """Stuff all documents into one prompt and pass to LLM.
232
233 Args:
(...)
240 element returned is a dictionary of other keys to return.
241 """
--> 242 inputs = self._get_inputs(docs, **kwargs)
243 # Call predict on the LLM.
244 return self.llm_chain.predict(callbacks=callbacks, **inputs), {}

File ~/miniconda3/lib/python3.11/site-packages/langchain/chains/combine_documents/stuff.py:198, in StuffDocumentsChain._get_inputs(self, docs, **kwargs)
183 """Construct inputs from kwargs and docs.
184
185 Format and then join all the documents together into one input with name
(...)
195 dictionary of inputs to LLMChain
196 """
197 # Format each document according to the prompt
--> 198 doc_strings = [format_document(doc, self.document_prompt) for doc in docs]
199 # Join the documents together to put them in the prompt.
200 inputs = {
201 k: v
202 for k, v in kwargs.items()
203 if k in self.llm_chain.prompt.input_variables
204 }

File ~/miniconda3/lib/python3.11/site-packages/langchain/chains/combine_documents/stuff.py:198, in (.0)
183 """Construct inputs from kwargs and docs.
184
185 Format and then join all the documents together into one input with name
(...)
195 dictionary of inputs to LLMChain
196 """
197 # Format each document according to the prompt
--> 198 doc_strings = [format_document(doc, self.document_prompt) for doc in docs]
199 # Join the documents together to put them in the prompt.
200 inputs = {
201 k: v
202 for k, v in kwargs.items()
203 if k in self.llm_chain.prompt.input_variables
204 }

File ~/miniconda3/lib/python3.11/site-packages/langchain_core/prompts/base.py:247, in format_document(doc, prompt)
213 def format_document(doc: Document, prompt: BasePromptTemplate) -> str:
214 """Format a document into a string based on a prompt template.
215
216 First, this pulls information from the document from two sources:
(...)
245 >>> "Page 1: This is a joke"
246 """
--> 247 base_info = {"page_content": doc.page_content, **doc.metadata}
248 missing_metadata = set(prompt.input_variables).difference(base_info)
249 if len(missing_metadata) > 0:

AttributeError: 'tuple' object has no attribute 'page_content'

`

@maximoguerrero Have you tried using the latest commit in the main branch? I believe that should fix your issue. See #199