spaCy errors encountered while trying out transformer-srl library on Win 10, Python 3.8

Question

spaCy errors encountered while trying out transformer-srl library on Win 10, Python 3.8

pragyakatyayan opened this issue 2 years ago · comments

Dr Pragya Katyayan commented 2 years ago

I successfully installed the transformer-srl library using my Jupyter Notebook terminal. However, I encountered the error shown below while trying to run the following code. I am using a Windows 10 system with Python 3.8.

code:

from transformer_srl import dataset_readers, models, predictors

predictor = predictors.SrlTransformersPredictor.from_path("D:/srl_bert_base_conll2012.tar.gz", "transformer_srl")
predictor.predict(sentence="Did Uriah honestly think he could beat the game in under three hours?")

Error with spaCy version 2.2.4:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-1-27de09498ecb> in <module>
      1 from transformer_srl import dataset_readers, models, predictors
      2 
----> 3 predictor = predictors.SrlTransformersPredictor.from_path("D:/srl_bert_base_conll2012.tar.gz", "transformer_srl")
      4 predictor.predict(sentence="Did Uriah honestly think he could beat the game in under three hours?")

~\anaconda3\lib\site-packages\transformer_srl\predictors.py in from_path(cls, archive_path, predictor_name, cuda_device, dataset_reader_to_load, frozen, import_plugins, language, restrict_frames, restrict_roles)
    153         if import_plugins:
    154             plugins.import_plugins()
--> 155         return SrlTransformersPredictor.from_archive(
    156             load_archive(archive_path, cuda_device=cuda_device),
    157             predictor_name,

~\anaconda3\lib\site-packages\transformer_srl\predictors.py in from_archive(cls, archive, predictor_name, dataset_reader_to_load, frozen, language, restrict_frames, restrict_roles)
    197             model.eval()
    198 
--> 199         return predictor_class(model, dataset_reader, language)

~\anaconda3\lib\site-packages\transformer_srl\predictors.py in __init__(self, model, dataset_reader, language)
     20         self, model: Model, dataset_reader: DatasetReader, language: str = "en_core_web_sm",
     21     ) -> None:
---> 22         super().__init__(model, dataset_reader, language)
     23 
     24     @staticmethod

~\anaconda3\lib\site-packages\allennlp_models\structured_prediction\predictors\srl.py in __init__(self, model, dataset_reader, language)
     22     ) -> None:
     23         super().__init__(model, dataset_reader)
---> 24         self._tokenizer = SpacyTokenizer(language=language, pos_tags=True)
     25 
     26     def predict(self, sentence: str) -> JsonDict:

~\anaconda3\lib\site-packages\allennlp\data\tokenizers\spacy_tokenizer.py in __init__(self, language, pos_tags, parse, ner, keep_spacy_tokens, split_on_spaces, start_tokens, end_tokens)
     61         end_tokens: Optional[List[str]] = None,
     62     ) -> None:
---> 63         self.spacy = get_spacy_model(language, pos_tags, parse, ner)
     64         if split_on_spaces:
     65             self.spacy.tokenizer = _WhitespaceSpacyTokenizer(self.spacy.vocab)

~\anaconda3\lib\site-packages\allennlp\common\util.py in get_spacy_model(spacy_model_name, pos_tags, parse, ner)
    273             disable.append("ner")
    274         try:
--> 275             spacy_model = spacy.load(spacy_model_name, disable=disable)
    276         except OSError:
    277             logger.warning(

~\AppData\Roaming\Python\Python38\site-packages\spacy\__init__.py in load(name, **overrides)
     28     if depr_path not in (True, False, None):
     29         deprecation_warning(Warnings.W001.format(path=depr_path))
---> 30     return util.load_model(name, **overrides)
     31 
     32 

~\AppData\Roaming\Python\Python38\site-packages\spacy\util.py in load_model(name, **overrides)
    162             return load_model_from_link(name, **overrides)
    163         if is_package(name):  # installed as package
--> 164             return load_model_from_package(name, **overrides)
    165         if Path(name).exists():  # path to model data directory
    166             return load_model_from_path(Path(name), **overrides)

~\AppData\Roaming\Python\Python38\site-packages\spacy\util.py in load_model_from_package(name, **overrides)
    183     """Load a model from an installed package."""
    184     cls = importlib.import_module(name)
--> 185     return cls.load(**overrides)
    186 
    187 

~\anaconda3\lib\site-packages\en_core_web_sm\__init__.py in load(**overrides)
      8 
      9 def load(**overrides):
---> 10     return load_model_from_init_py(__file__, **overrides)

~\AppData\Roaming\Python\Python38\site-packages\spacy\util.py in load_model_from_init_py(init_file, **overrides)
    226     if not model_path.exists():
    227         raise IOError(Errors.E052.format(path=path2str(data_path)))
--> 228     return load_model_from_path(data_path, meta, **overrides)
    229 
    230 

~\AppData\Roaming\Python\Python38\site-packages\spacy\util.py in load_model_from_path(model_path, meta, **overrides)
    207             config = meta.get("pipeline_args", {}).get(name, {})
    208             factory = factories.get(name, name)
--> 209             component = nlp.create_pipe(factory, config=config)
    210             nlp.add_pipe(component, name=name)
    211     return nlp.from_disk(model_path, exclude=disable)

~\AppData\Roaming\Python\Python38\site-packages\spacy\language.py in create_pipe(self, name, config)
    298                 raise KeyError(Errors.E108.format(name=name))
    299             else:
--> 300                 raise KeyError(Errors.E002.format(name=name))
    301         factory = self.factories[name]
    302         return factory(self, **config)

KeyError: "[E002] Can't find factory for 'tok2vec'. This usually happens when spaCy calls `nlp.create_pipe` with a component name that's not built in - for example, when constructing the pipeline from a model's meta.json. If you're using a custom component, you can write to `Language.factories['tok2vec']` or remove it from the model meta and add it via `nlp.add_pipe` instead."

I tried to upgrade to spacy-3.1.1, but got the following error:

---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-1-27de09498ecb> in <module>
----> 1 from transformer_srl import dataset_readers, models, predictors
      2 
      3 predictor = predictors.SrlTransformersPredictor.from_path("D:/srl_bert_base_conll2012.tar.gz", "transformer_srl")
      4 predictor.predict(sentence="Did Uriah honestly think he could beat the game in under three hours?")

~\anaconda3\lib\site-packages\transformer_srl\dataset_readers.py in <module>
     12 from allennlp.data.tokenizers import Token
     13 from allennlp_models.common.ontonotes import Ontonotes, OntonotesSentence
---> 14 from allennlp_models.structured_prediction import SrlReader
     15 from conllu import parse_incr
     16 from nltk import Tree

~\anaconda3\lib\site-packages\allennlp_models\structured_prediction\__init__.py in <module>
      1 # flake8: noqa: F403
----> 2 from allennlp_models.structured_prediction.predictors import *
      3 from allennlp_models.structured_prediction.dataset_readers import *
      4 from allennlp_models.structured_prediction.metrics import *
      5 from allennlp_models.structured_prediction.models import *

~\anaconda3\lib\site-packages\allennlp_models\structured_prediction\predictors\__init__.py in <module>
      2     BiaffineDependencyParserPredictor,
      3 )
----> 4 from allennlp_models.structured_prediction.predictors.constituency_parser import (
      5     ConstituencyParserPredictor,
      6 )

~\anaconda3\lib\site-packages\allennlp_models\structured_prediction\predictors\constituency_parser.py in <module>
      3 from overrides import overrides
      4 from nltk import Tree
----> 5 from spacy.lang.en.tag_map import TAG_MAP
      6 
      7 from allennlp.common.util import JsonDict, sanitize

ModuleNotFoundError: No module named 'spacy.lang.en.tag_map'

I am not sure how to resolve the issue. Kindly help.
Any help is deeply appreciated. Thanks in advance.

Riccardo Orlando · Answer 1 · Wed Feb 09 2022 21:20:50 GMT+0800 (China Standard Time)

Hi, the requirements say you need spaCy 2.3.x. Can you try updating it from 2.2.4 to 2.3? Let me know if this solves your issue.

Dr Pragya Katyayan · Answer 2 · Thu Feb 10 2022 13:01:01 GMT+0800 (China Standard Time)

Thanks for replying. I downgraded spaCy to version 2.3.0, but I am still getting the same KeyError:

KeyError: "[E002] Can't find factory for 'tok2vec'. This usually happens when spaCy calls `nlp.create_pipe` with a component name that's not built in - for example, when constructing the pipeline from a model's meta.json. If you're using a custom component, you can write to `Language.factories['tok2vec']` or remove it from the model meta and add it via `nlp.add_pipe` instead."

UPDATE:
However, I downloaded the language model again for spaCy 2.3.0 using `python -m spacy download en_core_web_sm`, and that resolved the error. I got the following output:

{'verbs': [{'verb': 'Did',
   'description': '[do.01: Did] Uriah honestly think he could beat the game in under three hours ?',
   'tags': ['B-V',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O'],
   'frame': 'do.01',
   'frame_score': 0.9999996423721313,
   'lemma': 'do'},
  {'verb': 'think',
   'description': 'Did [ARG0: Uriah] [ARGM-ADV: honestly] [think.01: think] [ARG1: he could beat the game in under three hours] ?',
   'tags': ['O',
    'B-ARG0',
    'B-ARGM-ADV',
    'B-V',
    'B-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'O'],
   'frame': 'think.01',
   'frame_score': 1.0,
   'lemma': 'think'},
  {'verb': 'could',
   'description': 'Did Uriah honestly think he [go.04: could] beat the game in under three hours ?',
   'tags': ['O',
    'O',
    'O',
    'O',
    'O',
    'B-V',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O'],
   'frame': 'go.04',
   'frame_score': 0.10186543315649033,
   'lemma': 'could'},
  {'verb': 'beat',
   'description': 'Did Uriah honestly think [ARG0: he] [ARGM-MOD: could] [beat.03: beat] [ARG1: the game] [ARGM-TMP: in under three hours] ?',
   'tags': ['O',
    'O',
    'O',
    'O',
    'B-ARG0',
    'B-ARGM-MOD',
    'B-V',
    'B-ARG1',
    'I-ARG1',
    'B-ARGM-TMP',
    'I-ARGM-TMP',
    'I-ARGM-TMP',
    'I-ARGM-TMP',
    'O'],
   'frame': 'beat.03',
   'frame_score': 0.9999936819076538,
   'lemma': 'beat'}],
 'words': ['Did',
  'Uriah',
  'honestly',
  'think',
  'he',
  'could',
  'beat',
  'the',
  'game',
  'in',
  'under',
  'three',
  'hours',
  '?']}

Riccardo Orlando · Answer 3 · Thu Feb 10 2022 15:06:21 GMT+0800 (China Standard Time)

Glad it worked :)