hankcs / HanLP

中文分词 词性标注 命名实体识别 依存句法分析 成分句法分析 语义依存分析 语义角色标注 指代消解 风格转换 语义相似度 新词发现 关键词短语提取 自动摘要 文本分类聚类 拼音简繁转换 自然语言处理

Home Page:https://hanlp.hankcs.com/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

load函数出现错误日志

Buaasinong opened this issue · comments

Describe the bug
加载不了下载到本地路径的预训练模型,不知道是否是版本问题?

Code to reproduce the issue

import hanlp

tok = hanlp.load(hanlp.pretrained.tok.COARSE_ELECTRA_SMALL_ZH)

Describe the current behavior
A clear and concise description of what happened.

System information
OS: Linux-4.4.0-210-generic-x86_64-with-debian-buster-sid
Python: 3.7.13
PyTorch: 1.13.0+cu117
HanLP: 2.1.0-beta.44

BadZipFile Traceback (most recent call last)
/tmp/ipykernel_25466/3441843643.py in
1 import hanlp
2
----> 3 tok = hanlp.load(hanlp.pretrained.tok.COARSE_ELECTRA_SMALL_ZH)
4 tok(['商品和服务。', '晓美焰来到北京立方庭参观自然语义科技公司'])

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/__init__.py in load(save_dir, verbose, **kwargs)
41 from hanlp_common.constant import HANLP_VERBOSE
42 verbose = HANLP_VERBOSE
---> 43 return load_from_meta_file(save_dir, 'meta.json', verbose=verbose, **kwargs)
44
45

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/utils/component_util.py in load_from_meta_file(save_dir, meta_filename, transform_only, verbose, **kwargs)
180 except:
181 pass
--> 182 raise e from None
183
184

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/utils/component_util.py in load_from_meta_file(save_dir, meta_filename, transform_only, verbose, **kwargs)
104 else:
105 if os.path.isfile(os.path.join(save_dir, 'config.json')):
--> 106 obj.load(save_dir, verbose=verbose, **kwargs)
107 else:
108 obj.load(metapath, **kwargs)

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/common/torch_component.py in load(self, save_dir, devices, verbose, **kwargs)
171 if devices is None and self.model:
172 devices = self.devices
--> 173 self.load_config(save_dir, **kwargs)
174 self.load_vocabs(save_dir)
175 if verbose:

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/common/torch_component.py in load_config(self, save_dir, filename, **kwargs)
123 for k, v in self.config.items():
124 if isinstance(v, dict) and 'classpath' in v:
--> 125 self.config[k] = Configurable.from_config(v)
126 self.on_config_ready(**self.config, save_dir=save_dir)
127

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp_common/configurable.py in from_config(config, **kwargs)
30 return cls(**deserialized_config)
31 else:
---> 32 return cls.from_config(deserialized_config)
33
34

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/common/transform.py in from_config(cls, config)
256 config = dict(config)
257 config.pop('classpath')
--> 258 return cls(**config)
259
260

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/common/transform.py in __init__(self, mapper, src, dst)
478 self.mapper = mapper
479 if isinstance(mapper, str):
--> 480 mapper = get_resource(mapper)
481 if isinstance(mapper, str):
482 self._table = load_json(mapper)

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/utils/io_util.py in get_resource(path, save_dir, extract, prefix, append_location, verbose)
339 path = realpath
340 if extract and compressed:
--> 341 path = uncompress(path, verbose=verbose)
342 if anchor:
343 path = path_join(path, anchor)

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/utils/io_util.py in uncompress(path, dest, remove, verbose)
258 elif os.path.isdir(prefix):
259 shutil.rmtree(prefix)
--> 260 raise e
261 if remove:
262 remove_file(path)

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/utils/io_util.py in uncompress(path, dest, remove, verbose)
223 else:
224 try:
--> 225 with zipfile.ZipFile(path, "r") if ext == '.zip' else tarfile.open(path, 'r:*') as archive:
226 if not dest:
227 namelist = sorted(archive.namelist() if file_is_zip else archive.getnames())

~/anaconda3/envs/topic/lib/python3.7/zipfile.py in __init__(self, file, mode, compression, allowZip64, compresslevel)
1256 try:
1257 if mode == 'r':
-> 1258 self._RealGetContents()
1259 elif mode in ('w', 'x'):
1260 # set the modified flag so central directory gets written

~/anaconda3/envs/topic/lib/python3.7/zipfile.py in _RealGetContents(self)
1323 raise BadZipFile("File is not a zip file")
1324 if not endrec:
-> 1325 raise BadZipFile("File is not a zip file")
1326 if self.debug > 1:
1327 print(endrec)

BadZipFile: File is not a zip file

  • I've completed this form and searched the web for solutions.

Describe the bug 加载不了下载到本地路径的预训练模型,不知道是否是版本问题?

Code to reproduce the issue

import hanlp

tok = hanlp.load(hanlp.pretrained.tok.COARSE_ELECTRA_SMALL_ZH)

Describe the current behavior A clear and concise description of what happened.

System information

OS: Linux-4.4.0-210-generic-x86_64-with-debian-buster-sid
Python: 3.7.13
PyTorch: 1.13.0+cu117
HanLP: 2.1.0-beta.44
BadZipFile Traceback (most recent call last) /tmp/ipykernel_25466/3441843643.py in 1 import hanlp 2 ----> 3 tok = hanlp.load(hanlp.pretrained.tok.COARSE_ELECTRA_SMALL_ZH) 4 tok(['商品和服务。', '晓美焰来到北京立方庭参观自然语义科技公司'])

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/__init__.py in load(save_dir, verbose, **kwargs) 41 from hanlp_common.constant import HANLP_VERBOSE 42 verbose = HANLP_VERBOSE ---> 43 return load_from_meta_file(save_dir, 'meta.json', verbose=verbose, **kwargs) 44 45

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/utils/component_util.py in load_from_meta_file(save_dir, meta_filename, transform_only, verbose, **kwargs) 180 except: 181 pass --> 182 raise e from None 183 184

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/utils/component_util.py in load_from_meta_file(save_dir, meta_filename, transform_only, verbose, **kwargs) 104 else: 105 if os.path.isfile(os.path.join(save_dir, 'config.json')): --> 106 obj.load(save_dir, verbose=verbose, **kwargs) 107 else: 108 obj.load(metapath, **kwargs)

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/common/torch_component.py in load(self, save_dir, devices, verbose, **kwargs) 171 if devices is None and self.model: 172 devices = self.devices --> 173 self.load_config(save_dir, **kwargs) 174 self.load_vocabs(save_dir) 175 if verbose:

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/common/torch_component.py in load_config(self, save_dir, filename, **kwargs) 123 for k, v in self.config.items(): 124 if isinstance(v, dict) and 'classpath' in v: --> 125 self.config[k] = Configurable.from_config(v) 126 self.on_config_ready(**self.config, save_dir=save_dir) 127

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp_common/configurable.py in from_config(config, **kwargs) 30 return cls(**deserialized_config) 31 else: ---> 32 return cls.from_config(deserialized_config) 33 34

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/common/transform.py in from_config(cls, config) 256 config = dict(config) 257 config.pop('classpath') --> 258 return cls(**config) 259 260

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/common/transform.py in __init__(self, mapper, src, dst) 478 self.mapper = mapper 479 if isinstance(mapper, str): --> 480 mapper = get_resource(mapper) 481 if isinstance(mapper, str): 482 self._table = load_json(mapper)

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/utils/io_util.py in get_resource(path, save_dir, extract, prefix, append_location, verbose) 339 path = realpath 340 if extract and compressed: --> 341 path = uncompress(path, verbose=verbose) 342 if anchor: 343 path = path_join(path, anchor)

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/utils/io_util.py in uncompress(path, dest, remove, verbose) 258 elif os.path.isdir(prefix): 259 shutil.rmtree(prefix) --> 260 raise e 261 if remove: 262 remove_file(path)

~/anaconda3/envs/topic/lib/python3.7/site-packages/hanlp/utils/io_util.py in uncompress(path, dest, remove, verbose) 223 else: 224 try: --> 225 with zipfile.ZipFile(path, "r") if ext == '.zip' else tarfile.open(path, 'r:*') as archive: 226 if not dest: 227 namelist = sorted(archive.namelist() if file_is_zip else archive.getnames())

~/anaconda3/envs/topic/lib/python3.7/zipfile.py in __init__(self, file, mode, compression, allowZip64, compresslevel) 1256 try: 1257 if mode == 'r': -> 1258 self._RealGetContents() 1259 elif mode in ('w', 'x'): 1260 # set the modified flag so central directory gets written

~/anaconda3/envs/topic/lib/python3.7/zipfile.py in _RealGetContents(self) 1323 raise BadZipFile("File is not a zip file") 1324 if not endrec: -> 1325 raise BadZipFile("File is not a zip file") 1326 if self.debug > 1: 1327 print(endrec)

BadZipFile: File is not a zip file

  • I've completed this form and searched the web for solutions.

我发现可能是服务器的网络问题:需要根据代码提示手动创建文件夹,下载对应文件并解压到相关文件夹。该问题已经解决。

其实只要wget下载就行了,不需要解压。更多方案参考:https://hanlp.hankcs.com/docs/install.html#download-error