chapter 6 - setup_chapter() function failing in Kaggle
martomor opened this issue · comments
Information
The problem arises in chapter:
- Introduction
- Text Classification
- Transformer Anatomy
- Multilingual Named Entity Recognition
- Text Generation
- Summarization
- Question Answering
- Making Transformers Efficient in Production
- Dealing with Few to No Labels
- Training Transformers from Scratch
- Future Directions
Describe the bug
setup_chapter() generating this error in kaggle: ModuleNotFoundError: No module named 'botocore.compress'
To Reproduce
Steps to reproduce the behavior:
1. Set up the Kaggle accelerator: tried with GPU T4x2 and GPU P100
2. Follow the installation steps from the book:
!git clone https://github.com/nlp-with-transformers/notebooks.git
%cd notebooks
from install import *
install_requirements(is_chapter6=True)
3. Import utils and run the setup_chapter() function:
from utils import *
setup_chapter()
Result:
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[2], line 1
----> 1 from utils import *
2 setup_chapter()
File /kaggle/working/notebooks/utils.py:5
2 import sys
3 from textwrap import TextWrapper
----> 5 import datasets
6 import huggingface_hub
7 import matplotlib.font_manager as font_manager
File /opt/conda/lib/python3.10/site-packages/datasets/__init__.py:37
34 del pyarrow
35 del version
---> 37 from .arrow_dataset import Dataset, concatenate_datasets
38 from .arrow_reader import ReadInstruction
39 from .builder import ArrowBasedBuilder, BeamBasedBuilder, BuilderConfig, DatasetBuilder, GeneratorBasedBuilder
File /opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:60
58 from . import config
59 from .arrow_reader import ArrowReader
---> 60 from .arrow_writer import ArrowWriter, OptimizedTypedSequence
61 from .features import Audio, ClassLabel, Features, Image, Sequence, Value
62 from .features.features import FeatureType, _ArrayXD, decode_nested_example, pandas_types_mapper, require_decoding
File /opt/conda/lib/python3.10/site-packages/datasets/arrow_writer.py:26
23 import pyarrow as pa
25 from . import config
---> 26 from .features import Features, Image, Value
27 from .features.features import (
28 FeatureType,
29 _ArrayXDExtensionType,
(...)
34 numpy_to_pyarrow_listarray,
35 )
36 from .info import DatasetInfo
File /opt/conda/lib/python3.10/site-packages/datasets/features/__init__.py:17
1 # flake8: noqa
3 __all__ = [
4 "Audio",
5 "Array2D",
(...)
15 "TranslationVariableLanguages",
16 ]
---> 17 from .audio import Audio
18 from .features import Array2D, Array3D, Array4D, Array5D, ClassLabel, Features, Sequence, Value
19 from .image import Image
File /opt/conda/lib/python3.10/site-packages/datasets/features/audio.py:10
8 from ..table import array_cast
9 from ..utils.py_utils import no_op_if_value_is_null
---> 10 from ..utils.streaming_download_manager import xopen
13 @dataclass
14 class Audio:
15 """Audio Feature to extract audio data from an audio file.
16
17 Input: The Audio feature accepts as input:
(...)
39 returns the underlying dictionary in the format {"path": audio_path, "bytes": audio_bytes}.
40 """
File /opt/conda/lib/python3.10/site-packages/datasets/utils/streaming_download_manager.py:19
16 from aiohttp.client_exceptions import ClientError
18 from .. import config
---> 19 from ..filesystems import COMPRESSION_FILESYSTEMS
20 from .download_manager import DownloadConfig, map_nested
21 from .file_utils import (
22 get_authentication_headers_for_url,
23 http_head,
(...)
27 url_or_path_join,
28 )
File /opt/conda/lib/python3.10/site-packages/datasets/filesystems/__init__.py:13
10 _has_s3fs = importlib.util.find_spec("s3fs") is not None
12 if _has_s3fs:
---> 13 from .s3filesystem import S3FileSystem # noqa: F401
15 COMPRESSION_FILESYSTEMS: List[compression.BaseCompressedFileFileSystem] = [
16 compression.Bz2FileSystem,
17 compression.GzipFileSystem,
(...)
20 compression.ZstdFileSystem,
21 ]
23 # Register custom filesystems
File /opt/conda/lib/python3.10/site-packages/datasets/filesystems/s3filesystem.py:3
1 import re
----> 3 import s3fs
6 class S3FileSystem(s3fs.S3FileSystem):
7 """
8 ``datasets.filesystems.S3FileSystem`` is a subclass of s3fs.S3FileSystem](https://s3fs.readthedocs.io/en/latest/api.html), which is a known
9 implementation of ``fsspec``. Filesystem Spec [FSSPEC](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest) is a project to
(...)
68 ```
69 """
File /opt/conda/lib/python3.10/site-packages/s3fs/__init__.py:1
----> 1 from .core import S3FileSystem, S3File
2 from .mapping import S3Map
4 from ._version import get_versions
File /opt/conda/lib/python3.10/site-packages/s3fs/core.py:29
27 import aiobotocore
28 import botocore
---> 29 import aiobotocore.session
30 from aiobotocore.config import AioConfig
31 from botocore.exceptions import ClientError, HTTPClientError, ParamValidationError
File /opt/conda/lib/python3.10/site-packages/aiobotocore/session.py:12
3 from botocore.session import (
4 EVENT_ALIASES,
5 ServiceModel,
(...)
8 copy,
9 )
11 from . import retryhandler
---> 12 from .client import AioBaseClient, AioClientCreator
13 from .configprovider import AioSmartDefaultsConfigStoreFactory
14 from .credentials import AioCredentials, create_credential_resolver
File /opt/conda/lib/python3.10/site-packages/aiobotocore/client.py:10
1 from botocore.awsrequest import prepare_request_dict
2 from botocore.client import (
3 BaseClient,
4 ClientCreator,
(...)
8 resolve_checksum_context,
9 )
---> 10 from botocore.compress import maybe_compress_request
11 from botocore.discovery import block_endpoint_discovery_required_operations
12 from botocore.exceptions import OperationNotPageableError, UnknownServiceError
ModuleNotFoundError: No module named 'botocore.compress'
Link to notebook: https://www.kaggle.com/code/martinmoreno313/nlp-summarization
Expected behavior
The code should output:
Using transformers v4.12.0.dev0
Using datasets v1.12.1
Using accelerate v0.5.1
I reinstalled boto3 using:
pip install boto3 --upgrade
and restarted the whole notebook; that worked for me.
After reinstalling boto3 as above, I get this error:
ImportError: cannot import name 'IdentityCache' from 'botocore.utils' (/opt/conda/lib/python3.10/site-packages/botocore/utils.py)