nlp-with-transformers / notebooks

Jupyter notebooks for the Natural Language Processing with Transformers book

Home Page: https://transformersbook.com/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

chapter 6 - setup_chapter() function failing in Kaggle

martomor opened this issue · comments

Information

The problem arises in chapter:

  • Introduction
  • Text Classification
  • Transformer Anatomy
  • Multilingual Named Entity Recognition
  • Text Generation
  • Summarization
  • Question Answering
  • Making Transformers Efficient in Production
  • Dealing with Few to No Labels
  • Training Transformers from Scratch
  • Future Directions

Describe the bug

setup_chapter() generates this error in Kaggle: ModuleNotFoundError: No module named 'botocore.compress'

To Reproduce

Steps to reproduce the behavior:

1. Set up the Kaggle accelerator: tried with GPU T4x2 and GPU P100
2. Follow the installation steps from the book:

!git clone https://github.com/nlp-with-transformers/notebooks.git
%cd notebooks
from install import *
install_requirements(is_chapter6=True)
3. Import utils and run the setup_chapter() function:
from utils import *
setup_chapter()

Result:

---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[2], line 1
----> 1 from utils import *
      2 setup_chapter()

File /kaggle/working/notebooks/utils.py:5
      2 import sys
      3 from textwrap import TextWrapper
----> 5 import datasets
      6 import huggingface_hub
      7 import matplotlib.font_manager as font_manager

File /opt/conda/lib/python3.10/site-packages/datasets/__init__.py:37
     34 del pyarrow
     35 del version
---> 37 from .arrow_dataset import Dataset, concatenate_datasets
     38 from .arrow_reader import ReadInstruction
     39 from .builder import ArrowBasedBuilder, BeamBasedBuilder, BuilderConfig, DatasetBuilder, GeneratorBasedBuilder

File /opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:60
     58 from . import config
     59 from .arrow_reader import ArrowReader
---> 60 from .arrow_writer import ArrowWriter, OptimizedTypedSequence
     61 from .features import Audio, ClassLabel, Features, Image, Sequence, Value
     62 from .features.features import FeatureType, _ArrayXD, decode_nested_example, pandas_types_mapper, require_decoding

File /opt/conda/lib/python3.10/site-packages/datasets/arrow_writer.py:26
     23 import pyarrow as pa
     25 from . import config
---> 26 from .features import Features, Image, Value
     27 from .features.features import (
     28     FeatureType,
     29     _ArrayXDExtensionType,
   (...)
     34     numpy_to_pyarrow_listarray,
     35 )
     36 from .info import DatasetInfo

File /opt/conda/lib/python3.10/site-packages/datasets/features/__init__.py:17
      1 # flake8: noqa
      3 __all__ = [
      4     "Audio",
      5     "Array2D",
   (...)
     15     "TranslationVariableLanguages",
     16 ]
---> 17 from .audio import Audio
     18 from .features import Array2D, Array3D, Array4D, Array5D, ClassLabel, Features, Sequence, Value
     19 from .image import Image

File /opt/conda/lib/python3.10/site-packages/datasets/features/audio.py:10
      8 from ..table import array_cast
      9 from ..utils.py_utils import no_op_if_value_is_null
---> 10 from ..utils.streaming_download_manager import xopen
     13 @dataclass
     14 class Audio:
     15     """Audio Feature to extract audio data from an audio file.
     16 
     17     Input: The Audio feature accepts as input:
   (...)
     39             returns the underlying dictionary in the format {"path": audio_path, "bytes": audio_bytes}.
     40     """

File /opt/conda/lib/python3.10/site-packages/datasets/utils/streaming_download_manager.py:19
     16 from aiohttp.client_exceptions import ClientError
     18 from .. import config
---> 19 from ..filesystems import COMPRESSION_FILESYSTEMS
     20 from .download_manager import DownloadConfig, map_nested
     21 from .file_utils import (
     22     get_authentication_headers_for_url,
     23     http_head,
   (...)
     27     url_or_path_join,
     28 )

File /opt/conda/lib/python3.10/site-packages/datasets/filesystems/__init__.py:13
     10 _has_s3fs = importlib.util.find_spec("s3fs") is not None
     12 if _has_s3fs:
---> 13     from .s3filesystem import S3FileSystem  # noqa: F401
     15 COMPRESSION_FILESYSTEMS: List[compression.BaseCompressedFileFileSystem] = [
     16     compression.Bz2FileSystem,
     17     compression.GzipFileSystem,
   (...)
     20     compression.ZstdFileSystem,
     21 ]
     23 # Register custom filesystems

File /opt/conda/lib/python3.10/site-packages/datasets/filesystems/s3filesystem.py:3
      1 import re
----> 3 import s3fs
      6 class S3FileSystem(s3fs.S3FileSystem):
      7     """
      8     ``datasets.filesystems.S3FileSystem`` is a subclass of s3fs.S3FileSystem](https://s3fs.readthedocs.io/en/latest/api.html), which is a known
      9     implementation of ``fsspec``. Filesystem Spec [FSSPEC](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest) is a project to
   (...)
     68     ```
     69     """

File /opt/conda/lib/python3.10/site-packages/s3fs/__init__.py:1
----> 1 from .core import S3FileSystem, S3File
      2 from .mapping import S3Map
      4 from ._version import get_versions

File /opt/conda/lib/python3.10/site-packages/s3fs/core.py:29
     27 import aiobotocore
     28 import botocore
---> 29 import aiobotocore.session
     30 from aiobotocore.config import AioConfig
     31 from botocore.exceptions import ClientError, HTTPClientError, ParamValidationError

File /opt/conda/lib/python3.10/site-packages/aiobotocore/session.py:12
      3 from botocore.session import (
      4     EVENT_ALIASES,
      5     ServiceModel,
   (...)
      8     copy,
      9 )
     11 from . import retryhandler
---> 12 from .client import AioBaseClient, AioClientCreator
     13 from .configprovider import AioSmartDefaultsConfigStoreFactory
     14 from .credentials import AioCredentials, create_credential_resolver

File /opt/conda/lib/python3.10/site-packages/aiobotocore/client.py:10
      1 from botocore.awsrequest import prepare_request_dict
      2 from botocore.client import (
      3     BaseClient,
      4     ClientCreator,
   (...)
      8     resolve_checksum_context,
      9 )
---> 10 from botocore.compress import maybe_compress_request
     11 from botocore.discovery import block_endpoint_discovery_required_operations
     12 from botocore.exceptions import OperationNotPageableError, UnknownServiceError

ModuleNotFoundError: No module named 'botocore.compress'

Link to notebook: https://www.kaggle.com/code/martinmoreno313/nlp-summarization

Expected behavior

The code should output:

Using transformers v4.12.0.dev0
Using datasets v1.12.1
Using accelerate v0.5.1

I reinstalled boto3 using:
pip install boto3 --upgrade

And restarted the whole notebook, it worked for me.

After reinstalling boto3 as above, I get this error:
ImportError: cannot import name 'IdentityCache' from 'botocore.utils' (/opt/conda/lib/python3.10/site-packages/botocore/utils.py)