Installing tensorflow breaks the transformers library in `test_factual_consistency()`
kennysong opened this issue · comments
Ran into this pretty surprising bug while running tests in an environment that had TF installed. To reproduce:
# Create a fresh venv
python -m venv .venv
source .venv/bin/activate
# Install langcheck
python -m pip install --upgrade pip
python -m pip install -e .[dev]
# Tests pass
python -m pytest -s -vv -m "not optional"
# Install TensorFlow
pip install tensorflow
# Tests fail
python -m pytest -s -vv -m "not optional"
Relevant versions:
- Python 3.9.2
- TensorFlow 2.14.0
- Keras 2.14.0
- Transformers 4.22.1
See error message below:
========================================================================================================== FAILURES ===========================================================================================================
____________________________________________________________________________________ test_factual_consistency[generated_outputs0-sources0] ____________________________________________________________________________________
self = <module 'transformers.models.marian' from '/home/kennysong/langcheck/.venv/lib/python3.9/site-packages/transformers/models/marian/__init__.py'>, module_name = 'modeling_tf_marian'
def _get_module(self, module_name: str):
try:
> return importlib.import_module("." + module_name, self.__name__)
.venv/lib/python3.9/site-packages/transformers/utils/import_utils.py:1031:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/lib/python3.9/importlib/__init__.py:127: in import_module
return _bootstrap._gcd_import(name[level:], package, level)
.venv/lib/python3.9/site-packages/transformers/models/marian/modeling_tf_marian.py:33: in <module>
from ...modeling_tf_utils import (
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
"""TF general model utils."""
import functools
import gc
import inspect
import json
import os
import pickle
import re
import warnings
from collections.abc import Mapping
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
import h5py
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.engine import data_adapter
from tensorflow.python.keras.engine.keras_tensor import KerasTensor
from tensorflow.python.keras.saving import hdf5_format
from huggingface_hub import Repository, list_repo_files
> from keras.saving.hdf5_format import save_attributes_to_hdf5_group
E ModuleNotFoundError: No module named 'keras.saving.hdf5_format'
.venv/lib/python3.9/site-packages/transformers/modeling_tf_utils.py:39: ModuleNotFoundError
The above exception was the direct cause of the following exception:
generated_outputs = ['東京は日本の首都です。', '地球は平面です。'], sources = ['東京は日本の首都です。', '地球は球体です。']
@pytest.mark.parametrize(
'generated_outputs,sources',
[(['東京は日本の首都です。', '地球は平面です。'], ['東京は日本の首都です。', '地球は球体です。'])])
def test_factual_consistency(generated_outputs, sources):
> eval_value = factual_consistency(generated_outputs, sources)
tests/eval/ja/test_source_based_text_quality.py:16:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
src/langcheck/eval/ja/source_based_text_quality.py:89: in factual_consistency
_factual_consistency_translation_pipeline = pipeline(
.venv/lib/python3.9/site-packages/transformers/pipelines/__init__.py:702: in pipeline
framework, model = infer_framework_load_model(
.venv/lib/python3.9/site-packages/transformers/pipelines/base.py:233: in infer_framework_load_model
_class = getattr(transformers_module, f"TF{architecture}", None)
.venv/lib/python3.9/site-packages/transformers/utils/import_utils.py:1022: in __getattr__
value = getattr(module, name)
.venv/lib/python3.9/site-packages/transformers/utils/import_utils.py:1021: in __getattr__
module = self._get_module(self._class_to_module[name])
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <module 'transformers.models.marian' from '/home/kennysong/langcheck/.venv/lib/python3.9/site-packages/transformers/models/marian/__init__.py'>, module_name = 'modeling_tf_marian'
def _get_module(self, module_name: str):
try:
return importlib.import_module("." + module_name, self.__name__)
except Exception as e:
> raise RuntimeError(
f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its"
f" traceback):\n{e}"
) from e
E RuntimeError: Failed to import transformers.models.marian.modeling_tf_marian because of the following error (look up to see its traceback):
E No module named 'keras.saving.hdf5_format'
.venv/lib/python3.9/site-packages/transformers/utils/import_utils.py:1033: RuntimeError
Can you try `pip install -U tensorflow==2.10`?
Basically, `transformers` tried to import the module `keras.saving.hdf5_format`, which no longer exists in the latest version of Keras.
Relevant stackoverflow: https://stackoverflow.com/questions/74586892/no-module-named-keras-saving-hdf5-format
Yeah, downgrading TensorFlow could resolve this problem for a user calling `langcheck.eval.factual_consistency()` in an environment with TF installed.
We should also investigate whether there's a better way to handle this in the LangCheck package, or whether it needs to be fixed upstream in `transformers`.