[BUG] Question Answering unit tests fail on GPU testing machine
hlums opened this issue
Hong Lu commented
Description
tests/unit/test_models_transformers_question_answering.py passes on the DSVM, but fails on the GPU testing machine.
tests/unit/test_common_pytorch_utils.py ...... [ 54%]
tests/unit/test_models_transformers_question_answering.py .F.. [ 90%]
tests/unit/test_transformers_sequence_classification.py . [100%]
=================================== FAILURES ===================================
_____________________________ test_AnswerExtractor _____________________________
qa_test_data = {'test_dataset': <utils_nlp.models.transformers.datasets.QADataset object at 0x7f1447bbe828>, 'test_features_bert': <t...ject at 0x7f1446a0c780>, 'test_features_xlnet': <torch.utils.data.dataloader.DataLoader object at 0x7f14469bfd30>, ...}
tmp_module = '/tmp/pytest-of-nlpadmin/pytest-1011/tmpnzdwdt1d'
    @pytest.mark.gpu
    def test_AnswerExtractor(qa_test_data, tmp_module):
        # test bert
        qa_extractor_bert = AnswerExtractor(cache_dir=tmp_module)
>       qa_extractor_bert.fit(qa_test_data["train_features_bert"], cache_model=True)
tests/unit/test_models_transformers_question_answering.py:197:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
utils_nlp/models/transformers/question_answering.py:551: in fit
seed=seed,
utils_nlp/models/transformers/common.py:174: in fine_tune
outputs = self.model(**inputs)
/data/anaconda/envs/nlp_gpu/lib/python3.6/site-packages/torch/nn/modules/module.py:547: in __call__
result = self.forward(*input, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = DataParallel(
(module): BertForQuestionAnswering(
(bert): BertModel(
(embeddings): BertEmbeddings(
...)
(activation): Tanh()
)
)
(qa_outputs): Linear(in_features=768, out_features=2, bias=True)
)
)
inputs = ()
kwargs = {'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, ... 0,
0, 0, 0, 0]], device='cuda:1'), 'start_positions': tensor([ 0, 17], device='cuda:1'), ...}
t = Parameter containing:
tensor([[-0.0005, -0.0416, 0.0131, ..., -0.0039, -0.0335, 0.0150],
[ 0.0169, -0.0311,...18],
[ 0.0313, -0.0297, -0.0230, ..., -0.0145, -0.0525, 0.0284]],
device='cuda:1', requires_grad=True)
    def forward(self, *inputs, **kwargs):
        if not self.device_ids:
            return self.module(*inputs, **kwargs)
        for t in chain(self.module.parameters(), self.module.buffers()):
            if t.device != self.src_device_obj:
                raise RuntimeError("module must have its parameters and buffers "
                                   "on device {} (device_ids[0]) but found one of "
>                                  "them on device: {}".format(self.src_device_obj, t.device))
E       RuntimeError: module must have its parameters and buffers on device cuda:0 (device_ids[0]) but found one of them on device: cuda:1
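For context, the error is raised by torch.nn.DataParallel, which requires the wrapped module's parameters and buffers to sit on device_ids[0] (cuda:0 by default) when forward is called. The sketch below is not the repo's AnswerExtractor code; it is a minimal plain-PyTorch reproduction on a machine with at least two GPUs, with a toy model and made-up tensor shapes, showing the same check and two ways to satisfy it.

```python
import torch
import torch.nn as nn

# Toy stand-in for the fine-tuned model; shapes are illustrative only.
model = nn.Linear(768, 2)

if torch.cuda.device_count() > 1:
    # Placing the module on a device other than device_ids[0] reproduces the
    # failure: DataParallel defaults device_ids to all visible GPUs, so
    # src_device_obj is cuda:0 while the parameters live on cuda:1.
    model.to("cuda:1")
    parallel_model = nn.DataParallel(model)
    try:
        parallel_model(torch.randn(4, 768, device="cuda:1"))
    except RuntimeError as err:
        print(err)  # "module must have its parameters and buffers on device cuda:0 ..."

    # Fix 1: move the module to device_ids[0] (cuda:0) before the forward pass.
    model.to("cuda:0")
    out = nn.DataParallel(model)(torch.randn(4, 768, device="cuda:0"))

    # Fix 2: keep the module on cuda:1 but list that device first in device_ids.
    model.to("cuda:1")
    out = nn.DataParallel(model, device_ids=[1, 0])(torch.randn(4, 768, device="cuda:1"))
```

If something similar is happening in the fine_tune path in utils_nlp/models/transformers/common.py, ensuring the model is moved to cuda:0 (or that device_ids starts with the model's current device) before the forward pass should avoid this failure on the multi-GPU test machine.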