FastSpeech2 training error

Question

FastSpeech2 training error

lunar333 opened this issue 2 years ago · comments

The preprocessing step ran successfully, but when I run the training code, I get the following error:

(torch) zhonghuihang@kdf-X12DAi-N6:~/fastSpeech2-master$ python3 train.py -p config/LJSpeech/preprocess.yaml -m config/LJSpeech/model.yaml -t config/LJSpeech/train.yaml
Prepare training ...
Number of FastSpeech2 Parameters: 35159361
Traceback (most recent call last):
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/tarfile.py", line 186, in nti
s = nts(s, "ascii", "strict")
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/tarfile.py", line 170, in nts
return s.decode(encoding, errors)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xdf in position 2: ordinal not in range(128)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/tarfile.py", line 2289, in next
tarinfo = self.tarinfo.fromtarfile(self)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/tarfile.py", line 1095, in fromtarfile
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/tarfile.py", line 1037, in frombuf
chksum = nti(buf[148:156])
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/tarfile.py", line 189, in nti
raise InvalidHeaderError("invalid header")
tarfile.InvalidHeaderError: invalid header

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/serialization.py", line 595, in _load
return legacy_load(f)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/serialization.py", line 506, in legacy_load
with closing(tarfile.open(fileobj=f, mode='r:', format=tarfile.PAX_FORMAT)) as tar,
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/tarfile.py", line 1593, in open
return func(name, filemode, fileobj, **kwargs)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/tarfile.py", line 1623, in taropen
return cls(name, mode, fileobj, **kwargs)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/tarfile.py", line 1486, in __init__
self.firstmember = self.next()
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/tarfile.py", line 2301, in next
raise ReadError(str(e))
tarfile.ReadError: invalid header

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "train.py", line 198, in <module>
main(args, configs)
File "train.py", line 48, in main
vocoder = get_vocoder(model_config, device)
File "/home/zhonghuihang/fastSpeech2-master/utils/model.py", line 63, in get_vocoder
ckpt = torch.load("/home/zhonghuihang/fastSpeech2-master/hifigan/generator_LJSpeech.pth.tar.zip")
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/serialization.py", line 426, in load
return _load(f, map_location, pickle_module, **pickle_load_args)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/serialization.py", line 599, in _load
raise RuntimeError("{} is a zip archive (did you mean to use torch.jit.load()?)".format(f.name))
RuntimeError: /home/zhonghuihang/fastSpeech2-master/hifigan/generator_LJSpeech.pth.tar.zip is a zip archive (did you mean to use torch.jit.load()?)
(torch) zhonghuihang@kdf-X12DAi-N6:~/fastSpeech2-master$ python3 train.py -p config/LJSpeech/preprocess.yaml -m config/LJSpeech/model.yaml -t config/LJSpeech/train.yaml
Prepare training ...
channel 3: open failed: connect failed: Connection refused
channel 4: open failed: connect failed: Connection refused
channel 3: open failed: connect failed: Connection refused
channel 4: open failed: connect failed: Connection refused
channel 3: open failed: connect failed: Connection refused
channel 4: open failed: connect failed: Connection refused
Number of FastSpeech2 Parameters: 35159361
Removing weight norm...
Training: 0%| | 0/900000 [00:00<?, ?it/sTraceback (most recent call last): | 0/197 [00:00<?, ?it/s]
File "train.py", line 198, in <module>
main(args, configs)
File "train.py", line 82, in main
output = model(*(batch[2:]))
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
result = self.forward(*input, **kwargs)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 150, in forward
return self.module(*inputs[0], **kwargs[0])
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
result = self.forward(*input, **kwargs)
File "/home/zhonghuihang/fastSpeech2-master/model/fastspeech2.py", line 66, in forward
output = self.encoder(texts, src_masks)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
result = self.forward(*input, **kwargs)
File "/home/zhonghuihang/fastSpeech2-master/transformer/Models.py", line 95, in forward
enc_output, mask=mask, slf_attn_mask=slf_attn_mask
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
result = self.forward(*input, **kwargs)
File "/home/zhonghuihang/fastSpeech2-master/transformer/Layers.py", line 23, in forward
enc_input, enc_input, enc_input, mask=slf_attn_mask
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
result = self.forward(*input, **kwargs)
File "/home/zhonghuihang/fastSpeech2-master/transformer/SubLayers.py", line 39, in forward
q = self.w_qs(q).view(sz_b, len_q, n_head, d_k)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
result = self.forward(*input, **kwargs)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/nn/modules/linear.py", line 87, in forward
return F.linear(input, self.weight, self.bias)
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/torch/nn/functional.py", line 1372, in linear
output = input.matmul(weight.t())
RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)
Training: 0%| | 1/900000 [00:00<142:38:55, 1.75it/s]
Exception ignored in: <function tqdm.__del__ at 0x7ff84f88e4d0>
Traceback (most recent call last):
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/tqdm/std.py", line 1086, in __del__
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/tqdm/std.py", line 1270, in close
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/tqdm/std.py", line 572, in _decr_instances
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/site-packages/tqdm/_monitor.py", line 51, in exit
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/threading.py", line 522, in set
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/threading.py", line 365, in notify_all
File "/home/zhonghuihang/miniconda3/envs/torch/lib/python3.7/threading.py", line 348, in notify
TypeError: 'NoneType' object is not callable