Error when the runtime cache directory is on NFS
simplew2011 opened this issue · comments
simplew2011 commented
Describe the bug
- When reading a dataset, a cache is generated in the ~/.cache/huggingface/datasets directory
- When using .map and .filter operations, a runtime cache is generated in the /tmp/hf_datasets-* directory
- By default, this runtime cache uses the path given by tempfile.tempdir
- If I change this path to a directory on an NFS disk, an error is reported, but the program continues to run
- https://github.com/huggingface/datasets/blob/main/src/datasets/config.py#L257
Traceback (most recent call last):
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/process.py", line 315, in _bootstrap
self.run()
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/managers.py", line 616, in _run_server
server.serve_forever()
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/managers.py", line 182, in serve_forever
sys.exit(0)
SystemExit: 0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/util.py", line 300, in _run_finalizers
finalizer()
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/util.py", line 224, in __call__
res = self._callback(*self._args, **self._kwargs)
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
rmtree(tempdir)
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/shutil.py", line 718, in rmtree
_rmtree_safe_fd(fd, path, onerror)
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/shutil.py", line 675, in _rmtree_safe_fd
onerror(os.unlink, fullname, sys.exc_info())
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/shutil.py", line 673, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '.nfs000000038330a012000030b4'
Traceback (most recent call last):
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/process.py", line 315, in _bootstrap
self.run()
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/managers.py", line 616, in _run_server
server.serve_forever()
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/managers.py", line 182, in serve_forever
sys.exit(0)
SystemExit: 0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/util.py", line 300, in _run_finalizers
finalizer()
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/util.py", line 224, in __call__
res = self._callback(*self._args, **self._kwargs)
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
rmtree(tempdir)
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/shutil.py", line 718, in rmtree
_rmtree_safe_fd(fd, path, onerror)
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/shutil.py", line 675, in _rmtree_safe_fd
onerror(os.unlink, fullname, sys.exc_info())
File "/home/wzp/miniconda3/envs/dask/lib/python3.8/shutil.py", line 673, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '.nfs0000000400064d4a000030e5'
Steps to reproduce the bug
import os
import time
import tempfile
from datasets import load_dataset
def add_column(sample):
    """Attach a constant ``__ds__stats__`` entry to *sample* and return it.

    The sample is modified in place; the same object is returned so the
    function can be passed directly to ``dataset.map``.
    """
    sample['__ds__stats__'] = {'data': 123}
    return sample
def filt_column(sample) -> bool:
    """Return True when the sample's ``content`` is longer than 10 items.

    Intended as a predicate for ``dataset.filter``: samples whose
    ``content`` has length 10 or less are dropped.
    """
    return len(sample['content']) > 10
if __name__ == '__main__':
    # Load a local JSON dataset.
    input_dir = '/mnt/temp/CN/small'  # some json dataset
    dataset = load_dataset('json', data_dir=input_dir)

    # Redirect Python's temporary directory to an NFS folder so that the
    # runtime cache created by .map/.filter lands there instead of /tmp.
    temp_dir = '/media/release/release/temp/temp'  # a nfs folder
    os.makedirs(temp_dir, exist_ok=True)
    tempfile.tempdir = temp_dir

    # Run map and filter with many worker processes; this is the
    # configuration under which the "Device or resource busy" error shows up.
    aa = dataset.map(add_column, num_proc=64)
    aa = aa.filter(filt_column, num_proc=64)
    print(aa)
Expected behavior
No error should occur.
Environment info
datasets==2.18.0
ubuntu 20.04