rapidsai / cuml

cuML - RAPIDS Machine Learning Library

Home Page:https://docs.rapids.ai/api/cuml/stable/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

[BUG] UserWarning: Error getting driver and runtime versions:

dysartk opened this issue · comments

Describe the bug
After installing RAPIDS with the recommended Conda command and trying to import cuml, I get the error below. The installation seemingly went well, without errors, and the environment seems acceptable.

I am running Ubuntu 22.04
Nvidia 2070 Super
Driver 550.67
Cuda 12.4

Steps/Code to reproduce bug
import cuml

Error

/home/kd/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/utils/_ptxcompiler.py:61: UserWarning: Error getting driver and runtime versions:

stdout:

stderr:

Traceback (most recent call last):
File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 254, in ensure_initialized
self.cuInit(0)
File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 327, in safe_cuda_api_call
self._check_ctypes_error(fname, retcode)
File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 395, in _check_ctypes_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [999] Call to cuInit results in CUDA_ERROR_UNKNOWN

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "<string>", line 4, in <module>
File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 292, in __getattr__
self.ensure_initialized()
File "/home/kevindysart/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.py", line 258, in ensure_initialized
raise CudaSupportError(f"Error at driver init: {description}")
numba.cuda.cudadrv.error.CudaSupportError: Error at driver init: Call to cuInit results in CUDA_ERROR_UNKNOWN (999)

Not patching Numba
warnings.warn(msg, UserWarning)

CUDARuntimeError Traceback (most recent call last)
Cell In[1], line 1
----> 1 import cuml
2 from cupy import asnumpy
3 from joblib import dump, load

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/__init__.py:17
1 #
2 # Copyright (c) 2022-2023, NVIDIA CORPORATION.
3 #
(...)
14 # limitations under the License.
15 #
---> 17 from cuml.internals.base import Base, UniversalBase
18 from cuml.internals.available_devices import is_cuda_available
20 # GPU only packages

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/__init__.py:18
1 #
2 # Copyright (c) 2019-2023, NVIDIA CORPORATION.
3 #
(...)
14 # limitations under the License.
15 #
17 from cuml.internals.available_devices import is_cuda_available
---> 18 from cuml.internals.base_helpers import BaseMetaClass, _tags_class_and_instance
19 from cuml.internals.api_decorators import (
20 _deprecate_pos_args,
21 api_base_fit_transform,
(...)
33 exit_internal_api,
34 )
35 from cuml.internals.api_context_managers import (
36 in_internal_api,
37 set_api_output_dtype,
38 set_api_output_type,
39 )

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/base_helpers.py:20
17 from inspect import Parameter, signature
18 import typing
---> 20 from cuml.internals.api_decorators import (
21 api_base_return_generic,
22 api_base_return_array,
23 api_base_return_sparse_array,
24 api_base_return_any,
25 api_return_any,
26 _deprecate_pos_args,
27 )
28 from cuml.internals.array import CumlArray
29 from cuml.internals.array_sparse import SparseCumlArray

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/api_decorators.py:24
21 import warnings
23 # TODO: Try to resolve circular import that makes this necessary:
---> 24 from cuml.internals import input_utils as iu
25 from cuml.internals.api_context_managers import BaseReturnAnyCM
26 from cuml.internals.api_context_managers import BaseReturnArrayCM

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/input_utils.py:19
1 #
2 # Copyright (c) 2019-2023, NVIDIA CORPORATION.
3 #
(...)
14 # limitations under the License.
15 #
17 from collections import namedtuple
---> 19 from cuml.internals.array import CumlArray
20 from cuml.internals.array_sparse import SparseCumlArray
21 from cuml.internals.global_settings import GlobalSettings

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/array.py:21
18 import operator
19 import pickle
---> 21 from cuml.internals.global_settings import GlobalSettings
22 from cuml.internals.logger import debug
23 from cuml.internals.mem_type import MemoryType, MemoryTypeError

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/global_settings.py:20
18 import threading
19 from cuml.internals.available_devices import is_cuda_available
---> 20 from cuml.internals.device_type import DeviceType
21 from cuml.internals.mem_type import MemoryType
22 from cuml.internals.safe_imports import cpu_only_import, gpu_only_import

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/device_type.py:19
1 #
2 # Copyright (c) 2022-2023, NVIDIA CORPORATION.
3 #
(...)
14 # limitations under the License.
15 #
18 from enum import Enum, auto
---> 19 from cuml.internals.mem_type import MemoryType
22 class DeviceTypeError(Exception):
23 """An exception thrown to indicate bad device type selection"""

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/mem_type.py:22
19 from cuml.internals.device_support import GPU_ENABLED
20 from cuml.internals.safe_imports import cpu_only_import, gpu_only_import
---> 22 cudf = gpu_only_import("cudf")
23 cp = gpu_only_import("cupy")
24 cpx_sparse = gpu_only_import("cupyx.scipy.sparse")

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cuml/internals/safe_imports.py:356, in gpu_only_import(module, alt)
330 """A function used to import modules required only in GPU installs
331
332 This function will attempt to import a module with the given name, but it
(...)
353 UnavailableMeta.
354 """
355 if GPU_ENABLED:
--> 356 return importlib.import_module(module)
357 else:
358 return safe_import(
359 module,
360 msg=f"{module} is not installed in non GPU-enabled installations",
361 alt=alt,
362 )

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/importlib/__init__.py:126, in import_module(name, package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/__init__.py:10
7 from cudf.utils.gpu_utils import validate_setup
9 _setup_numba()
---> 10 validate_setup()
12 import cupy
13 from numba import config as numba_config, cuda

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/utils/gpu_utils.py:55, in validate_setup()
53 except CUDARuntimeError as e:
54 if e.status in notify_caller_errors:
---> 55 raise e
56 # If there is no GPU detected, set gpus_count to -1
57 gpus_count = -1

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/cudf/utils/gpu_utils.py:52, in validate_setup()
31 notify_caller_errors = {
32 cudaError_t.cudaErrorInitializationError,
33 cudaError_t.cudaErrorInsufficientDriver,
(...)
48 cudaError_t.cudaErrorApiFailureBase,
49 }
51 try:
---> 52 gpus_count = getDeviceCount()
53 except CUDARuntimeError as e:
54 if e.status in notify_caller_errors:

File ~/anaconda3/envs/rapids-24.04/lib/python3.11/site-packages/rmm/_cuda/gpu.py:102, in getDeviceCount()

Thanks for the issue, @dysartk. The driver and GPU should be fine, so this is an unexpected issue. It seems to be failing on some basic CUDA calls. Is there a chance you are using conda inside a Docker container? Otherwise, could you try running https://github.com/rapidsai/cuml/blob/branch-24.06/print_env.sh and posting its output here?