ovh / ai-training-examples

AttributeError: 'NoneType' object has no attribute 'cquantize_blockwise_fp16_nf4'

AdarshGowda33 opened this issue · comments

While running
model, tokenizer = load_model(model_name, bnb_config)
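
For context, here is a minimal sketch of what these two helpers look like, reconstructed from the traceback frames below; the exact BitsAndBytesConfig fields are assumptions (only the nf4 quant type is confirmed by the kernel name in the error):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

def create_bnb_config():
    # Assumed 4-bit NF4 setup; quant_type="nf4" matches the
    # cquantize_blockwise_fp16_nf4 kernel named in the error below.
    return BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

def load_model(model_name, bnb_config):
    # Reconstructed from the traceback frames below.
    n_gpus = torch.cuda.device_count()
    max_memory = f'{40960}MB'
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",  # dispatch the model efficiently on the available resources
        max_memory={i: max_memory for i in range(n_gpus)},
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
    return model, tokenizer
```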

I am getting the following error:


AttributeError Traceback (most recent call last)
Cell In[33], line 4
2 model_name = "meta-llama/Llama-2-7b-hf"
3 bnb_config = create_bnb_config()
----> 4 model, tokenizer = load_model(model_name, bnb_config)

Cell In[4], line 5, in load_model(model_name, bnb_config)
2 n_gpus = torch.cuda.device_count()
3 max_memory = f'{40960}MB'
----> 5 model = AutoModelForCausalLM.from_pretrained(
6 model_name,
7 quantization_config=bnb_config,
8 device_map="auto", # dispatch efficiently the model on the available ressources
9 max_memory = {i: max_memory for i in range(n_gpus)},
10 )
11 tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
13 # Needed for LLaMA tokenizer

File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:566, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
564 elif type(config) in cls._model_mapping.keys():
565 model_class = _get_model_class(config, cls._model_mapping)
--> 566 return model_class.from_pretrained(
567 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
568 )
569 raise ValueError(
570 f"Unrecognized configuration class {config.class} for this kind of AutoModel: {cls.name}.\n"
571 f"Model type should be one of {', '.join(c.name for c in cls._model_mapping.keys())}."
572 )

File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:3480, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
3471 if dtype_orig is not None:
3472 torch.set_default_dtype(dtype_orig)
3473 (
3474 model,
3475 missing_keys,
3476 unexpected_keys,
3477 mismatched_keys,
3478 offload_index,
3479 error_msgs,
-> 3480 ) = cls._load_pretrained_model(
3481 model,
3482 state_dict,
3483 loaded_state_dict_keys, # XXX: rename?
3484 resolved_archive_file,
3485 pretrained_model_name_or_path,
3486 ignore_mismatched_sizes=ignore_mismatched_sizes,
3487 sharded_metadata=sharded_metadata,
3488 _fast_init=_fast_init,
3489 low_cpu_mem_usage=low_cpu_mem_usage,
3490 device_map=device_map,
3491 offload_folder=offload_folder,
3492 offload_state_dict=offload_state_dict,
3493 dtype=torch_dtype,
3494 is_quantized=(getattr(model, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES),
3495 keep_in_fp32_modules=keep_in_fp32_modules,
3496 )
3498 model.is_loaded_in_4bit = load_in_4bit
3499 model.is_loaded_in_8bit = load_in_8bit

File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:3870, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, is_quantized, keep_in_fp32_modules)
3868 if low_cpu_mem_usage:
3869 if not is_fsdp_enabled() or is_fsdp_enabled_and_dist_rank_0():
-> 3870 new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
3871 model_to_load,
3872 state_dict,
3873 loaded_keys,
3874 start_prefix,
3875 expected_keys,
3876 device_map=device_map,
3877 offload_folder=offload_folder,
3878 offload_index=offload_index,
3879 state_dict_folder=state_dict_folder,
3880 state_dict_index=state_dict_index,
3881 dtype=dtype,
3882 is_quantized=is_quantized,
3883 is_safetensors=is_safetensors,
3884 keep_in_fp32_modules=keep_in_fp32_modules,
3885 )
3886 error_msgs += new_error_msgs
3887 else:

File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:751, in _load_state_dict_into_meta_model(model, state_dict, loaded_state_dict_keys, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, is_quantized, is_safetensors, keep_in_fp32_modules)
748 fp16_statistics = None
750 if "SCB" not in param_name:
--> 751 set_module_quantized_tensor_to_device(
752 model, param_name, param_device, value=param, fp16_statistics=fp16_statistics
753 )
755 return error_msgs, offload_index, state_dict_index

File /opt/conda/lib/python3.10/site-packages/transformers/integrations/bitsandbytes.py:98, in set_module_quantized_tensor_to_device(module, tensor_name, device, value, fp16_statistics)
96 new_value = bnb.nn.Int8Params(new_value, requires_grad=False, **kwargs).to(device)
97 elif is_4bit:
---> 98 new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
100 module._parameters[tensor_name] = new_value
101 if fp16_statistics is not None:

File /opt/conda/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:179, in Params4bit.to(self, *args, **kwargs)
176 device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
178 if (device is not None and device.type == "cuda" and self.data.device.type == "cpu"):
--> 179 return self.cuda(device)
180 else:
181 s = self.quant_state

File /opt/conda/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:157, in Params4bit.cuda(self, device)
155 def cuda(self, device):
156 w = self.data.contiguous().half().cuda(device)
--> 157 w_4bit, quant_state = bnb.functional.quantize_4bit(w, blocksize=self.blocksize, compress_statistics=self.compress_statistics, quant_type=self.quant_type)
158 self.data = w_4bit
159 self.quant_state = quant_state

File /opt/conda/lib/python3.10/site-packages/bitsandbytes/functional.py:832, in quantize_4bit(A, absmax, out, blocksize, compress_statistics, quant_type)
830 lib.cquantize_blockwise_fp16_fp4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
831 else:
--> 832 lib.cquantize_blockwise_fp16_nf4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
833 elif A.dtype == torch.bfloat16:
834 if quant_type == 'fp4':

AttributeError: 'NoneType' object has no attribute 'cquantize_blockwise_fp16_nf4'
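
In bitsandbytes, `lib` is the ctypes handle to the compiled CUDA library; when no matching `libbitsandbytes_cuda*.so` can be loaded at import time (a CPU-only build, a CUDA version mismatch, or a missing CUDA runtime), it is left as `None`, and the first quantization call then fails with exactly this AttributeError. A minimal way to isolate the failure outside transformers, assuming a CUDA GPU is visible:

```python
# Reproduce the failing call directly; this is the same nf4 code path as the traceback.
import torch
import bitsandbytes as bnb  # watch the import-time warnings: they name the missing library

print(torch.__version__, torch.version.cuda, torch.cuda.is_available())

w = torch.randn(64, 64, dtype=torch.float16, device="cuda")
q, state = bnb.functional.quantize_4bit(w, quant_type="nf4")
print(q.shape)  # only reached if the native CUDA library loaded
```

If the import already warns about falling back to a CPU-only build, reinstalling bitsandbytes against the CUDA version reported by `torch.version.cuda` usually resolves it; recent bitsandbytes releases also ship a self-check via `python -m bitsandbytes`.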

I am running into the same problem. How can I solve it?