ValueError: node array from the pickle has an incompatible dtype
ytzfhqs opened this issue · comments
QingSong Hao commented
I encountered an error when using SUOD on the Kaggle platform.
I installed the dependencies as follows:
!git clone https://github.com/yzhao062/pyod.git
%cd pyod
!pip install .
!pip install -r requirements_ci.txt
The error occurs when I run the SUOD example code from the tutorial file:
from pyod.models.suod import SUOD
from pyod.models.lof import LOF
from pyod.models.iforest import IForest
from pyod.models.copod import COPOD
from pyod.utils.utility import standardizer
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
from pyod.utils.example import visualize
contamination = 0.1
n_train = 200
n_test = 100
# Generate sample data
X_train, X_test, y_train, y_test = \
generate_data(n_train=n_train,
n_test=n_test,
n_features=2,
contamination=contamination,
random_state=42)
# train SUOD
clf_name = 'SUOD'
# initialized a group of outlier detectors for acceleration
detector_list = [LOF(n_neighbors=15), LOF(n_neighbors=20),
LOF(n_neighbors=25), LOF(n_neighbors=35),
COPOD(), IForest(n_estimators=100),
IForest(n_estimators=200)]
# decide the number of parallel process, and the combination method
clf = SUOD(base_estimators=detector_list, n_jobs=2, combination='average',
verbose=False)
# or to use the default detectors
# clf = SUOD(n_jobs=2, combination='average',
# verbose=False)
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)
y_test_scores = clf.decision_function(X_test) # outlier scores
# evaluate and print the results
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, y_test_scores)
# visualize the results
visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
y_test_pred, show_figure=True, save_figure=False)
Full error traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[14], line 44
41 y_train_scores = clf.decision_scores_ # raw outlier scores
43 # get the prediction on the test data
---> 44 y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)
45 y_test_scores = clf.decision_function(X_test) # outlier scores
47 # evaluate and print the results
File /kaggle/working/pyod/pyod/models/base.py:168, in BaseDetector.predict(self, X, return_confidence)
147 """Predict if a particular sample is an outlier or not.
148
149 Parameters
(...)
164 Only if return_confidence is set to True.
165 """
167 check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
--> 168 pred_score = self.decision_function(X)
170 if isinstance(self.contamination, (float, int)):
171 prediction = (pred_score > self.threshold_).astype('int').ravel()
File /kaggle/working/pyod/pyod/models/suod.py:260, in SUOD.decision_function(self, X)
257 X = check_array(X)
259 # initialize the output score
--> 260 predicted_scores = self.model_.decision_function(X)
262 # standardize the score and combine
263 predicted_scores = self.score_scalar_.transform(predicted_scores)
File /opt/conda/lib/python3.10/site-packages/suod/models/base.py:460, in SUOD.decision_function(self, X)
456 # decide whether bps is needed
457 # it is turned off
458 if self.bps_flag:
459 # load the pre-trained cost predictor to forecast the train cost
--> 460 cost_predictor = joblib.load(self.cost_forecast_loc_pred_)
462 time_cost_pred = cost_forecast_meta(cost_predictor, X,
463 self.base_estimator_names)
465 n_estimators_list, starts, n_jobs = balanced_scheduling(
466 time_cost_pred, self.n_estimators, self.n_jobs, self.verbose)
File /opt/conda/lib/python3.10/site-packages/joblib/numpy_pickle.py:658, in load(filename, mmap_mode)
652 if isinstance(fobj, str):
653 # if the returned file object is a string, this means we
654 # try to load a pickle file generated with an version of
655 # Joblib so we load it with joblib compatibility function.
656 return load_compatibility(fobj)
--> 658 obj = _unpickle(fobj, filename, mmap_mode)
659 return obj
File /opt/conda/lib/python3.10/site-packages/joblib/numpy_pickle.py:577, in _unpickle(fobj, filename, mmap_mode)
575 obj = None
576 try:
--> 577 obj = unpickler.load()
578 if unpickler.compat_mode:
579 warnings.warn("The file '%s' has been generated with a "
580 "joblib version less than 0.10. "
581 "Please regenerate this pickle file."
582 % filename,
583 DeprecationWarning, stacklevel=3)
File /opt/conda/lib/python3.10/pickle.py:1213, in _Unpickler.load(self)
1211 raise EOFError
1212 assert isinstance(key, bytes_types)
-> 1213 dispatch[key[0]](self)
1214 except _Stop as stopinst:
1215 return stopinst.value
File /opt/conda/lib/python3.10/site-packages/joblib/numpy_pickle.py:402, in NumpyUnpickler.load_build(self)
394 def load_build(self):
395 """Called to set the state of a newly created object.
396
397 We capture it to replace our place-holder objects, NDArrayWrapper or
(...)
400 NDArrayWrapper is used for backward compatibility with joblib <= 0.9.
401 """
--> 402 Unpickler.load_build(self)
404 # For backward compatibility, we support NDArrayWrapper objects.
405 if isinstance(self.stack[-1], (NDArrayWrapper, NumpyArrayWrapper)):
File /opt/conda/lib/python3.10/pickle.py:1718, in _Unpickler.load_build(self)
1716 setstate = getattr(inst, "__setstate__", None)
1717 if setstate is not None:
-> 1718 setstate(state)
1719 return
1720 slotstate = None
File sklearn/tree/_tree.pyx:676, in sklearn.tree._tree.Tree.__setstate__()
File sklearn/tree/_tree.pyx:1364, in sklearn.tree._tree._check_node_ndarray()
ValueError: node array from the pickle has an incompatible dtype:
- expected: [('left_child', '<i8'), ('right_child', '<i8'), ('feature', '<i8'), ('threshold', '<f8'), ('impurity', '<f8'), ('n_node_samples', '<i8'), ('weighted_n_node_samples', '<f8')]
- got : {'names': ['left_child', 'right_child', 'feature', 'threshold', 'impurity', 'n_node_samples', 'weighted_n_node_samples', 'missing_go_to_left'], 'formats': ['<i8', '<i8', '<i8', '<f8', '<f8', '<i8', '<f8', 'u1'], 'offsets': [0, 8, 16, 24, 32, 40, 48, 56], 'itemsize': 64}
The scikit-learn version is 1.2.2 and the joblib version is 1.2.0.
I can run the other sample code without any issues, but whenever I use SUOD, I get the error above.
Mike Laszkiewicz commented
Hi,
I had the same issue, first with scikit-learn version 1.1.3. I then updated to 1.2.2 (just like you), and finally to 1.3.0.
Updating to 1.3.0 did the trick for me.
I hope that helps.