WillKoehrsen / feature-selector

Feature selector is a tool for dimensionality reduction of machine learning datasets

Geek Repo

Github PK Tool

Cannot run identify_zero_importance on small data

bwang482 opened this issue · comments

I have 32 samples, and it seems feature-selector doesn't work with small datasets. I have edited line 295 in feature_selector.py as shown below, but I am still getting the same error.

if task == 'classification':
   model = lgb.LGBMClassifier(min_child_samples=1, min_data_in_bin=1, n_estimators=1000, learning_rate = 0.05, verbose = -1)

The error message I keep getting:

Training Gradient Boosting Model

---------------------------------------------------------------------------
LightGBMError                             Traceback (most recent call last)
<ipython-input-5-47b00f66835b> in <module>
      1 fs.identify_zero_importance(task = 'classification', eval_metric = 'auc', 
----> 2                             n_iterations = 10, early_stopping = True)

/mnt/sdb/env1/lib/python3.6/site-packages/feature_selector/feature_selector.py in identify_zero_importance(self, task, eval_metric, n_iterations, early_stopping)
    309                 model.fit(train_features, train_labels, eval_metric = eval_metric,
    310                           eval_set = [(valid_features, valid_labels)],
--> 311                           early_stopping_rounds = 100, verbose = -1)
    312 
    313                 # Clean up memory

/mnt/sdb/env1/lib/python3.6/site-packages/lightgbm/sklearn.py in fit(self, X, y, sample_weight, init_score, eval_set, eval_names, eval_sample_weight, eval_class_weight, eval_init_score, eval_metric, early_stopping_rounds, verbose, feature_name, categorical_feature, callbacks)
    673                                         verbose=verbose, feature_name=feature_name,
    674                                         categorical_feature=categorical_feature,
--> 675                                         callbacks=callbacks)
    676         return self
    677 

/mnt/sdb/env1/lib/python3.6/site-packages/lightgbm/sklearn.py in fit(self, X, y, sample_weight, init_score, group, eval_set, eval_names, eval_sample_weight, eval_class_weight, eval_init_score, eval_group, eval_metric, early_stopping_rounds, verbose, feature_name, categorical_feature, callbacks)
    467                               verbose_eval=verbose, feature_name=feature_name,
    468                               categorical_feature=categorical_feature,
--> 469                               callbacks=callbacks)
    470 
    471         if evals_result:

/mnt/sdb/env1/lib/python3.6/site-packages/lightgbm/engine.py in train(params, train_set, num_boost_round, valid_sets, valid_names, fobj, feval, init_model, feature_name, categorical_feature, early_stopping_rounds, evals_result, verbose_eval, learning_rates, keep_training_booster, callbacks)
    178     # construct booster
    179     try:
--> 180         booster = Booster(params=params, train_set=train_set)
    181         if is_valid_contain_train:
    182             booster.set_train_data_name(train_data_name)

/mnt/sdb/env1/lib/python3.6/site-packages/lightgbm/basic.py in __init__(self, params, train_set, model_file, silent)
   1301             self.handle = ctypes.c_void_p()
   1302             _safe_call(_LIB.LGBM_BoosterCreate(
-> 1303                 train_set.construct().handle,
   1304                 c_str(params_str),
   1305                 ctypes.byref(self.handle)))

/mnt/sdb/env1/lib/python3.6/site-packages/lightgbm/basic.py in construct(self)
    854                                 weight=self.weight, group=self.group, init_score=self.init_score,
    855                                 predictor=self._predictor, silent=self.silent, feature_name=self.feature_name,
--> 856                                 categorical_feature=self.categorical_feature, params=self.params)
    857             if self.free_raw_data:
    858                 self.data = None

/mnt/sdb/env1/lib/python3.6/site-packages/lightgbm/basic.py in _lazy_init(self, data, label, reference, weight, group, init_score, predictor, silent, feature_name, categorical_feature, params)
    708             self.__init_from_csc(data, params_str, ref_dataset)
    709         elif isinstance(data, np.ndarray):
--> 710             self.__init_from_np2d(data, params_str, ref_dataset)
    711         else:
    712             try:

/mnt/sdb/env1/lib/python3.6/site-packages/lightgbm/basic.py in __init_from_np2d(self, mat, params_str, ref_dataset)
    770             c_str(params_str),
    771             ref_dataset,
--> 772             ctypes.byref(self.handle)))
    773 
    774     def __init_from_csr(self, csr, params_str, ref_dataset):

/mnt/sdb/env1/lib/python3.6/site-packages/lightgbm/basic.py in _safe_call(ret)
     46     """
     47     if ret != 0:
---> 48         raise LightGBMError(_LIB.LGBM_GetLastError())
     49 
     50 

LightGBMError: b'Cannot construct Dataset since there are not useful features.                 It should be at least two unique rows.                 If the num_row (num_data) is small, you can set min_data=1 and min_data_in_bin=1 to fix this.                 Otherwise please make sure you are using the right dataset.'