tinkoff-ai / etna

ETNA – Time-Series Library

Home Page: https://etna.tinkoff.ru

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

[BUG] Error on using `MRMRFeatureSelectionTransform` with `fast_redundancy=False`

Mr-Geekman opened this issue · comments

πŸ› Bug Report

Running code that uses `MRMRFeatureSelectionTransform` with `fast_redundancy=False` fails with the following error:

ValueError: shape mismatch: value array of shape (115,0) could not be broadcast to indexing result of shape (115,)

Expected behavior

There is no error.

How To Reproduce

import pandas as pd


from etna.analysis import StatisticsRelevanceTable
from etna.transforms import MRMRFeatureSelectionTransform
from etna.pipeline import Pipeline
from etna.models import CatBoostPerSegmentModel
from etna.transforms import DateFlagsTransform
from etna.transforms import MeanTransform
from etna.transforms import LagTransform
from etna.transforms import TrendTransform
from etna.transforms import FourierTransform
from etna.transforms import HolidayTransform
from etna.metrics import SMAPE
from etna.datasets import TSDataset

# Forecast horizon passed to the pipeline; also the smallest lag generated by LagTransform in main().
HORIZON = 60


def main():
    """Reproduce the ``ValueError`` raised with ``fast_redundancy=False``.

    Reads ``examples/data/nordic_merch_sales.csv``, assembles a list of
    feature-generating transforms followed by
    ``MRMRFeatureSelectionTransform(fast_redundancy=False)``, runs a one-fold
    backtest with ``CatBoostPerSegmentModel`` and prints the mean SMAPE.
    """
    raw_frame = pd.read_csv("examples/data/nordic_merch_sales.csv")
    dataset_frame = TSDataset.to_dataset(raw_frame)
    ts = TSDataset(dataset_frame, freq="D")

    # Feature-generating steps; the order is kept exactly as in the report.
    feature_steps = [
        TrendTransform(in_column="target", out_column="trend"),
        LagTransform(
            in_column="target",
            lags=range(HORIZON, 100),
            out_column="target_lag",
        ),
        DateFlagsTransform(
            day_number_in_month=True,
            day_number_in_week=False,
            is_weekend=False,
            out_column="datetime_flag",
        ),
        MeanTransform(
            in_column=f"target_lag_{HORIZON}",
            window=12,
            seasonality=7,
            out_column="mean_transform",
        ),
        FourierTransform(period=365, order=30, out_column="fourier_year"),
        FourierTransform(period=30.4, order=10, out_column="fourier_month"),
        HolidayTransform(iso_code="SWE", out_column="SWE_holidays"),
        HolidayTransform(iso_code="NOR", out_column="NOR_holidays"),
        HolidayTransform(iso_code="FIN", out_column="FIN_holidays"),
    ]

    selector = MRMRFeatureSelectionTransform(
        top_k=20,
        relevance_table=StatisticsRelevanceTable(),
        # The reported ValueError occurs with fast_redundancy=False.
        fast_redundancy=False,
    )

    # Feature selection runs last, after all features have been generated.
    pipeline = Pipeline(
        model=CatBoostPerSegmentModel(),
        transforms=[*feature_steps, selector],
        horizon=HORIZON,
    )

    backtest_output = pipeline.backtest(ts=ts, metrics=[SMAPE()], n_folds=1)
    # backtest yields three values; only the metrics table is needed here.
    metrics_table, _forecast_table, _ = backtest_output

    smape_mrmr_feature_selector = metrics_table["SMAPE"].mean()
    print(f"SMAPE with MRMRFeatureSelectionTransform: {smape_mrmr_feature_selector:.3f}")


# Run the reproduction only when this file is executed as a script.
if __name__ == "__main__":
    main()

Environment

No response

Additional context

No response

Checklist

  • Bug appears at the latest library version