databricks / koalas

Koalas: pandas API on Apache Spark

DataFrame.pivot does not accept list as index parameter

crucis opened this issue

The following example does not work in Databricks Runtime 8.4:

import databricks.koalas as ks

kdf = ks.DataFrame({"ui": ['C', 'D', 'D', 'C'],
                    "foo": ['one', 'one', 'two', 'two'],
                    "bar": ['A', 'A', 'B', 'C'],
                    "ar": [1, 2, 2, 2],
                    "baz": [1, 2, 3, 4]}, columns=['ui', 'foo', 'bar', 'baz', 'ar'])

kdf.pivot(index=['ui', 'foo'], columns='bar', values=['baz', 'ar'])
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<command-4107535394601473> in <module>
----> 1 df.pivot(index=['ui','foo'] , columns='bar', values=['baz', 'ar'])

/databricks/python/lib/python3.8/site-packages/databricks/koalas/usage_logging/__init__.py in wrapper(*args, **kwargs)
    193             start = time.perf_counter()
    194             try:
--> 195                 res = func(*args, **kwargs)
    196                 logger.log_success(
    197                     class_name, function_name, time.perf_counter() - start, signature

/databricks/python/lib/python3.8/site-packages/databricks/koalas/frame.py in pivot(self, index, columns, values)
   6274             index = df._internal.column_labels[: self._internal.index_level]
   6275 
-> 6276         df = df.pivot_table(index=index, columns=columns, values=values, aggfunc="first")
   6277 
   6278         if should_use_existing_index:

/databricks/python/lib/python3.8/site-packages/databricks/koalas/usage_logging/__init__.py in wrapper(*args, **kwargs)
    188         if hasattr(_local, "logging") and _local.logging:
    189             # no need to log since this should be internal call.
--> 190             return func(*args, **kwargs)
    191         _local.logging = True
    192         try:

/databricks/python/lib/python3.8/site-packages/databricks/koalas/frame.py in pivot_table(self, values, index, columns, aggfunc, fill_value)
   6048             index = [label if is_name_like_tuple(label) else (label,) for label in index]
   6049             sdf = (
-> 6050                 sdf.groupBy([self._internal.spark_column_name_for(label) for label in index])
   6051                 .pivot(pivot_col=self._internal.spark_column_name_for(columns))
   6052                 .agg(*agg_cols)

/databricks/python/lib/python3.8/site-packages/databricks/koalas/frame.py in <listcomp>(.0)
   6048             index = [label if is_name_like_tuple(label) else (label,) for label in index]
   6049             sdf = (
-> 6050                 sdf.groupBy([self._internal.spark_column_name_for(label) for label in index])
   6051                 .pivot(pivot_col=self._internal.spark_column_name_for(columns))
   6052                 .agg(*agg_cols)

/databricks/python/lib/python3.8/site-packages/databricks/koalas/internal.py in spark_column_name_for(self, label_or_scol)
    813             scol = label_or_scol
    814         else:
--> 815             scol = self.spark_column_for(label_or_scol)
    816         return self.spark_frame.select(scol).columns[0]
    817 

/databricks/python/lib/python3.8/site-packages/databricks/koalas/internal.py in spark_column_for(self, label)
    803         """ Return Spark Column for the given column label. """
    804         column_labels_to_scol = dict(zip(self.column_labels, self.data_spark_columns))
--> 805         if label in column_labels_to_scol:
    806             return column_labels_to_scol[label]
    807         else:

TypeError: unhashable type: 'list'
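
For comparison, the equivalent call succeeds in plain pandas (a minimal sketch; it assumes pandas >= 1.1, where pivot started accepting list-like index/columns/values):

import pandas as pd

pdf = pd.DataFrame({"ui": ['C', 'D', 'D', 'C'],
                    "foo": ['one', 'one', 'two', 'two'],
                    "bar": ['A', 'A', 'B', 'C'],
                    "ar": [1, 2, 2, 2],
                    "baz": [1, 2, 3, 4]}, columns=['ui', 'foo', 'bar', 'baz', 'ar'])

# pandas builds a MultiIndex from ['ui', 'foo'] and pivots 'bar' into columns
pdf.pivot(index=['ui', 'foo'], columns='bar', values=['baz', 'ar'])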

I am using

kdf.pivot_table(index=['ui','foo'] , columns='bar', values=['baz', 'ar'], aggfunc='first')

to work around the problem, but I think that pivot should accept a list as the index parameter and produce a MultiIndex result.
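
For reference, a self-contained version of the workaround (note that aggfunc='first' only matches pivot's semantics when each index/columns combination is unique, which is the case for this data):

import databricks.koalas as ks

kdf = ks.DataFrame({"ui": ['C', 'D', 'D', 'C'],
                    "foo": ['one', 'one', 'two', 'two'],
                    "bar": ['A', 'A', 'B', 'C'],
                    "ar": [1, 2, 2, 2],
                    "baz": [1, 2, 3, 4]}, columns=['ui', 'foo', 'bar', 'baz', 'ar'])

# pivot_table groups by ['ui', 'foo'] and takes the first value per ('value', 'bar') cell,
# which behaves like pivot as long as there are no duplicate index/columns pairs
res = kdf.pivot_table(index=['ui', 'foo'], columns='bar',
                      values=['baz', 'ar'], aggfunc='first')

# convert to pandas to inspect the MultiIndex result
print(res.to_pandas())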