Formula spec fails when more than one function call
FabricatiDiem opened this issue · comments
I'm trying out a basic polynomial regression with Bambi, but I keep hitting the following error in Formulae when specifying multiple higher-order terms:
TypeError Traceback (most recent call last)
Cell In[68], line 10
4 test_data_df = pd.DataFrame.from_dict({
5 'X': [-1, -0.97979798, -0.95959596, -0.93939394, -0.91919192],
6 'y': [-6.2, -5.93976743, -5.68822415, -5.44516237, -5.21037433]
7 })
9 # model = bmb.Model("y ~ X + np.power(X, 2)", test_data_df) -- works fine
---> 10 model = bmb.Model("y ~ X + np.power(X, 2) + np.power(X, 3)", test_data_df)
11 # model = bmb.Model("y ~ np.power(X, 2) + np.power(X, 3)", test_data_df) -- also fails for the same reason
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/bambi/models.py:142, in Model.__init__(self, formula, data, family, priors, link, categorical, potentials, dropna, auto_scale, noncentered, priors_cor)
140 na_action = "drop" if dropna else "error"
141 self.formula = formula
--> 142 self._design = design_matrices(formula, data, na_action, 1, extra_namespace)
144 if self._design.response is None:
145 raise ValueError(
146 "No outcome variable is set! "
147 "Please specify an outcome variable using the formula interface."
148 )
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/matrices.py:497, in design_matrices(formula, data, na_action, env, extra_namespace)
494 env = Environment.capture(env, reference=1)
495 env = env.with_outer_namespace(extra_namespace)
--> 497 description = model_description(formula)
499 # Incomplete rows are calculated using columns involved in model formula only
500 cols_to_select = description.var_names.intersection(set(data.columns))
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/model_description.py:22, in model_description(formula)
6 def model_description(formula):
7 """Interpret model formula and obtain a model description.
8
9 This function receives a string with a formula describing a statistical
(...)
20 An object of class ModelTerms with an internal description of the model.
21 """
---> 22 return Resolver(Parser(Scanner(formula).scan()).parse()).resolve()
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/resolver.py:16, in Resolver.resolve(self)
15 def resolve(self):
---> 16 return self.expr.accept(self)
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/expr.py:72, in Binary.accept(self, visitor)
71 def accept(self, visitor):
---> 72 return visitor.visitBinaryExpr(self)
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/resolver.py:24, in Resolver.visitBinaryExpr(self, expr)
22 otype = expr.operator.kind
23 if otype == "TILDE":
---> 24 return Response(expr.left.accept(self)) + expr.right.accept(self)
25 if otype == "PLUS":
26 return expr.left.accept(self) + expr.right.accept(self)
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/expr.py:72, in Binary.accept(self, visitor)
71 def accept(self, visitor):
---> 72 return visitor.visitBinaryExpr(self)
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/resolver.py:26, in Resolver.visitBinaryExpr(self, expr)
24 return Response(expr.left.accept(self)) + expr.right.accept(self)
25 if otype == "PLUS":
---> 26 return expr.left.accept(self) + expr.right.accept(self)
27 elif otype == "MINUS":
28 return expr.left.accept(self) - expr.right.accept(self)
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/terms/terms.py:865, in Model.__add__(self, other)
863 return self - Intercept()
864 elif isinstance(other, (Term, GroupSpecificTerm, Intercept)):
--> 865 return self.add_term(other)
866 elif isinstance(other, type(self)):
867 for term in other.terms:
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/terms/terms.py:1096, in Model.add_term(self, term)
1094 return self
1095 elif isinstance(term, (Term, Intercept)):
-> 1096 if term not in self.common_terms:
1097 self.common_terms.append(term)
1098 return self
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/terms/terms.py:237, in Term.__eq__(self, other)
235 return False
236 else:
--> 237 return self.components == other.components
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/terms/call.py:51, in Call.__eq__(self, other)
49 if not isinstance(other, type(self)):
50 return False
---> 51 return self.call == other.call
File ~/opt/anaconda3/envs/bayesEducation/lib/python3.10/site-packages/formulae/terms/call_resolver.py:225, in LazyCall.__eq__(self, other)
222 def __eq__(self, other):
223 return (
224 self.callee == other.callee
--> 225 and set(self.args) == set(other.args)
226 and set(self.kwargs) == set(other.kwargs)
227 )
TypeError: unhashable type: 'LazyValue'
Here is a minimal example to reproduce:
import bambi as bmb
import pandas as pd
test_data_df = pd.DataFrame.from_dict({
'X': [-1, -0.97979798, -0.95959596, -0.93939394, -0.91919192],
'y': [-6.2, -5.93976743, -5.68822415, -5.44516237, -5.21037433]
})
# model = bmb.Model("y ~ X + np.power(X, 2)", test_data_df) -- works fine
model = bmb.Model("y ~ X + np.power(X, 2) + np.power(X, 3)", test_data_df)
# model = bmb.Model("y ~ np.power(X, 2) + np.power(X, 3)", test_data_df) -- also fails for the same reason
Using:
Python 3.10
PyMC 5.02
Bambi 0.9.3
Formulae 0.3.4
It seems like something like this used to work before, so I'm curious what I am doing wrong.
@FabricatiDiem thanks for opening the issue!
This is indeed a bug. The problem is not the use of multiple function calls. The problem is that some internal machinery is unable to compare two values, and that happens because of the '2' and the '3' passed as arguments in the function calls.
If you define custom functions and call them in the formula with only variable names as arguments, it works:
test_data_df = pd.DataFrame.from_dict({
'X': [-1, -0.97979798, -0.95959596, -0.93939394, -0.91919192],
'y': [-6.2, -5.93976743, -5.68822415, -5.44516237, -5.21037433],
})
def p2(x):
return np.power(x, 2)
def p3(x):
return np.power(x, 3)
bmb.Model("y ~ X + p2(X) + p3(X)", test_data_df)
Formula: y ~ X + p2(X) + p3(X)
Family: gaussian
Link: mu = identity
Observations: 5
Priors:
target = mu
Common-level effects
Intercept ~ Normal(mu: -5.6967, sigma: 34.3017)
X ~ Normal(mu: 0.0, sigma: 30.6203)
p2(X) ~ Normal(mu: 0.0, sigma: 15.9535)
p3(X) ~ Normal(mu: 0.0, sigma: 11.0754)
Auxiliary parameters
y_sigma ~ HalfStudentT(nu: 4.0, sigma: 0.3499)
So this is just a workaround to make it work now. I'll try to fix the bug.
Thank you!
@FabricatiDiem it's fixed if you install from the master branch now :)