strengejacke / sjPlot

sjPlot - Data Visualization for Statistics in Social Science

Home Page:https://strengejacke.github.io/sjPlot

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Error when using ggemmeans on random intercept model with a negative binomial distribution using gamlss

Twib-byte opened this issue · comments

Hello,

At the bottom you can find a reproducible example that shows under which conditions the error occurs. The data is fictional because I cannot share a sample of the real data, but the types of variables and nesting structure is similar.

I'm trying to use ggemmeans() to get the marginal means of one of my predictors but I keep getting this error:

Error: Variables of type 'logical' do not work, please coerce to factor and fit the model again.

I don't understand what this means because as far as I can tell the variables are integer or character variables.
The error also only occurs when I specifically use a model using the gamlss command in combination with a random intercept (m1).
Removing the random intercept makes the error go away (m2). Finally, the error also does not occur when I use glmer.nb on the same model (m3).

Any idea what could be causing this? I could simply switch to glmer.nb but I prefer not because my dataset is very large and gamlss is much faster.

library(gamlss)
library(ggeffects)
library(lme4)

y <- c(0,0,0,2,0,5,0,6,0,4,0,0,9,0,0,2,0,0,1,0,0,1,1,0,2,
          0,0,2,0,3,0,0,5,0,1,0,10,0,2,0,0,0,0,1,0,1,0,1,0,
          2,0,3,3,0,2,0,1,1,0,1,0,3,5,0,6,2,2,0,1,2,0)
x1 <- c(rep(200,6),rep(300,7),rep(500,6),rep(250,6),rep(50,6),
          rep(75,7),rep(60,11),rep(40,12),rep(60,10))
x2 <- c(1000,20000,300000,20000,600000,5000,1000,20000,300000,20000,
          600000,5000,9000,1000,20000,300000,20000,600000,9000,1000,20000,
          300000,20000,600000,5000,1000,20000,300000,20000,600000,5000,
          1000,20000,300000,20000,600000,5000,9000,1000,20000,300000,20000,
          600000,5000,9000,120000,5000,2000,9000,1000,20000,300000,20000,
          600000,5000,9000,120000,5000,2000,9000,7800,1000,20000,300000,
          20000,600000,5000,9000,120000,5000,2000)
x3 <- c(75000,200,80000,500,120000,1000,75000,200,80000,500,120000,1000,
          60000,75000,200,80000,500,120000,60000,75000,200,80000,500,120000,
          1000,75000,200,80000,500,120000,1000,75000,200,80000,500,120000,1000,
          60000,75000,200,80000,500,120000,1000,60000,50000,2000,4000,3000,75000,
          200,80000,500,120000,1000,60000,50000,2000,4000,3000,25000,75000,200,
          80000,500,120000,100,60000,50000,2000,4000)
x4 <- c(rep(1,6),rep(2,7),rep(3,6),rep(4,6),rep(5,6),rep(6,7),rep(7,11),rep(8,12),rep(9,10))
x5 <- c(rep("CAT1",25),rep("CAT2",24),rep("CAT3",22))
df <- data.frame(y,x1,x2,x3,x4,x5)

m1 <- gamlss(y~1+(1|x4)+log(x1)+log(x2)+log(x3)+x5,family=NBI,data=df)
ggemmeans(m1,terms="x5")

m2 <- gamlss(y~1+log(x1)+log(x2)+log(x3)+x5,family=NBI,data=df)
ggemmeans(m2,terms="x5")

m3 <- glmer.nb(y~1+(1|x4)+log(x1)+log(x2)+log(x3)+x5,data=df)
ggemmeans(m3,terms="x5")

(1|x4) is treated as logical variable:

library(gamlss)
#> Loading required package: splines
#> Loading required package: gamlss.data
#> 
#> Attaching package: 'gamlss.data'
#> The following object is masked from 'package:datasets':
#> 
#>     sleep
#> Loading required package: gamlss.dist
#> Loading required package: MASS
#> Loading required package: nlme
#> Loading required package: parallel
#>  **********   GAMLSS Version 5.4-3  **********
#> For more on GAMLSS look at https://www.gamlss.com/
#> Type gamlssNews() to see new features/changes/bug fixes.

y <- c(0,0,0,2,0,5,0,6,0,4,0,0,9,0,0,2,0,0,1,0,0,1,1,0,2,
       0,0,2,0,3,0,0,5,0,1,0,10,0,2,0,0,0,0,1,0,1,0,1,0,
       2,0,3,3,0,2,0,1,1,0,1,0,3,5,0,6,2,2,0,1,2,0)
x1 <- c(rep(200,6),rep(300,7),rep(500,6),rep(250,6),rep(50,6),
        rep(75,7),rep(60,11),rep(40,12),rep(60,10))
x2 <- c(1000,20000,300000,20000,600000,5000,1000,20000,300000,20000,
        600000,5000,9000,1000,20000,300000,20000,600000,9000,1000,20000,
        300000,20000,600000,5000,1000,20000,300000,20000,600000,5000,
        1000,20000,300000,20000,600000,5000,9000,1000,20000,300000,20000,
        600000,5000,9000,120000,5000,2000,9000,1000,20000,300000,20000,
        600000,5000,9000,120000,5000,2000,9000,7800,1000,20000,300000,
        20000,600000,5000,9000,120000,5000,2000)
x3 <- c(75000,200,80000,500,120000,1000,75000,200,80000,500,120000,1000,
        60000,75000,200,80000,500,120000,60000,75000,200,80000,500,120000,
        1000,75000,200,80000,500,120000,1000,75000,200,80000,500,120000,1000,
        60000,75000,200,80000,500,120000,1000,60000,50000,2000,4000,3000,75000,
        200,80000,500,120000,1000,60000,50000,2000,4000,3000,25000,75000,200,
        80000,500,120000,100,60000,50000,2000,4000)
x4 <- c(rep(1,6),rep(2,7),rep(3,6),rep(4,6),rep(5,6),rep(6,7),rep(7,11),rep(8,12),rep(9,10))
x5 <- c(rep("CAT1",25),rep("CAT2",24),rep("CAT3",22))
df <- data.frame(y,x1,x2,x3,x4,x5)

m1 <- gamlss(y~1+(1|x4)+log(x1)+log(x2)+log(x3)+x5,family=NBI,data=df)
#> GAMLSS-RS iteration 1: Global Deviance = 214.6302 
#> GAMLSS-RS iteration 2: Global Deviance = 214.6302
model.frame(m1) |> str()
#> 'data.frame':    71 obs. of  6 variables:
#>  $ y      : num  0 0 0 2 0 5 0 6 0 4 ...
#>  $ 1 | x4 : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
#>  $ log(x1): num  5.3 5.3 5.3 5.3 5.3 ...
#>  $ log(x2): num  6.91 9.9 12.61 9.9 13.3 ...
#>  $ log(x3): num  11.23 5.3 11.29 6.21 11.7 ...
#>  $ x5     : Factor w/ 3 levels "CAT1","CAT2",..: 1 1 1 1 1 1 1 1 1 1 ...

Created on 2022-06-13 by the reprex package (v2.0.1)

Thanks for the quick response!