atorus-research / Tplyr

Home Page:https://atorus-research.github.io/Tplyr/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Distinct Percentages not working for total groups

mstackhouse opened this issue · comments

Prerequisites

For more information, see the CONTRIBUTING guide.

Description

It looks like additional treatment groups (total groups and custom combined groups) aren't being merged properly into the denominators on the current devel branch.

Steps to Reproduce (Bug Report Only)

library(Tplyr)

# Read the ADaM adverse events dataset (XPT format) directly from the
# phuse-org/TestDataFactory GitHub repository.
adae <- haven::read_xpt(url("https://github.com/phuse-org/TestDataFactory/raw/main/Updated/TDF_ADaM/adae.xpt"))

# Table by TRTA with two additional treatment groups: a total group and a
# "Dosed" group combining the two Xanomeline arms. The single count layer
# counts distinct USUBJID values under the label "Any Body System".
t <- tplyr_table(adae, TRTA) %>%
  add_total_group() %>%
  add_treat_grps(
    "Dosed" = c("Xanomeline High Dose", "Xanomeline Low Dose")
  ) %>% 
  add_layer(
    group_count("Any Body System") %>%
      set_distinct_by(USUBJID)
  ) 

# Build the table and keep only the Placebo, Total, and Dosed result columns;
# the two added groups are the ones that show the bad percentages.
t %>%
  build() %>% 
  dplyr::select(var1_Placebo, var1_Total, var1_Dosed)
#> # A tibble: 1 × 3
#>   var1_Placebo    var1_Total      var1_Dosed     
#>   <chr>           <chr>           <chr>          
#> 1 "  69 (100.0%)" " 225 (  Inf%)" " 156 (  Inf%)"

Created on 2022-07-19 by the reprex package (v0.3.0)

Expected behavior: [What you expected to happen]

Denominators shouldn't be Inf - they should respect the totals from the current treatment group's header N values:

header_n(t)

# A tibble: 5 × 2
  TRTA                     n
  <fct>                <int>
1 Dosed                  890
2 Placebo                301
3 Total                 1191
4 Xanomeline High Dose   455
5 Xanomeline Low Dose    435

Actual behavior: [What actually happened]

Inf percentages are presented instead.

Versions

You can get this information from executing sessionInfo().

Example

# Exercise 5 - creating ARDs

library(Tplyr)
library(haven)
library(dplyr)

# Read in datasets
# All three are SAS transport (XPT) files read with haven::read_xpt(); the
# paths are relative to the current working directory.
# adsl is used below as the demography source and as the population data.
adsl <- read_xpt("datasets/ADAM/adsl.xpt")
# adae feeds the adverse event counts in sections B and C.
adae <- read_xpt("datasets/ADAM/adae.xpt")
# adpft feeds the pulmonary function summaries in sections D and E.
adpft <- read_xpt("datasets/ADAM/adpft.xpt")


# A) Demography ------------------------------------------------------------
# Task: add layers for ethnicity and baseline BMI to the demography table.
#
# The table is built from adsl, restricted to SAFFL == "Y", split by TRT01A
# with an added total group. get_numeric_data() returns the unformatted
# numeric results of every layer instead of a formatted display table.
demog <- adsl %>%
  tplyr_table(TRT01A, where = SAFFL == "Y") %>%
  add_total_group() %>%
  add_layer(group_count(SEX, by = vars("Sex", "n (%)"))) %>%
  add_layer(group_desc(AGE, by = "Age (years)")) %>%
  add_layer(group_count(AGEGR1, by = vars("Age (years)", "n (%)"))) %>%
  add_layer(group_count(RACE, by = vars("Race", "n (%)"))) %>%
  add_layer(group_count(ETHNIC, by = vars("Ethnicity", "n (%)"))) %>%
  # ANSWER
  add_layer(group_desc(HEIGHTBL, by = "Height (cm)")) %>%
  add_layer(group_desc(WEIGHTBL, by = "Weight (kg)")) %>%
  add_layer(group_desc(BMIBL, by = "BMI (kg/m^2)")) %>%
  get_numeric_data()



# B) AEs by body system and term -------------------------------------------
# Task: number and percentage of *unique* subjects with at least one AE by
# AEBODSYS, AETERM, and treatment (hint: pass multiple target variables to
# `group_count`).
#
# adsl is attached as the population data with TRT01A as its treatment
# variable; subjects are counted once per cell via set_distinct_by(USUBJID).
ae <- tplyr_table(adae, TRT01A) %>%
  set_pop_data(adsl) %>%
  set_pop_treat_var(TRT01A) %>%
  # ANSWER
  add_layer(
    group_count(vars(AEBODSYS, AETERM)) %>%
      set_distinct_by(USUBJID)
  ) %>%
  build()

# C) Any AE plus total group -----------------------------------------------
# Task: number and percentage of *unique* subjects with any AE, done by
# adding one more count layer to the code from 5B, plus a total treatment
# group.
#
# The first layer counts distinct subjects under the single label
# "Any Body System"; the second repeats the AEBODSYS / AETERM breakdown.
ae <- tplyr_table(adae, TRT01A) %>%
  set_pop_data(adsl) %>%
  add_layer(
    group_count("Any Body System") %>%
      set_distinct_by(USUBJID)
  ) %>%
  # ANSWER
  add_total_group() %>%
  add_layer(
    group_count(vars(AEBODSYS, AETERM)) %>%
      set_distinct_by(USUBJID)
  ) %>%
  get_numeric_data()


# D) ----------------------------------------------------------------------
# For the safety population, calculate the following descriptive statistics for
# each pulmonary function test at each visit and each treatment:
#     "n"
#     "Mean (SD)"
#     "Median"
#     "Q1, Q3"
#     "Min, Max"
#     "Missing"
#
# The population data and its treatment variable are set BEFORE the total
# group is added. add_total_group() builds the "Total" group from the
# treatment values known to the table at the time it is called, so calling it
# first would derive the group from adpft/TRTA rather than adsl/TRT01P.
# NOTE(review): the target uses the actual treatment (TRTA) while the
# population uses the planned treatment (TRT01P); sections A-C use TRT01A.
# Confirm this pairing is intended.

pft <- adpft %>%
  tplyr_table(treat_var = TRTA, where = SAFFL == "Y") %>%
  set_pop_data(adsl) %>%
  set_pop_treat_var(TRT01P) %>%
  add_total_group() %>%
  add_layer(
    group_desc(target_var = AVAL, by = vars(PARAM, VISIT))
  ) %>%
  get_numeric_data()


# E) (bonus) Change from baseline -------------------------------------------
# Task: the same descriptive statistics as in (D), but for the change from
# baseline of each pulmonary function test by visit and treatment.
#
# Rows with VISIT equal to "BASELINE" are dropped before the table is
# created; the remaining records are restricted to SAFFL == "Y" and
# summarised on CHG by PARAM and VISIT, with an added total group.

pft_chg <- adpft %>%
  filter(VISIT != "BASELINE") %>%
  tplyr_table(TRTA, where = SAFFL == "Y") %>%
  add_total_group() %>%
  add_layer(group_desc(CHG, by = vars(PARAM, VISIT))) %>%
  get_numeric_data()


I think the issue might be this chunk:

Tplyr/R/denom.R

Line 165 in c728cc2

if (total_extract == "n") {