30 Moderation with a Latent Class Variable (Arch, Nylund-Gibson, Ing, 2026)

Citation: Arch, Dina Ali Naji, Karen Nylund-Gibson, and Marsha Ing. “Moderation with a Latent Class Variable: A Tutorial and Example.” Behavior Research Methods 58, no. 4 (2026): 108. https://doi.org/10.3758/s13428-025-02886-x.


This appendix walks through the R code to apply moderation with a latent class variable using the MplusAutomation package.


Packages



Table 30.1: Table 30.2: Longitudinal Study of American Life
Name Description
LCA Indicator Variables
KA47A I Enjoy Science
KA47H Science is Useful in Everyday Problems
KA47I Science Helps Logical Thinking
KA47K Need Science for a Good Job
KA47L Will Use Science Often as an Adult
Predictor
ISCIIRT Science IRT Score (11th Grade)
Distal Outcome
KA9B Space Exploration
KA9D Science Issues
KA9G New Technologies
KA9K Energy Policy Issues
Covariates
URM Under-represented Minority (0 = represented, 1 = under-represented)
FEMALE Sex (0 = male, 1 = female)
MOTHED

Read in LSAL dataset

data <- read_csv(here("34-lca-moderation", "data", "LSAL_data.csv"))

30.1 Descriptive Statistics

30.1.1 Descriptive Statistics using R:

Quick view of all the variables in the dataset (excluding CASENUM, COHORT and SCHOOLID`):

data %>% 
  select(-CASENUM, -COHORT, -SCHOOLID) %>% 
  describe()

Proportion of indicators using R:

# Set up data to find proportions of binary indicators
ds <- data %>% 
  pivot_longer(KA47A:KA47L, names_to = "Variable") 

# Create table of variables and counts
tab <- table(ds$Variable, ds$value)

# Find proportions and round to 3 decimal places
prop <- prop.table(tab, margin = 1) %>% 
  round(3)

# Combine everything to one table 
dframe <- data.frame(Variables=rownames(tab), Proportion=prop[,2], Count=tab[,2])
#remove row names
row.names(dframe) <- NULL
# Format table using `kable()`
dframe %>%
  kable(caption = "Descriptive Summary",booktabs = TRUE, escape = FALSE) %>% 
  kableExtra::kable_styling(latex_options=c("HOLD_position")) 
# Format table using `markdown`
gt(dframe) %>% 
tab_header(title = md("**LCA Indicator Proportions**"), subtitle = md("&nbsp;")) %>%
tab_options(column_labels.font.weight = "bold", row_group.font.weight = "bold") 
LCA Indicator Proportions
 
Variables Proportion Count
KA47A 0.534 1793
KA47H 0.450 1502
KA47I 0.548 1825
KA47K 0.341 1139
KA47L 0.403 1352

30.1.2 Descriptive Statistics using MplusAutomation:

basic_mplus  <- mplusObject(
  TITLE = "LSAL Descriptive Statistics;",
  
  VARIABLE =
    "usevar = FEMALE MOTHED URM ISCIIRT KA9B KA9D KA9G KA9K
    KA47A KA47H KA47I KA47K KA47L;
    categorical = KA47A KA47H KA47I KA47K KA47L FEMALE MOTHED URM;",
  
  ANALYSIS = "TYPE=basic;",
  
  OUTPUT = "sampstat",  
  
  usevariables = colnames(data),
  rdata = data)

basic_mplus_fit <- mplusModeler(basic_mplus, 
                            dataout = here("34-lca-moderation","three_step", "LSAL_data.dat"),
                            modelout = here("34-lca-moderation","three_step","zero.inp"),
                            check = TRUE, run = TRUE, hashfilename = FALSE)

View of descriptive statistics using get_sampstat():

get_sampstat(m.step0.fit)
summary(data)

Or, view the .out file:


30.2 Enumeration

This code uses the mplusObject function in the MplusAutomation package and saves all model runs in the mplus_enum folder.


lca_6  <- lapply(1:6, function(k) {
  lca_enum  <- mplusObject(
    
    TITLE = glue("{k}-Class"), 
    
    VARIABLE = glue(
      "categorical = KA47A KA47H KA47I KA47K KA47L; 
     usevar = KA47A KA47H KA47I KA47K KA47L;
     classes = c({k});"),
    
    ANALYSIS = 
      "estimator = mlr;
      type = mixture;
    starts = 500 100;",
    
    OUTPUT = "sampstat residual tech11 tech14;",
    
    usevariables = colnames(data),
    
    rdata = data)
  
  lca_enum_fit <- mplusModeler(lca_enum,
                               
                               dataout=glue(here("34-lca-moderation","mplus_enum", "LSAL_data.dat")),
                               modelout=glue(here("34-lca-moderation","mplus_enum", "c{k}_lsal.inp")),
                               check=TRUE, run = TRUE, hashfilename = FALSE)
})

IMPORTANT: Before moving forward, make sure to open each output document to ensure models were estimated normally. In this example, the last two models (5- and 6-class models) did not produce reliable output and are excluded.


30.2.1 Table of Fit

First, extract data:

output_enum <- readModels(here("34-lca-moderation","mplus_enum")) 

enum_extract <- LatexSummaryTable(
  output_enum,
  keepCols = c(
    "Title",
    "Parameters",
    "LL",
    "BIC",
    "aBIC",
    "BLRT_PValue",
    "T11_VLMR_PValue",
    "Observations"
  ),
  sortBy = "Title"
) 


allFit <- enum_extract %>%
  mutate(CAIC = -2* LL + Parameters * (log(Observations)+1)) %>%
  mutate(AWE = -2*LL+2*Parameters*(log(Observations)+1.5)) %>%
  mutate(SIC = -.5*BIC) %>%
  mutate(expSIC = exp(SIC - max(SIC))) %>%
  mutate(BF = exp(SIC - lead(SIC))) %>%
  mutate(cmPk = expSIC / sum(expSIC)) %>%
  dplyr::select(1:5, 9:10, 6:7, 13, 14) %>%
  arrange(Parameters)

Then, create table using gt() instead of kable():

fit_table <- allFit %>%
  gt() %>%
  tab_header(title = md("Model Fit Summary Table")) %>%
  cols_label(
    Title = "Classes",
    Parameters = md("Par"),
    LL = md("*LL*"),
    T11_VLMR_PValue = "VLMR",
    BLRT_PValue = "BLRT",
    BF = md("BF"),
    cmPk = md("*cmPk*")
  ) %>%
  tab_footnote(
    footnote = md(
      "*Note.* Par = Parameters; *LL* = model log likelihood;
BIC = Bayesian information criterion;
aBIC = sample size adjusted BIC; CAIC = consistent Akaike information criterion;
AWE = approximate weight of evidence criterion;
BLRT = bootstrapped likelihood ratio test p-value;
VLMR = Vuong-Lo-Mendell-Rubin adjusted likelihood ratio test p-value;
*cmPk* = approximate correct model probability."
    ),
locations = cells_title()
  ) %>%
  fmt_number(c(3:7),
             decimals = 2) %>%
  sub_missing(1:11,
              missing_text = "--") %>%
  fmt(
    c(8:9, 11),
    fns = function(x)
      ifelse(x < 0.001, "<.001",
             scales::number(x, accuracy = .01))
  ) %>%
  fmt(
    10,
    fns = function (x)
      ifelse(x > 100, ">100",
             scales::number(x, accuracy = .01))
  ) %>%  
  tab_style(
    style = list(
      cell_text(weight = "bold")
      ),
    locations = list(cells_body(
     columns = BIC,
     row = BIC == min(BIC[c(1:nrow(allFit))]) 
    ),
    cells_body(
     columns = aBIC,
     row = aBIC == min(aBIC[1:nrow(allFit)])
    ),
    cells_body(
     columns = CAIC,
     row = CAIC == min(CAIC[1:nrow(allFit)])
    ),
    cells_body(
     columns = AWE,
     row = AWE == min(AWE[1:nrow(allFit)])
    ),
    cells_body(
     columns = cmPk,
     row =  cmPk == max(cmPk[1:nrow(allFit)])
     ),    
    cells_body(
     columns = BF,
     row =  BF > 10),
    cells_body( 
     columns =  T11_VLMR_PValue,
     row =  ifelse(T11_VLMR_PValue < .05 & lead(T11_VLMR_PValue) > .05, T11_VLMR_PValue < .05, NA)),
    cells_body(
     columns =  BLRT_PValue,
     row =  ifelse(BLRT_PValue < .05 & lead(BLRT_PValue) > .05, BLRT_PValue < .05, NA))
  )
)

fit_table
Model Fit Summary Table1
Classes Par LL BIC aBIC CAIC AWE BLRT VLMR BF cmPk
1-Class 5 −11,315.87 22,672.34 22,656.45 22,677.34 22,727.94 0.00 <.001
2-Class 11 −9,009.08 18,107.48 18,072.53 18,118.48 18,229.81 <.001 <.001 0.00 <.001
3-Class 17 −8,814.56 17,767.18 17,713.17 17,784.18 17,956.24 <.001 <.001 0.00 <.001
4-Class 23 −8,742.24 17,671.26 17,598.17 17,694.26 17,927.04 <.001 <.001 >100 1.00
5-Class 29 −8,734.82 17,705.15 17,613.01 17,734.15 18,027.66 <.001 0.01 >100 <.001
6-Class 35 −8,732.97 17,750.16 17,638.95 17,785.17 18,139.40 1.00 0.48 <.001
1 Note. Par = Parameters; LL = model log likelihood; BIC = Bayesian information criterion; aBIC = sample size adjusted BIC; CAIC = consistent Akaike information criterion; AWE = approximate weight of evidence criterion; BLRT = bootstrapped likelihood ratio test p-value; VLMR = Vuong-Lo-Mendell-Rubin adjusted likelihood ratio test p-value; cmPk = approximate correct model probability.

Save table:

gtsave(fit_table, here("34-lca-moderation","figures", "fit_table.png"))

30.2.2 Information Criteria Plot

allFit %>%
  dplyr::select(2:7) %>%
  rowid_to_column() %>%
  pivot_longer(`BIC`:`AWE`,
               names_to = "Index",
               values_to = "ic_value") %>%
  mutate(Index = factor(Index,
                        levels = c ("AWE", "CAIC", "BIC", "aBIC"))) %>%
  ggplot(aes(
    x = rowid,
    y = ic_value,
    color = Index,
    shape = Index,
    group = Index,
    lty = Index
  )) +
  geom_point(size = 2.0) + geom_line(size = .8) +
  scale_x_continuous(breaks = 1:nrow(allFit)) +
  scale_colour_grey(end = .5) +
  theme_cowplot() +
  labs(x = "Number of Classes", y = "Information Criteria Value", title = "Information Criteria") +
  theme(
    text = element_text(family = "serif", size = 12),
    legend.text = element_text(family="serif", size=12),
    legend.key.width = unit(3, "line"),
    legend.title = element_blank(),
    legend.position = "top"  
  )

Save figure:

ggsave(here("34-lca-moderation","figures", "info_criteria.png"), dpi = "retina", bg = "white",  height=5, width=7, units="in")

30.2.3 Compare Class Solutions

Compare probability plots for \(K = 1:6\) class solutions

model_results <- data.frame()

for (i in 1:length(output_enum)) {
  temp <- output_enum[[i]]$parameters$probability.scale %>%
    mutate(model = paste0(i, "-Class Model"))
  
  model_results <- rbind(model_results, temp)
}

compare_plot <- model_results %>%
  filter(category == 2) %>%
  mutate(est = as.numeric(est)) %>% 
  mutate(LatentClass = as.factor(LatentClass)) %>% 
  dplyr::select(est, model, LatentClass, param) 

compare_plot$param <- fct_inorder(compare_plot$param)

ggplot(
  compare_plot,
  aes(
    x = param,
    y = est,
    color = LatentClass,
    shape = LatentClass,
    group = LatentClass,
    lty = LatentClass
  )
) +
  geom_point() +
  geom_line() +
  scale_colour_viridis_d() +
  facet_wrap(~ model, ncol = 2) +
  labs(title = "Math Attitude Items", x = " ", y = "Probability") +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(angle = -45, hjust = 0) 
  )                               

Save figure:

ggsave(here("34-lca-moderation","figures", "compare_kclass_plot.png"), dpi = "retina", bg = "white", height=5, width=7, units="in")

30.2.4 4-Class Probability Plot

Use the plot_lca function provided in the folder to plot the item probability plot. This function requires one argument: - model_name: The name of the Mplus readModels object (e.g., output_enum$c4_lsal.out)

source(here("34-lca-moderation","functions", "plot_lca.R"))

plot_lca(model_name = output_enum$c4_lsal.out)

Save figure:

ggsave(here("34-lca-moderation","figures", "probability_plot.png"), dpi="retina", height=5, width=7, units="in")

30.3 LCA Moderation - ML Three-Step


30.3.1 Step 1 - Estimate Unconditional Model w/ Auxiliary Specification



step1  <- mplusObject(
  TITLE = "Step 1 - Unconditional Model w/ Auxiliary Specification", 
  VARIABLE = "categorical = KA47A KA47H KA47I KA47K KA47L;
  usevar =  KA47A KA47H KA47I KA47K KA47L;
  classes = c(4);
  AUXILIARY = FEMALE MOTHED ISCIIRT KA9B KA9D KA9G KA9K URM;",
  
  ANALYSIS = 
   "estimator = mlr; 
    type = mixture;
    starts = 0;
    OPTSEED = 375590;",
  
  SAVEDATA = 
   "File=savedata.dat;
    Save=cprob;",
  
  OUTPUT = "sampstat residual tech11 tech14 svalues(1 3 4 2)", # `svalues` used here to relabel classes
  
  usevariables = colnames(data),
  rdata = data)

step1_fit <- mplusModeler(step1,
                            dataout=here("34-lca-moderation","three_step", "new.dat"),
                            modelout=here("34-lca-moderation","three_step", "one.inp") ,
                            check=TRUE, run = TRUE, hashfilename = FALSE)

Note: Ensure that the classes did not shift during this step (i.g., Class 1 in the enumeration run is now Class 4). Evaluate output and compare the class counts and proportions for the latent classes. Using the OPTSEED function ensures replication of the best loglikelihood value run.

Plot LCA

Latent Class Label
1 Pro-Science with Elevated Utility Value
2 Ambivalent with Minimal Utility Value
3 Ambivalent with Elevated Utility Value
4 Anti-Science with Minimal Utility Value
source(here("34-lca-moderation","functions", "plot_lca.R"))
output_one <- readModels(here("34-lca-moderation","three_step", "one.out"))

plot_lca(model_name = output_one)

Check that log-likelihood values are the same

output_one <- readModels(here("34-lca-moderation","three_step", "one.out"))
output_one$summaries$LL
#> [1] -8742.238

enumeration_c4 <- readModels(here("34-lca-moderation","mplus_enum", "c4_lsal.out"))
enumeration_c4$summaries$LL
#> [1] -8742.238

After selecting the latent class model, add class labels to item probability plot using the plot_lca_labels function. This function requires three arguments:

  • model_name: The Mplus readModels object (e.g., output_enum$c4_lsal.out)
  • item_labels: The item labels for x-axis (e.g.,c(“Enjoy”,“Useful”,“Logical”,“Job”,“Adult”))
  • class_labels: The class labels (e.g., c(“Pro-Science w/ Elevated Utility Value”, “Ambivalent w/ Minimal Utility Value”, “Ambivalent w/ Elevated Utility Value”, “Anti-Science w/ Minimal Utility Value”))

Note: Use \n to add a return if the label is lengthy.

source(here("34-lca-moderation","functions","plot_lca_labels.R"))

# Read in output from step 1.
output_one <- readModels(here("34-lca-moderation","three_step", "one.out"))

# Plot Title
title <- "LCA Probability Plot - LSAL"

#Identify item and class labels (Make sure they are in the order presented in the plot above)
item_labels <-  c(
  "I Enjoy \nScience",
  "Science is Useful \nin Everyday Problems",
  "Science Helps \nLogical Thinking",
  "Need Science for \na Good Job",
  "Will Use Science \nOften as an Adult"
)

class_labels <- c(
  
  "Pro-Science w/ \nElevated Utility Value",
  "Ambivalent w/ \nMinimal Utility Value",
  "Ambivalent w/ \nElevated Utility Value",
  "Anti-Science w/ \nMinimal Utility Value"
)

# Plot LCA plot
plot_lca_labels(model_name = output_one, item_labels, class_labels, title)

# Save
ggsave(here("34-lca-moderation","figures", "final_probability_plot_mod.png"), dpi = "retina", bg = "white", height=7, width=10, units="in")

30.3.2 Step 2 - Determine Measurement Error


Extract logits for the classification probabilities for the most likely latent class

logit_cprobs <- as.data.frame(output_one$class_counts$logitProbs.mostLikely)
logit_cprobs
#>        1      2       3 4
#> 1  8.959  6.319   5.092 0
#> 2 -0.610  2.237  -0.819 0
#> 3  4.497  4.352   6.199 0
#> 4 -8.219 -2.150 -13.705 0

Extract saved dataset from step one

savedata <- as.data.frame(output_one$savedata) %>% 
  rename(N = MLCC) #Rename the column in savedata named "MLCC" and change to "N"

Check variable names in savedata (Mplus will cut off variable names that are longer than 8 characters)

names(savedata)
#>  [1] "KA47A"   "KA47H"   "KA47I"   "KA47K"   "KA47L"  
#>  [6] "FEMALE"  "MOTHED"  "ISCIIRT" "KA9B"    "KA9D"   
#> [11] "KA9G"    "KA9K"    "URM"     "CPROB1"  "CPROB2" 
#> [16] "CPROB3"  "CPROB4"  "N"

30.3.3 Step 3 - Add Auxiliary Variables


To test for moderation, an overall test of equivalence of the regression of science issues on science ability across the latent classes was conducted using the omnibus Wald test. This is done in MplusAutomation using the MODELTEST command shown below. Mplus can run only one Wald test at a time. After evaluating the first Wald test (slopes), re-run step three for the second Wald test (intercepts). Pairwise comparisons can be tested simultaneously, but should be evaluated after significant Wald tests.

step3mod  <- mplusObject(
  TITLE = "Step 3 - LCA Moderation", 
  
  VARIABLE = 
  "USEVAR = FEMALE MOTHED ISCIIRT URM KA9B KA9D KA9G KA9K N;
  classes = c(4);
  nominal = N;",
  
  ANALYSIS = 
 "estimator = mlr; 
  type = mixture; 
  starts = 500 100;", 
  
  DEFINE = 
  "ISCIIRT = ISCIIRT/10;
   Center ISCIIRT (GRANDMEAN);",
     
  MODEL =
  glue("
!Covariates: URM FEMALE MOTHED ISCIIRT
!Distal: ISSUES

  %OVERALL%
  ISSUES by KA9B KA9D KA9G KA9K;
  ISSUES on FEMALE MOTHED URM;
  ISSUES on ISCIIRT;

         %C#1%
[N#1@{logit_cprobs[1,1]}];
[N#2@{logit_cprobs[1,2]}];
[N#3@{logit_cprobs[1,3]}];
      [ISSUES] (B01);        ! conditional distal mean
      ISSUES;                ! conditional distal variance (freely estimated)
      ISSUES on ISCIIRT(B11);! conditional slope (class 1)

        %C#2%
[N#1@{logit_cprobs[2,1]}];
[N#2@{logit_cprobs[2,2]}];
[N#3@{logit_cprobs[2,3]}];
      [ISSUES@0] (B02);
      ISSUES;
      ISSUES on ISCIIRT(B12);

        %C#3%
[N#1@{logit_cprobs[3,1]}];
[N#2@{logit_cprobs[3,2]}];
[N#3@{logit_cprobs[3,3]}];
      [ISSUES] (B03);
      ISSUES;
      ISSUES on ISCIIRT(B13);

        %C#4%
[N#1@{logit_cprobs[4,1]}];
[N#2@{logit_cprobs[4,2]}];
[N#3@{logit_cprobs[4,3]}];
      [ISSUES] (B04);
      ISSUES;
      ISSUES on ISCIIRT(B14);"),

MODELTEST = "
! can run only a single Omnibus test per model 
    ! Omnibus test 1 (Slope)
       B11=B12;
       B12=B13;
       B13=B14;
    ! Omnibus test 2 (Intercept)
       !B01=B03;
       !B03=B04;",
 
  MODELCONSTRAINT = 
   "NEW (slope12, slope13, slope14, slope23, slope24, slope34,
           int13, int14, int34);
           
      slope12=B11-B12; ! Test slope differences
      slope13=B11-B13;
      slope14=B11-B14;
      slope23=B12-B13;
      slope24=B12-B14;
      slope34=B13-B14;
      
      int13=B01-B03; ! Test intercept differences
      int14=B01-B04;
      int34=B03-B04;",
  
  usevariables = colnames(savedata), 
  rdata = savedata)

step3mod_fit <- mplusModeler(step3mod,
               dataout=here("34-lca-moderation","three_step", "Step3.dat"), 
               modelout=here("34-lca-moderation","three_step", "three_slope.inp"), 
               check=TRUE, run = TRUE, hashfilename = FALSE)

Rerun model with different model test:

# Update the model test by overwriting string
step3mod$MODELTEST <- "B01=B03; B03=B04"

# Then run it again
mplusModeler(step3mod,
               dataout=here("34-lca-moderation","three_step", "Step3.dat"), 
               modelout=here("34-lca-moderation","three_step", "three_intercept.inp"), 
               check=TRUE, run = TRUE, hashfilename = FALSE)

Compare Step 1 classes and Step 3 classes

output_one <- readModels(here("34-lca-moderation","three_step", "one.out"))
output_one$class_counts$modelEstimated
#>   class     count proportion
#> 1     1 1007.4747    0.29949
#> 2     2  879.5794    0.26147
#> 3     3  253.0090    0.07521
#> 4     4 1223.9368    0.36383


output_three <- readModels(here("34-lca-moderation","three_step", "three_slope.out"))
output_three$class_counts$modelEstimated
#>   class    count proportion
#> 1     1 824.8687    0.31836
#> 2     2 683.8538    0.26393
#> 3     3 193.1539    0.07455
#> 4     4 889.1236    0.34316

NOTE: If there are notable differences between class formation in the Step 1 and the Step 3 models, it means there are one of more covariates included in the Step 3 model that may be sources of unaccounted-for DIF


30.3.3.1 Wald Test Table

This is testing if there is a relation between the latent class variable and the distal outcome.

Note: There are two outputs, each containing separate Wald tests (one for slope and one for intercept). However, other than the Wald test, the outputs are identical. Either can be used for subsequent code.

# Make a Wald table function
wald_table <- function(mplus_model, table_title) {
  
  # Read the model
  model_output <- mplus_model
  
  # Extract information as data frame
  wald <- as.data.frame(model_output[["summaries"]]) %>%
    dplyr::select(WaldChiSq_Value:WaldChiSq_PValue) %>% 
    mutate(WaldChiSq_DF = paste0("(", WaldChiSq_DF, ")")) %>% 
    unite(wald_test, WaldChiSq_Value, WaldChiSq_DF, sep = " ") %>% 
    rename(pval = WaldChiSq_PValue) %>% 
    mutate(pval = ifelse(pval<0.001, paste0("<.001*"),
                       ifelse(pval<0.05, paste0(scales::number(pval, accuracy = .001), "*"),
                              scales::number(pval, accuracy = .001))))
  
  # Create the gt table
  wald %>% 
  gt() %>%
    tab_header(
    title = table_title) %>%
    cols_label(
      wald_test = md("Wald Test (*df*)"), 
      pval = md("*p*-value")) %>% 
  cols_align(align = "center") %>% 
  opt_align_table_header(align = "left") %>% 
  gt::tab_options(table.font.names = "serif")
}

Use wald_table function

output_three_slope <- readModels(here("34-lca-moderation","three_step", "three_slope.out"))
output_three_intercept <- readModels(here("34-lca-moderation","three_step", "three_intercept.out"))

wald_table(output_three_slope, "Wald Test Distal Means (Slope)")
Wald Test Distal Means (Slope)
Wald Test (df) p-value
11.003 (3) 0.012*
wald_table(output_three_intercept, "Wald Test Distal Means (Math IRT Scores)")
Wald Test Distal Means (Math IRT Scores)
Wald Test (df) p-value
205.616 (2) <.001*

30.3.3.2 Table of Slope and Intercept Values Across Classes

modelParams <- readModels(here("34-lca-moderation","three_step", "three_intercept.out"))

# Extract information as data frame
values <- as.data.frame(modelParams[["parameters"]][["unstandardized"]]) %>%
  filter(param %in% c("ISSUES", "ISCIIRT"),
         paramHeader != "Residual.Variances") %>% 
  mutate(param = str_replace(param, 
                                   pattern = "ISCIIRT", replacement = "Slope"),
         param = str_replace(param, 
                                   pattern = "ISSUES", replacement = "Intercept")) %>% 
  mutate(LatentClass = sub("^","Class ", LatentClass)) %>%  
  dplyr::select(!paramHeader) %>% 
  mutate(se = paste0("(", format(round(se,2), nsmall =2), ")")) %>% 
    unite(estimate, est, se, sep = " ") %>% 
  dplyr::select(-est_se) %>% 
  mutate(pval = ifelse(pval<0.001, paste0("<.001*"),
                       ifelse(pval<0.05, paste0(scales::number(pval, accuracy = .001), "*"),
                              scales::number(pval, accuracy = .001))))


# Create table

values %>% 
  gt(groupname_col = "LatentClass", rowname_col = "param") %>%
  tab_header(
    title = "Slope and Intercept Values Across Science Attitudes Classes") %>%
  cols_label(
    estimate = md("Estimate (*se*)"),
    pval = md("*p*-value")) %>% 
  sub_values(values = "999.000", replacement = "-") %>% 
  sub_missing(1:3,
              missing_text = "") %>%
  cols_align(align = "center") %>% 
  opt_align_table_header(align = "left") %>% 
  gt::tab_options(table.font.names = "serif")
Slope and Intercept Values Across Science Attitudes Classes
Estimate (se) p-value
Class 1
Slope 0.149 (0.01) <.001*
Intercept 0.246 (0.03) <.001*
Class 2
Slope 0.152 (0.02) <.001*
Intercept 0 (0.00) -
Class 3
Slope 0.117 (0.03) <.001*
Intercept 0.059 (0.05) 0.269
Class 4
Slope 0.077 (0.02) <.001*
Intercept -0.137 (0.03) <.001*

30.3.3.3 Table of Distal Outcome Differences

modelParams <- readModels(here("34-lca-moderation","three_step", "three_intercept.out"))

# Extract information as data frame
diff1 <- as.data.frame(modelParams[["parameters"]][["unstandardized"]]) %>%
  filter(grepl("INT", param)) %>% 
  dplyr::select(param:pval) %>% 
  mutate(se = paste0("(", format(round(se,2), nsmall =2), ")")) %>% 
  unite(estimate, est, se, sep = " ") %>% 
  mutate(param = str_remove(param, "INT"),
         param = as.numeric(param)) %>% 
  separate(param, into = paste0("Group", 1:2), sep = 1) %>% 
  mutate(class = paste0("Class ", Group1, " vs ", Group2)) %>% 
  dplyr::select(class, estimate, pval) %>% 
  mutate(pval = ifelse(pval<0.001, paste0("<.001*"),
                       ifelse(pval<0.05, paste0(scales::number(pval, accuracy = .001), "*"),
                              scales::number(pval, accuracy = .001))))


# Create table

diff1 %>% 
  gt() %>%
    tab_header(
    title = "Distal Outcome Differences") %>%
    cols_label(
      class = "Class",
      estimate = md("Mean Difference (*se*)"),
      pval = md("*p*-value")) %>% 
    sub_missing(1:3,
              missing_text = "") %>%
  cols_align(align = "center") %>% 
  opt_align_table_header(align = "left") %>% 
  gt::tab_options(table.font.names = "serif")
Distal Outcome Differences
Class Mean Difference (se) p-value
Class 1 vs 3 0.187 (0.05) <.001*
Class 1 vs 4 0.383 (0.03) <.001*
Class 3 vs 4 0.196 (0.05) <.001*

30.3.3.4 Table of Slope Differences

modelParams <- readModels(here("34-lca-moderation","three_step", "three_intercept.out"))

# Extract information as data frame
diff2 <- as.data.frame(modelParams[["parameters"]][["unstandardized"]]) %>%
  filter(grepl("SLOPE", param)) %>% 
  dplyr::select(param:pval) %>% 
  mutate(se = paste0("(", format(round(se,2), nsmall =2), ")")) %>% 
  unite(estimate, est, se, sep = " ") %>% 
  mutate(param = str_remove(param, "SLOPE"),
         param = as.numeric(param)) %>% 
  separate(param, into = paste0("Group", 1:2), sep = 1) %>% 
  mutate(class = paste0("Class ", Group1, " vs ", Group2)) %>% 
  dplyr::select(class, estimate, pval) %>% 
  mutate(pval = ifelse(pval<0.001, paste0("<.001*"),
                       ifelse(pval<0.05, paste0(scales::number(pval, accuracy = .001), "*"),
                              scales::number(pval, accuracy = .001))))


# Create table

diff2 %>% 
  gt() %>%
    tab_header(
    title = "Slope Differences") %>%
    cols_label(
      class = "Class",
      estimate = md("Mean (*se*)"),
      pval = md("*p*-value")) %>% 
    sub_missing(1:3,
              missing_text = "") %>%
  cols_align(align = "center") %>% 
  opt_align_table_header(align = "left") %>% 
  gt::tab_options(table.font.names = "serif")
Slope Differences
Class Mean (se) p-value
Class 1 vs 2 -0.003 (0.03) 0.924
Class 1 vs 3 0.032 (0.04) 0.388
Class 1 vs 4 0.072 (0.02) 0.001*
Class 2 vs 3 0.035 (0.04) 0.419
Class 2 vs 4 0.075 (0.03) 0.019*
Class 3 vs 4 0.04 (0.04) 0.285

30.3.3.5 Table of Covariates

modelParams <- readModels(here("34-lca-moderation","three_step", "three_intercept.out"))

# Extract information as data frame
cov <- as.data.frame(modelParams[["parameters"]][["unstandardized"]]) %>%
  filter(param %in% c("FEMALE", "MOTHED", "URM")) %>% 
  mutate(param = str_replace(param, "FEMALE", "Gender"),
         param = str_replace(param, "MOTHED", "Mother's Education"),
         param = str_replace(param, "URM", "Underrepresented Minority"))%>% 
  mutate(LatentClass = sub("^","Class ", LatentClass)) %>%  
  dplyr::select(!paramHeader) %>% 
  mutate(se = paste0("(", format(round(se,2), nsmall =2), ")")) %>% 
    unite(estimate, est, se, sep = " ") %>% 
  select(param, estimate, pval) %>% 
  distinct(param, .keep_all=TRUE)  %>% 
  mutate(pval = ifelse(pval<0.001, paste0("<.001*"),
                       ifelse(pval<0.05, paste0(scales::number(pval, accuracy = .001), "*"),
                              scales::number(pval, accuracy = .001))))


# Create table

cov %>% 
  gt(groupname_col = "LatentClass", rowname_col = "param") %>%
  tab_header(
    title = "Relations Between the Covariates and Distal Outcome") %>%
  cols_label(
    estimate = md("Estimate (*se*)"),
    pval = md("*p*-value")) %>% 
  sub_missing(1:3,
              missing_text = "") %>%
  sub_values(values = c(999.000), replacement = "-") %>% 
  cols_align(align = "center") %>% 
  opt_align_table_header(align = "left") %>% 
  gt::tab_options(table.font.names = "serif")
Relations Between the Covariates and Distal Outcome
Estimate (se) p-value
Gender -0.16 (0.02) <.001*
Mother's Education 0.004 (0.01) 0.677
Underrepresented Minority 0.043 (0.02) 0.072

30.3.3.6 Plot Distal Outcome

modelParams <- readModels(here("34-lca-moderation","three_step", "three_intercept.out"))

# Extract class size 
c_size <- as.data.frame(modelParams[["class_counts"]][["modelEstimated"]][["proportion"]]) %>%
  rename("cs" = 1) %>% 
  mutate(cs = round(cs*100, 2))

# Class labels
class_labels <- c(
  
  "Pro-Science w/ \nElevated Utility Value",
  "Ambivalent w/ \nMinimal Utility Value",
  "Ambivalent w/ \nElevated Utility Value",
  "Anti-Science w/ \nMinimal Utility Value"
)


# Keep this code if you want a generic label for the classes
#c_size_val <- paste0("C", 1:nrow(c_size), glue(" ({c_size[1:nrow(c_size),]}%)"))

# Otherwise use this:
c_size_val <- paste0(class_labels, glue(" ({c_size[1:nrow(c_size),]}%)"))

# Extract information as data frame
estimates <- as.data.frame(modelParams[["parameters"]][["unstandardized"]]) %>%
  filter(paramHeader == "Intercepts") %>%
  dplyr::select(param, est, se) %>% 
  filter(param == "ISSUES") %>% # Distal Outcome Name
  mutate(across(c(est, se), as.numeric)) %>% 
  mutate(LatentClass = c_size_val)

# Add labels (NOTE: You must change the labels to match the significance testing!!) 
#value_labels <- paste0(estimates$est, c("a"," bc"," abd"," cd"))

estimates$LatentClass <- fct_inorder(estimates$LatentClass)

# Plot bar graphs
estimates %>%
  ggplot(aes(x = LatentClass, y = est, fill = LatentClass)) +
  geom_col(position = "dodge", color = "black") +
  
  # Dynamic text labels
  geom_text(
    aes(
      label = paste0(round(est * 100, 2), "%"),
      vjust = ifelse(est >= 0, -0.5, 1.5) 
    ), 
    family = "serif", 
    size = 4,
    position = position_dodge(1)
  ) +  
  labs(
    y = "STEM Occupation", 
    x = "",
    title = "Distal Mean Comparison for STEM Occupation"
  ) +
  theme_cowplot() +
  theme(
    text = element_text(family = "serif"),
    legend.position = "none"
  )
# Save plot
ggsave(here("34-lca-moderation","figures","distal_plot.jpg"), dpi = "retina", bg = "white",  width=13, height = 7, units="in") 

30.3.3.7 Plot Slopes

modelParams <- readModels(here("34-lca-moderation","three_step", "three_intercept.out"))

# Minimum and Maximum Values
desc <- as.data.frame(modelParams$sampstat$univariate.sample.statistics) %>% 
  rownames_to_column("Variables")

# Class Sizes
c_size_val <- paste0(class_labels, glue(" ({c_size[1:nrow(c_size),]}%)"))

# Select min amd max values of covariate
xmin <- desc %>% 
  filter(Variables == "ISCIIRT") %>% 
  dplyr::select(Minimum) %>% 
  as.numeric()
xmax <- desc %>% 
  filter(Variables == "ISCIIRT") %>% 
  dplyr::select(Maximum) %>% 
  as.numeric()

# Add slope and intercept, Min and Max values 
line <- as.data.frame(modelParams$parameters$unstandardized) %>% 
  filter(str_detect(paramHeader, 'ON|Inter'),
         str_detect(param, 'ISCIIRT|ISSUES')) %>% # ISCIIRT is X and ISSUES is Y
  unite("param", paramHeader:param, remove = TRUE) %>% 
  mutate(param = replace(param,agrep(".ON",param),"slope"),
         param = replace(param,agrep("Inter", param), "intercept"),
         LatentClass = factor(LatentClass, labels = c_size_val)) %>% # Uses previous `c_size_val` object that contains the class labels
  dplyr::select(param, est, LatentClass) %>% 
  pivot_wider(names_from = param, values_from = est) %>%  
  add_column(x_max = xmax,
         x_min = xmin)


# Add column with y values
plot_data <- line %>% 
  mutate(y_min = (slope*xmin) + intercept,
         y_max = (slope*xmax) + intercept) %>% 
  dplyr::select(-slope, -intercept) %>% 
  pivot_longer(-LatentClass, 
               names_to = c("xvalues", "yvalues"), 
               names_sep="_" ) %>% 
  pivot_wider(names_from = xvalues, values_from = value) %>% 
  dplyr::select(-yvalues) 

# un-center and un-scale so values on x-axis are on the original scale
sampstat <- readModels(here("34-lca-moderation","three_step", "zero.out"))
desc <- as.data.frame(sampstat$sampstat$univariate.sample.statistics) %>% 
  rownames_to_column("Variables")
mean <- desc %>% 
  filter(Variables == "ISCIIRT") %>% 
  select(Mean) %>% 
  as.numeric()
  uncentered_data <- plot_data %>% mutate(x = x*10 + mean) 


# Plot 
uncentered_data %>%
  ggplot(aes(
    x = x,
    y = y,
    color = LatentClass,
    group = LatentClass
  )) +
  geom_point(size = 4) +
  geom_line(linewidth = 1) + 
  labs(
    x = "Interest in Science Issues",
    y = "Math Score"
  ) +
  scale_x_continuous(n.breaks = 10, limits = c(20, 100)) +
  theme_cowplot() +
  theme(
    text = element_text(family = "serif"),
    legend.position = "top",
    legend.title = element_blank()
  ) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE))
# Save
ggsave(here("34-lca-moderation","figures","slope_plot.jpg"), dpi = "retina", bg = "white",  width=11, height = 7, units="in")