10 Classification Diagnostics
10.1 Load Packages
library(naniar)
library(tidyverse)
library(haven)
library(glue)
library(MplusAutomation)
library(here)
library(janitor)
library(gt)
library(tidyLPA)
library(pisaUSA15)
library(cowplot)
library(filesstrings)
library(patchwork)
library(RcppAlgos)
10.2 LCA
Prepare Data
df_bully <- read_csv(here("data", "crdc_lca_data.csv")) %>%
clean_names() %>%
dplyr::select(report_dis, report_race, report_sex, counselors_fte, psych_fte, law_fte)
Continuing the LCA example (Chapter 3) in this bookdown, use Mplus to calculate the k-class 95% confidence intervals (Note: change the syntax to reflect your chosen k-class model):
classification <- mplusObject(
TITLE = "C3 LCA - Calculated k-Class 95% CI",
VARIABLE =
"categorical = report_dis-law_fte;
usevar = report_dis-law_fte;
classes = c(3);",
ANALYSIS =
"estimator = ml;
type = mixture;
starts = 0;
processors = 10;
optseed = 802779;
bootstrap = 1000;",
MODEL =
"
!CHANGE THIS SECTION TO YOUR CHOSEN k-CLASS MODEL
%OVERALL%
[C#1](c1);
[C#2](c2);
Model Constraint:
New(p1 p2 p3);
p1 = exp(c1)/(1+exp(c1)+exp(c2));
p2 = exp(c2)/(1+exp(c1)+exp(c2));
p3 = 1/(1+exp(c1)+exp(c2));",
OUTPUT = "cinterval(bcbootstrap)",
usevariables = colnames(df_bully),
rdata = df_bully)
classification_fit <- mplusModeler(classification,
dataout=here("mplus", "bully.dat"),
modelout=here("mplus", "class.inp") ,
check=TRUE, run = TRUE, hashfilename = FALSE)Note: Ensure that the classes did not shift during this step (i.g., Class 1 in the enumeration run is now Class 4). Evaluate output and compare the class counts and proportions for the latent classes. Using the OPTSEED function ensures replication of the best loglikelihood value run.
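The NEW parameters defined under MODEL CONSTRAINT re-express the latent class logit intercepts as class proportions via the standard multinomial (softmax) transformation, with the last class as the reference:

$$
p_k = \frac{\exp(c_k)}{1 + \sum_{j=1}^{K-1}\exp(c_j)}, \quad k = 1, \ldots, K-1,
\qquad
p_K = \frac{1}{1 + \sum_{j=1}^{K-1}\exp(c_j)}
$$

Because bootstrap = 1000 is requested in ANALYSIS and cinterval(bcbootstrap) in OUTPUT, Mplus reports bias-corrected bootstrap 95% confidence intervals for these proportions.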
Read in the 3-class model:
# Read in the 3-class model and extract information needed
output_enum <- readModels(here("mplus", "class.out"))
# k-Class proportions and their 95% confidence intervals
k_ci <- output_enum$parameters$ci.unstandardized %>%
filter(paramHeader == "New.Additional.Parameters") %>%
unite(CI, c(low2.5,up2.5), sep=", ", remove = TRUE) %>%
mutate(CI = paste0("[", CI, "]")) %>%
rename(kclass=est) %>%
dplyr::select(kclass, CI)
# AvePPk = Average Latent Class Probabilities for Most Likely Latent Class Membership (Row) by Latent Class (Column)
avePPk <- tibble(avePPk = diag(output_enum$class_counts$avgProbs.mostLikely))
# mcaPk = modal class assignment proportion
mcaPk <- round(output_enum$class_counts$mostLikely,3) %>%
mutate(model = paste0("Class ", class)) %>%
add_column(avePPk, k_ci) %>%
rename(mcaPk = proportion) %>%
dplyr::select(model, kclass, CI, mcaPk, avePPk)
# OCCk = odds of correct classification
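# OCCk compares the odds of correct classification based on the posterior
# probabilities, avePPk/(1 - avePPk), with the odds implied by the estimated
# class proportion, kclass/(1 - kclass); values of about 5 or higher are
# commonly read as indicating good class assignment.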
OCCk <- mcaPk %>%
mutate(OCCk = round((avePPk/(1-avePPk))/(kclass/(1-kclass)),3))
# Put everything together
class_data <- data.frame(OCCk)
Now, use gt to make a nicely formatted table:
class_table <- class_data %>%
gt() %>%
tab_header(
title = "Model Classification Diagnostics for the 3-Class Solution") %>%
cols_label(
model = md("*k*-Class"),
kclass = md("*k*-Class Proportions"),
CI = "95% CI",
mcaPk = html("McaP<sub>k</sub>"),
avePPk = html("AvePP<sub>k</sub>"),
OCCk = html("OCC<sub>k</sub>")
) %>%
tab_footnote(
footnote = html(
"<i>Note.</i> McaP<sub>k</sub> = Modal class assignment proportion; AvePP<sub>k</sub> = Average posterior class probabilities; OCC<sub>k</sub> = Odds of correct classification; "
)
) %>%
cols_align(align = "center") %>%
opt_align_table_header(align = "left") %>%
gt::tab_options(table.font.names = "Times New Roman")
class_table
Model Classification Diagnostics for the 3-Class Solution

| k-Class | k-Class Proportions | 95% CI | McaPk | AvePPk | OCCk |
|---|---|---|---|---|---|
| Class 1 | 0.249 | [0.166, 0.329] | 0.282 | 0.675 | 6.264 |
| Class 2 | 0.106 | [0.083, 0.136] | 0.095 | 0.904 | 79.420 |
| Class 3 | 0.644 | [0.561, 0.731] | 0.623 | 0.893 | 4.614 |

Note. McaPk = Modal class assignment proportion; AvePPk = Average posterior class probabilities; OCCk = Odds of correct classification.
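If you want to save the formatted table to a file (for example, to drop into a manuscript), gt::gtsave() can export it. This is an optional sketch; the "figures" folder and file name below are assumptions, not part of the original workflow:

# Optional: export the gt table (the "figures" folder and file name are assumed)
gtsave(class_table, filename = here("figures", "c3_classification_diagnostics.html"))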
10.3 LPA
Prepare Data
pisa <- pisaUSA15[1:500,] %>%
dplyr::select(broad_interest, enjoyment, instrumental_mot, self_efficacy)
Continuing the LPA example (Chapter 7) in this bookdown, use Mplus to calculate the k-class 95% confidence intervals (Note: change the syntax to reflect your chosen k-class model):
classification <- mplusObject(
TITLE = "LPA - Calculated k-Class 95% CI",
VARIABLE =
"usevar = broad_interest-self_efficacy;
classes = c1(4);",
ANALYSIS =
"estimator = ml;
type = mixture;
starts = 0;
processors = 10;
optseed = 468036; ! This seed is taken from chosen model output
bootstrap = 1000;",
MODEL =
"
! This is copied and pasted from the chosen model input
%c1#1%
broad_interest (vbroad_interest);
enjoyment (venjoyment);
instrumental_mot (vinstrumental_mot);
self_efficacy (vself_efficacy);
broad_interest WITH enjoyment (broad_interestWenjoyment);
broad_interest WITH instrumental_mot (broad_interestWinstrumental_mot);
broad_interest WITH self_efficacy (broad_interestWself_efficacy);
enjoyment WITH instrumental_mot (enjoymentWinstrumental_mot);
enjoyment WITH self_efficacy (enjoymentWself_efficacy);
instrumental_mot WITH self_efficacy (instrumental_motWself_efficacy);
%c1#2%
broad_interest (vbroad_interest);
enjoyment (venjoyment);
instrumental_mot (vinstrumental_mot);
self_efficacy (vself_efficacy);
broad_interest WITH enjoyment (broad_interestWenjoyment);
broad_interest WITH instrumental_mot (broad_interestWinstrumental_mot);
broad_interest WITH self_efficacy (broad_interestWself_efficacy);
enjoyment WITH instrumental_mot (enjoymentWinstrumental_mot);
enjoyment WITH self_efficacy (enjoymentWself_efficacy);
instrumental_mot WITH self_efficacy (instrumental_motWself_efficacy);
%c1#3%
broad_interest (vbroad_interest);
enjoyment (venjoyment);
instrumental_mot (vinstrumental_mot);
self_efficacy (vself_efficacy);
broad_interest WITH enjoyment (broad_interestWenjoyment);
broad_interest WITH instrumental_mot (broad_interestWinstrumental_mot);
broad_interest WITH self_efficacy (broad_interestWself_efficacy);
enjoyment WITH instrumental_mot (enjoymentWinstrumental_mot);
enjoyment WITH self_efficacy (enjoymentWself_efficacy);
instrumental_mot WITH self_efficacy (instrumental_motWself_efficacy);
%c1#4%
broad_interest (vbroad_interest);
enjoyment (venjoyment);
instrumental_mot (vinstrumental_mot);
self_efficacy (vself_efficacy);
broad_interest WITH enjoyment (broad_interestWenjoyment);
broad_interest WITH instrumental_mot (broad_interestWinstrumental_mot);
broad_interest WITH self_efficacy (broad_interestWself_efficacy);
enjoyment WITH instrumental_mot (enjoymentWinstrumental_mot);
enjoyment WITH self_efficacy (enjoymentWself_efficacy);
instrumental_mot WITH self_efficacy (instrumental_motWself_efficacy);
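! Note: using the same labels across the class-specific statements above
! constrains the variances and covariances to be equal across the four classes.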
!CHANGE THIS SECTION TO YOUR CHOSEN k-CLASS MODEL
%OVERALL%
[C1#1](c1);
[C1#2](c2);
[C1#3](c3);
Model Constraint:
New(p1 p2 p3 p4);
p1 = exp(c1)/(1+exp(c1)+exp(c2)+exp(c3));
p2 = exp(c2)/(1+exp(c1)+exp(c2)+exp(c3));
p3 = exp(c3)/(1+exp(c1)+exp(c2)+exp(c3));
p4 = 1/(1+exp(c1)+exp(c2)+exp(c3));",
OUTPUT = "cinterval(bcbootstrap)",
usevariables = colnames(pisa),
rdata = pisa)
classification_fit <- mplusModeler(classification,
dataout=here("lpa", "mplus", "class.dat"),
modelout=here("lpa", "mplus", "class.inp") ,
check=TRUE, run = TRUE, hashfilename = FALSE)Create table using diagnostics_table function
source(here("functions", "diagnostics_table.R"))
class_output <- readModels(here("lpa", "mplus", "class.out"))
diagnostics_table(class_output)
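The diagnostics_table() helper lives in functions/diagnostics_table.R and is not reproduced in this chapter. As a rough sketch (an assumption on our part; the actual helper may differ), it can be written by wrapping the manual steps from the LCA section into one function:

# Hypothetical sketch of diagnostics_table(); the real helper in
# functions/diagnostics_table.R may differ in its details.
diagnostics_table <- function(output) {
  k_ci <- output$parameters$ci.unstandardized %>%
    filter(paramHeader == "New.Additional.Parameters") %>%
    unite(CI, c(low2.5, up2.5), sep = ", ", remove = TRUE) %>%
    mutate(CI = paste0("[", CI, "]")) %>%
    rename(kclass = est) %>%
    dplyr::select(kclass, CI)
  avePPk <- tibble(avePPk = diag(output$class_counts$avgProbs.mostLikely))
  mcaPk <- round(output$class_counts$mostLikely, 3) %>%
    mutate(model = paste0("Class ", class)) %>%
    add_column(avePPk, k_ci) %>%
    rename(mcaPk = proportion) %>%
    dplyr::select(model, kclass, CI, mcaPk, avePPk)
  mcaPk %>%
    mutate(OCCk = round((avePPk / (1 - avePPk)) / (kclass / (1 - kclass)), 3)) %>%
    gt() %>%
    tab_header(title = "Model Classification Diagnostics") %>%
    cols_align(align = "center") %>%
    opt_align_table_header(align = "left")
}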