Packages

# Global knitr chunk options used for every chunk in this document.
knitr::opts_chunk$set(
  fig.path = "vigfig-",
  fig.width = 7.5,
  comment = "#>",
  collapse = TRUE
)
    library(LKT)
    library(data.table)
    library(ggplot2)
    library(pROC)
    library(glmnet)
    library(crayon)
    library(dplyr)
    library(boot)
# precomputed as per https://ropensci.org/blog/2019/12/08/precompute-vignettes/

Load data (shows modifications to create needed columns)

set.seed(41)
    # Start from the `largerawsample` data set.
    val <- largerawsample

    # Clean it up: use the problem name as the default knowledge component (KC).
    val$KC..Default. <- val$Problem.Name
    # Make it a data.table.
    val <- setDT(val)

    # Unstratified folds for cross-validation (each row drawn at random).
    # NOTE: this column is overwritten by the student-stratified folds below;
    # the draw is kept so the random-number stream, and therefore the
    # stratified fold assignment, is the same as before.
    val$fold <- sample(1:5, length(val$Anon.Student.Id), replace = TRUE)

    # Student-stratified folds (cross-validation on unseen students):
    # shuffle the students, deal them into folds 1..5, then give every row the
    # fold of its student. A match() lookup replaces the earlier
    # rep(0, length(val[, 1])) initialisation, which only worked through
    # recycling because length() of a one-column data.table is 1, not nrow().
    unq <- sample(unique(val$Anon.Student.Id))
    sfold <- rep(1:5, length.out = length(unq))
    val$fold <- as.numeric(sfold[match(val$Anon.Student.Id, unq)])

    # Time of each trial in seconds since 1970 (parsed in the session time zone).
    val$CF..Time. <- as.numeric(
      as.POSIXct(as.character(val$Time), format = "%Y-%m-%d %H:%M:%S")
    )

    # Make sure the rows are ordered the way the code expects:
    # by student, then by time.
    val <- val[order(val$Anon.Student.Id, val$CF..Time.), ]

    # Binary response column to predict: 1 = correct, 0 = incorrect,
    # -1 = anything else. Only rows with a valid 0/1 value are kept.
    val$CF..ansbin. <- ifelse(
      tolower(val$Outcome) == "correct", 1,
      ifelse(tolower(val$Outcome) == "incorrect", 0, -1)
    )
    val <- val[val$CF..ansbin. == 0 | val$CF..ansbin. == 1, ]

    # Trial durations in seconds: (end latency + review latency + 500) / 1000.
    val$Duration..sec. <- (val$CF..End.Latency. + val$CF..Review.Latency. + 500) / 1000

    # computeSpacingPredictors() needs the times and durations created above.
    # It is only required when modelling time effects: it is what allows the
    # recency, spacing and forgetting features to run for a component.
    val <- computeSpacingPredictors(val, "KC..Default.")
    val <- computeSpacingPredictors(val, "KC..Cluster.")
    val <- computeSpacingPredictors(val, "Anon.Student.Id")
    val <- computeSpacingPredictors(val, "CF..Correct.Answer.")


#save(val,file="..\\LKTCloze.RData")

Load MATHia (example of how to load a remote dataset)

set.seed(41)
# CHANGE THIS VALUE TO THE DataShop export file IN YOUR R WORKING DIRECTORY
datafile <- "C:/Users/ppavl/Dropbox/Active projects/ds4845_tx_All_Data_6977_2021_0723_141809.txt"
# Read the tab-separated export, keeping student ids as character strings.
val2 <- read.delim(
  datafile,
  sep = "\t", header = TRUE, quote = "",
  colClasses = c("Anon.Student.Id" = "character")
)
val2 <- as.data.table(val2)

# Time of each trial in seconds since 1970 (parsed in the session time zone).
val2$CF..Time. <- as.numeric(
  as.POSIXct(as.character(val2$Time), format = "%Y-%m-%d %H:%M:%S")
)

# Make sure the rows are ordered the way the code expects:
# by student, then by time.
val2 <- val2[order(val2$Anon.Student.Id, val2$CF..Time.), ]

# Binary response column to predict: an Outcome of "ok" (any letter case) is
# recoded as CORRECT and everything else as INCORRECT, then mapped to 1/0.
# Only rows with a valid 0/1 value are kept.
val2$Outcome <- ifelse(tolower(val2$Outcome) == "ok", "CORRECT", "INCORRECT")
val2$CF..ansbin. <- ifelse(tolower(val2$Outcome) == "correct", 1, 0)
val2 <- val2[val2$CF..ansbin. == 0 | val2$CF..ansbin. == 1, ]

#subtot<-  aggregate(val2$CF..ansbin.,by=list(val2$Anon.Student.Id),FUN=length)
 # subtot<- subtot[subtot$x<20,]
   # val2<-val2[!(val2$Anon.Student.Id %in% subtot$Group.1),]

# Keep first attempts only, and only rows that have a MATHia KC label.
val2 <- val2[val2$Attempt.At.Step == 1, ]
val2 <- val2[val2$KC..MATHia. != "", ]

# Student-stratified folds (cross-validation on an unseen population):
# shuffle the students, deal them into folds 1..5, then give every row the
# fold of its student. A match() lookup replaces the earlier
# rep(0, length(val2[, 1])) initialisation, which only worked through
# recycling because length() of a one-column data.table is 1, not nrow().
unq <- sample(unique(val2$Anon.Student.Id))
sfold <- rep(1:5, length.out = length(unq))
val2$fold <- as.numeric(sfold[match(val2$Anon.Student.Id, unq)])

# computeSpacingPredictors() allows the recency, spacing and forgetting
# features to run for a component. Warnings are suppressed here.
val2 <- suppressWarnings(computeSpacingPredictors(val2, "KC..MATHia."))
val2 <- suppressWarnings(computeSpacingPredictors(val2, "Problem.Name"))
val2 <- suppressWarnings(computeSpacingPredictors(val2, "Anon.Student.Id"))



#save(val2,file="..\\MATHia.RData")

Load Assistments 2012 skillbuilder (example of how to load a remote dataset)

set.seed(42)
# From Assistments https://sites.google.com/site/assistmentsdata/datasets/2012-13-school-data-with-affect dataset https://drive.google.com/file/d/1cU6Ft4R3hLqA7G1rIGArVfelSZvc6RxY/view
# CHANGE THIS VALUE TO THE LOCATION OF THE DATA FILE ON YOUR MACHINE
datafile <- "C:/Users/ppavl/Dropbox/Active projects/2012-2013-data-with-predictions-4-final.csv"
# Read the file, keeping user ids as character strings.
val3 <- fread(datafile, header = TRUE, colClasses = c("user_id" = "character"))
val3$Anon.Student.Id <- val3$user_id

# Randomly pick 5% of the unique users ...
selected_users <- sample(
  unique(val3$user_id),
  size = floor(.05 * length(unique(val3$user_id)))
)
# ... and keep only the rows that belong to them.
val3 <- val3[val3$user_id %in% selected_users, ]

# Drop rows that have no skill label.
val3 <- val3[val3$skill != "", ]

# Duplicate screening: one row per user_id / start_time combination.
val3 <- val3 %>% distinct(user_id, start_time, .keep_all = TRUE)

# Time of each trial in seconds since 1970 (parsed in the session time zone),
# then order the rows by student and time.
val3$CF..Time. <- as.numeric(
  as.POSIXct(as.character(val3$start_time), format = "%Y-%m-%d %H:%M:%S")
)
val3 <- val3[order(val3$Anon.Student.Id, val3$CF..Time.), ]

# Binary response column; only rows with a valid 0/1 value are kept.
val3$CF..ansbin. <- val3$correct
val3 <- val3[val3$CF..ansbin. == 0 | val3$CF..ansbin. == 1, ]

# Trial duration in seconds: end time minus start time. CF..Time. already
# holds the parsed start_time, so it is reused instead of parsing it again.
val3$Duration..sec. <- as.numeric(
  as.POSIXct(as.character(val3$end_time), format = "%Y-%m-%d %H:%M:%S")
) - val3$CF..Time.
val3$Outcome <- ifelse(val3$correct == 1, "CORRECT", "INCORRECT")

# Keep only users that have at least 20 rows.
val3 <- val3 %>%
  group_by(user_id) %>%
  filter(n() >= 20) %>%
  ungroup()

# Student-stratified folds (cross-validation on an unseen population):
# shuffle the students, deal them into folds 1..5, then give every row the
# fold of its student. A match() lookup replaces the earlier
# rep(0, length(val3[, 1])) initialisation, which only worked through
# recycling because length() of a one-column table is 1, not nrow().
unq <- sample(unique(val3$Anon.Student.Id))
sfold <- rep(1:5, length.out = length(unq))
val3$fold <- as.numeric(sfold[match(val3$Anon.Student.Id, unq)])

# computeSpacingPredictors() allows the recency, spacing and forgetting
# features to run for a component.
val3 <- computeSpacingPredictors(val3, "skill")
val3 <- computeSpacingPredictors(val3, "Anon.Student.Id")
val3 <- computeSpacingPredictors(val3, "problem_type")
val3 <- computeSpacingPredictors(val3, "type")
# Convert back to a data.table after the dplyr steps above.
val3 <- setDT(val3)


#save(val3,file="..\\Assistments2012rev.RData")

Additive Factors Model (AFM) fixed effect version

    # AFM: an intercept per student, an intercept per KC, and the "lineafm"
    # feature on the KC. No global intercept (interc = FALSE).
    modelob <- LKT(
      data = val,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default."),
      features = c("intercept", "intercept", "lineafm"),
      interc = FALSE
    )
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> lineafm KC..Default.      
#> lineafmKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.280024 
#> LogLike logistic: -27347.20717315

Performance Factors Analysis (PFA) fixed effect version



  # PFA on the Assistments data: student and skill intercepts plus the
  # "linesuc$" and "linefail$" features on skill. No global intercept.
  modelob <- LKT(
    data = val3,
    components = c("Anon.Student.Id", "skill", "skill", "skill"),
    features = c("intercept", "intercept", "linesuc$", "linefail$"),
    interc = FALSE
  )
#> intercept Anon.Student.Id      
#> intercept skill      
#> linesuc$ skill      
#> linefail$ skill      
#> linefailskill:e$data$skill+linesucskill:e$data$skill+interceptskill+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.109656 
#> LogLike logistic: -67203.35361882

PFA using difficulty sensitive predictors (composite model requiring pred from prior model for estimation)

    # First-stage model: student and KC intercepts plus "linesuc$" and
    # "linefail$" on the KC, with a global intercept (interc = TRUE).
    modelob <- LKT(
      data = val,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default.", "KC..Default."),
      features = c("intercept", "intercept", "linesuc$", "linefail$"),
      interc = TRUE
    )
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> linesuc$ KC..Default.      
#> linefail$ KC..Default.      
#> linefailKC..Default.:e$data$KC..Default.+linesucKC..Default.:e$data$KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.295228 
#> LogLike logistic: -26769.69416527

    # The next model needs prior predictions in the data, as it would have in
    # an adaptive system. They are added to the data from a first model,
    # like this:
    val$pred <- modelob$prediction

    # Second-stage model: same components, with "diffcorComp" and "linefail"
    # as the KC features.
    modelob <- LKT(
      data = val,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default.", "KC..Default."),
      features = c("intercept", "intercept", "diffcorComp", "linefail"),
      interc = TRUE
    )
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> diffcorComp KC..Default.      
#> linefail KC..Default.      
#> linefailKC..Default.+diffcorCompKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.283896 
#> LogLike logistic: -27200.10757759

Recent Performance Factors Analysis (RPFA)

    # RPFA: student and KC intercepts plus "propdec2" and "linefail" on the KC.
    # seedpars supplies 0.9 as the starting value for the propdec2 parameter;
    # the trace below shows the values tried from there.
    modelob <- LKT(
      data = val,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default.", "KC..Default."),
      features = c("intercept", "intercept", "propdec2", "linefail"),
      seedpars = c(.9),
      interc = TRUE
    )
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.9     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.303338 
#> LogLike logistic: -26461.64297989 
#> step par values =0.9
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.901     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.303331 
#> LogLike logistic: -26461.90817507 
#> step par values =0.901
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.899     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.303345 
#> LogLike logistic: -26461.37512859 
#> step par values =0.899
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 1e-05     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.303861 
#> LogLike logistic: -26441.79143124 
#> step par values =1e-05
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.00101     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.303889 
#> LogLike logistic: -26440.71586808 
#> step par values =0.00101
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 1e-05     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.303861 
#> LogLike logistic: -26441.79143124 
#> step par values =1e-05
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.721272126013422     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.305553 
#> LogLike logistic: -26377.5058647 
#> step par values =0.7212721
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.722272126013422     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.305537 
#> LogLike logistic: -26378.12033595 
#> step par values =0.7222721
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.720272126013422     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.305569 
#> LogLike logistic: -26376.89070318 
#> step par values =0.7202721
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.360641063006711     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.309389 
#> LogLike logistic: -26231.79078822 
#> step par values =0.3606411
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.361641063006711     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.309391 
#> LogLike logistic: -26231.74809 
#> step par values =0.3616411
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.359641063006711     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.309388 
#> LogLike logistic: -26231.82871598 
#> step par values =0.3596411
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.431503524074648     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.30926 
#> LogLike logistic: -26236.70871037 
#> step par values =0.4315035
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.432503524074648     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.309255 
#> LogLike logistic: -26236.89165611 
#> step par values =0.4325035
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.430503524074648     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.309265 
#> LogLike logistic: -26236.53310439 
#> step par values =0.4305035
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.373666739924378     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.309396 
#> LogLike logistic: -26231.53205027 
#> step par values =0.3736667
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.374666739924378     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.309396 
#> LogLike logistic: -26231.53409763 
#> step par values =0.3746667
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.372666739924378     
#> linefail KC..Default.      
#> linefailKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.309396 
#> LogLike logistic: -26231.52875846 
#> step par values =0.3726667

Recency tracing with logitdec

    # Student and KC intercepts plus "logitdec" and "recency" on the KC.
    # fixedpars holds the parameters at 0.9 (logitdec) and 0.5 (recency).
    modelob <- LKT(
      data = val,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default.", "KC..Default."),
      features = c("intercept", "intercept", "logitdec", "recency"),
      fixedpars = c(.9, .5),
      interc = TRUE
    )
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logitdec KC..Default. 0.9     
#> recency KC..Default. 0.5     
#> recencyKC..Default.+logitdecKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.329326 
#> LogLike logistic: -25474.53126101

Recency tracing with logitdec and transfer from cluster


# Same model as the previous example plus a "logitdec" feature on
# KC..Cluster. (fixed parameter 0.5); the fit is timed with system.time().
system.time({
  modelob <- LKT(
    data = val,
    components = c("Anon.Student.Id", "KC..Default.", "KC..Default.", "KC..Default.", "KC..Cluster."),
    features = c("intercept", "intercept", "logitdec", "recency", "logitdec"),
    fixedpars = c(.9, .5, .5),
    interc = TRUE
  )
})
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logitdec KC..Default. 0.9     
#> recency KC..Default. 0.5     
#> logitdec KC..Cluster. 0.5     
#> logitdecKC..Cluster.+recencyKC..Default.+logitdecKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.329987 
#> LogLike logistic: -25449.41953482
#>    user  system elapsed 
#>    0.28    0.00    1.70

Performance Prediction Equation (PPE)

    # PPE: student and KC intercepts plus "ppe" and "logitdec" on the KC.
    # The first four fixed parameters belong to ppe, the fifth to logitdec
    # (see the printed feature lines below).
    modelob <- LKT(
      data = val,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default.", "KC..Default."),
      features = c("intercept", "intercept", "ppe", "logitdec"),
      fixedpars = c(0.3491901, 0.2045801, 1e-05, 0.9734477, 0.4443027),
      interc = TRUE
    )
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> ppe KC..Default. 0.3491901 0.2045801 1e-05 0.9734477  
#> logitdec KC..Default. 0.4443027     
#> logitdecKC..Default.+ppeKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.349833 
#> LogLike logistic: -24695.58586712

base4 example

    # base4: student and KC intercepts plus "base4" and "logitdec" on the KC.
    # The first four fixed parameters belong to base4, the fifth to logitdec
    # (see the printed feature lines below).
    modelob <- LKT(
      data = val,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default.", "KC..Default."),
      features = c("intercept", "intercept", "base4", "logitdec"),
      fixedpars = c(0.1890747, 0.6309054, 0.05471752, .5, 0.2160748),
      interc = TRUE
    )
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> base4 KC..Default. 0.1890747 0.6309054 0.05471752 0.5  
#> logitdec KC..Default. 0.2160748     
#> logitdecKC..Default.+base4KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.316284 
#> LogLike logistic: -25969.92489408

Using other features: see the LKT paper, and see the computefeatures function in the main R code for the package: https://github.com/Optimal-Learning-Lab/LKT/blob/master/R/LKTfunctions.R

Simple interactions

    # "logitdec" on student and on KC (fixed at 0.9 and 0.8), plus "lineafm" on
    # the KC. interacts has one entry per feature: only the third feature gets
    # an interaction, with Level..Unitname.
    modelob <- LKT(
      data = val,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default."),
      features = c("logitdec", "logitdec", "lineafm"),
      interacts = c(NA, NA, "Level..Unitname."),
      fixedpars = c(.9, .8),
      interc = TRUE
    )
#> logitdec Anon.Student.Id 0.9     
#> logitdec KC..Default. 0.8     
#> lineafm KC..Default.      
#> lineafmKC..Default.:Level..Unitname.+logitdecKC..Default.+logitdecAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.187931 
#> LogLike logistic: -30845.19646772

Individualized Additive Factors Model (iAFM) fixed effect version

    # iAFM: student and KC intercepts, "lineafm$" on the KC, and a second
    # "lineafm" on the KC that interacts with the student id.
    modelob <- LKT(
      data = val,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default.", "KC..Default."),
      features = c("intercept", "intercept", "lineafm$", "lineafm"),
      interacts = c(NA, NA, NA, "Anon.Student.Id"),
      interc = TRUE
    )
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> lineafm$ KC..Default.      
#> lineafm KC..Default.      
#> lineafmKC..Default.:Anon.Student.Id+lineafmKC..Default.:e$data$KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.309291 
#> LogLike logistic: -26235.52237198

Connectors (another way to do interactions)

    # connectors names the operator placed between consecutive features:
    # "+" between the first and second, "*" between the second and third
    # (compare the printed formula below).
    modelob <- LKT(
      data = val,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default."),
      features = c("logitdec", "logitdec$", "lineafm$"),
      connectors = c("+", "*"),
      fixedpars = c(.9, .85),
      interc = TRUE
    )
#> logitdec Anon.Student.Id 0.9     
#> logitdec$ KC..Default. 0.85     
#> lineafm$ KC..Default.      
#> lineafmKC..Default.:e$data$KC..Default.*logitdecKC..Default.:e$data$KC..Default.+logitdecAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.214132 
#> LogLike logistic: -29849.99089373

AutoKC


# Model specifications for the AutoKC comparison.
# mnames, compl, featl, connl and autol are parallel: position i in each one
# describes the same model, so entries must be added or removed in all five.

# Display names for the 11 models.
mnames<-c("IRT",
          "Faculty",
          "Log Full autoKC",
          "Log Simple PFA",
          "Log Full PFA",
          "Log Full PFA full autoKC additive",
          "Log Full PFA Faculty additive ",
          "Log Simple PFA Faculty interactive ",
          "Log Simple PFA full autoKC interactive",
          "Log Full PFA simple autoKC interactive",
          "Log Simple PFA simple autoKC interactive")
# Results table: one row per model, r2s is filled in by the fitting loop.
r2s<-data.frame(name=mnames,r2s=NA)
# Component (data column) for each feature of each model.
compl<-list(c("Anon.Student.Id","KC..Default."),
            c("Anon.Student.Id","KC..Default.", "Anon.Student.Id", "Anon.Student.Id"),
            c("Anon.Student.Id","KC..Default.", "KC..Default.", "KC..Default."),
            c("Anon.Student.Id","KC..Default.", "KC..Default.", "KC..Default."),
            c("Anon.Student.Id","KC..Default.", "KC..Default.", "KC..Default."),
            c("Anon.Student.Id","KC..Default.", "KC..Default.", "KC..Default." ,"KC..Default." ,"KC..Default."),
            c("Anon.Student.Id","KC..Default.", "KC..Default.","Anon.Student.Id", "KC..Default."  ,"Anon.Student.Id"),
            c("Anon.Student.Id","KC..Default.", "KC..Default.","Anon.Student.Id", "KC..Default."  ,"Anon.Student.Id"),
            c("Anon.Student.Id","KC..Default.", "KC..Default.", "KC..Default." ,"KC..Default." ,"KC..Default."),
            c("Anon.Student.Id","KC..Default.", "KC..Default.", "KC..Default." ,"KC..Default." ,"KC..Default."),
            c("Anon.Student.Id","KC..Default.", "KC..Default.", "KC..Default." ,"KC..Default." ,"KC..Default."))
# Feature applied to the component in the same position of compl.
featl<-list(c("intercept","intercept"),
            c("intercept","intercept",  "logfail",  "logsuc"),
            c("intercept","intercept",  "logfail$",  "logsuc$"),
            c("intercept","intercept",  "logfail", "logsuc"),
            c("intercept","intercept",  "logfail$", "logsuc$"),
            c("intercept","intercept",  "logfail$", "logfail$", "logsuc$", "logsuc$"),
            c("intercept","intercept",  "logfail$", "logfail", "logsuc$", "logsuc"),
            c("intercept","intercept",  "logfail", "logfail", "logsuc", "logsuc"),
            c("intercept","intercept",  "logfail", "logfail$", "logsuc", "logsuc$"),
            c("intercept","intercept",  "logfail$", "logfail", "logsuc$", "logsuc"),
            c("intercept","intercept",  "logfail", "logfail", "logsuc", "logsuc"))
# Connectors between consecutive features; each entry has one element fewer
# than the matching featl entry. "*" appears only in the "interactive" models.
connl<-list(c("+"),
            c("+","+","+"),
            c("+","+","+"),
            c("+","+","+"),
            c("+","+","+"),
            c("+","+","+","+","+"),
            c("+","+","+","+","+"),
            c("+","+","*","+","*"),
            c("+","+","*","+","*"),
            c("+","+","*","+","*"),
            c("+","+","*","+","*"))
# Value passed as autoKC for each feature: 0 or 40.
# NOTE(review): presumably 0 means "no autoKC" and 40 is the number of
# clusters for that feature - confirm against the LKT documentation.
autol <- list(c(0,0),
              c(0,0,0,0),
              c(0,0,40,40),
              c(0,0,0,0),
              c(0,0,0,0),
              c(0,0,0,40,0,40),
              c(0,0,0,0,0,0),
              c(0,0,0,0,0,0),
              c(0,0,0,40,0,40),
              c(0,0,0,40,0,40),
              c(0,0,0,40,0,40))
# Fit each model specification in turn and record its R2 in r2s.
# seq_along() is used so an empty specification list runs zero iterations.
for (i in seq_along(compl)) {
  # Plain `<-` rather than `<<-`: the loop runs at top level, and `<<-` would
  # assign outside the current environment when the chunk is not evaluated in
  # the global environment, leaving the next two lines reading a stale modelob.
  modelob <- LKT(
    data = val,
    components = compl[[i]],
    features = featl[[i]],
    connectors = connl[[i]],
    autoKC = autol[[i]],
    verbose = TRUE
  )
  cat(" R2 =  ", modelob$r2, "\n")
  r2s$r2s[i] <- modelob$r2
}
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.17526 
#> LogLike logistic: -31326.49035196 
#>  R2 =   0.17526 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logfail Anon.Student.Id      
#> logsuc Anon.Student.Id      
#> logsucAnon.Student.Id+logfailAnon.Student.Id+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.193067 
#> LogLike logistic: -30650.10663326 
#>  R2 =   0.193067 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logfail$ AC3      
#> logsuc$ AC4      
#> logsucAC4:e$data$AC4+logfailAC3:e$data$AC3+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.276453 
#> LogLike logistic: -27482.82384024 
#>  R2 =   0.276453 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logfail KC..Default.      
#> logsuc KC..Default.      
#> logsucKC..Default.+logfailKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.305306 
#> LogLike logistic: -26386.87623529 
#>  R2 =   0.305306 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logfail$ KC..Default.      
#> logsuc$ KC..Default.      
#> logsucKC..Default.:e$data$KC..Default.+logfailKC..Default.:e$data$KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.317028 
#> LogLike logistic: -25941.64544463 
#>  R2 =   0.317028 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logfail$ KC..Default.      
#> logfail$ AC4      
#> logsuc$ KC..Default.      
#> logsuc$ AC6      
#> logsucAC6:e$data$AC6+logsucKC..Default.:e$data$KC..Default.+logfailAC4:e$data$AC4+logfailKC..Default.:e$data$KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.323463 
#> LogLike logistic: -25697.2472566 
#>  R2 =   0.323463 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logfail$ KC..Default.      
#> logfail Anon.Student.Id      
#> logsuc$ KC..Default.      
#> logsuc Anon.Student.Id      
#> logsucAnon.Student.Id+logsucKC..Default.:e$data$KC..Default.+logfailAnon.Student.Id+logfailKC..Default.:e$data$KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.318007 
#> LogLike logistic: -25904.46800482 
#>  R2 =   0.318007 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logfail KC..Default.      
#> logfail Anon.Student.Id      
#> logsuc KC..Default.      
#> logsuc Anon.Student.Id      
#> logsucAnon.Student.Id*logsucKC..Default.+logfailAnon.Student.Id*logfailKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.327939 
#> LogLike logistic: -25527.20709165 
#>  R2 =   0.327939 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logfail KC..Default.      
#> logfail$ AC4      
#> logsuc KC..Default.      
#> logsuc$ AC6      
#> logsucAC6:e$data$AC6*logsucKC..Default.+logfailAC4:e$data$AC4*logfailKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.333046 
#> LogLike logistic: -25333.24318844 
#>  R2 =   0.333046 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logfail$ KC..Default.      
#> logfail AC4      
#> logsuc$ KC..Default.      
#> logsuc AC6      
#> logsucAC6*logsucKC..Default.:e$data$KC..Default.+logfailAC4*logfailKC..Default.:e$data$KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.336243 
#> LogLike logistic: -25211.79208122 
#>  R2 =   0.336243 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> logfail KC..Default.      
#> logfail AC4      
#> logsuc KC..Default.      
#> logsuc AC6      
#> logsucAC6*logsucKC..Default.+logfailAC4*logfailKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.316213 
#> LogLike logistic: -25972.60067752 
#>  R2 =   0.316213


# Horizontal bar chart of R2 per model; the factor levels are set to the
# reverse of the mnames order before plotting.
r2s$name <- factor(r2s$name, levels = rev(mnames))
plot <- ggplot(r2s, aes(name, r2s)) +
  geom_bar(stat = "identity") +
  xlab("Model Version") +
  ylab("R-squared Gain") +
  coord_flip() +
  theme(text = element_text(size = 12))
plot

plot of chunk KC_search_method



# Sweep the autoKC value over 2, 12, ..., 62 for model specifications 3 and 6.
# For every value two models are fitted: one with the default settings and
# one with autoKCcont = "rand" for every feature; both R2 values are plotted.
mnames <- seq(2, 71, 10)
for (i in c(3, 6)) {
  r2s <- data.frame(name = mnames, r2s = NA, r2sr = NA)
  for (j in seq_along(mnames)) {
    k <- mnames[j]

    # autoKC is k wherever the specification has a non-zero autoKC entry.
    modelob <- LKT(
      data = val,
      components = compl[[i]],
      features = featl[[i]],
      connectors = connl[[i]],
      autoKC = k * (autol[[i]] > 0),
      verbose = FALSE
    )
    cat(" R2 =  ", modelob$r2, "\n")
    r2s$r2s[j] <- modelob$r2

    # Same specification with autoKCcont set to "rand" for each feature.
    modelob <- LKT(
      data = val,
      components = compl[[i]],
      features = featl[[i]],
      connectors = connl[[i]],
      autoKC = k * (autol[[i]] > 0),
      verbose = FALSE,
      autoKCcont = rep("rand", length(featl[[i]]))
    )
    cat(" R2 =  ", modelob$r2, "\n")
    r2s$r2sr[j] <- modelob$r2
  }

  # Solid line: default fit; "twodash" line: the autoKCcont = "rand" fit.
  r2s$name <- factor(r2s$name, levels = (mnames))
  plot <- ggplot(r2s, aes(name, group = 1)) +
    geom_line(aes(y = r2s)) +
    geom_line(aes(y = r2sr), linetype = "twodash") +
    scale_x_discrete(breaks = seq(from = 2, to = 71, by = 5)) +
    xlab("autoKC Clusters") +
    ylab("McFadden's R-squared Gain") +
    theme(text = element_text(size = 16)) +
    geom_point(aes(y = r2s)) +
    geom_point(aes(y = r2sr))
  print(plot)
}
#>  R2 =   0.195021 
#>  R2 =   0.194694 
#>  R2 =   0.228315 
#>  R2 =   0.220281 
#>  R2 =   0.249023 
#>  R2 =   0.240697 
#>  R2 =   0.262488 
#>  R2 =   0.262813 
#>  R2 =   0.278989 
#>  R2 =   0.27456 
#>  R2 =   0.287932 
#>  R2 =   0.293118 
#>  R2 =   0.302358 
#>  R2 =   0.30531

plot of chunk KC_search_method

#>  R2 =   0.318751 
#>  R2 =   0.317605 
#>  R2 =   0.32162 
#>  R2 =   0.317793 
#>  R2 =   0.322841 
#>  R2 =   0.31803 
#>  R2 =   0.322601 
#>  R2 =   0.317869 
#>  R2 =   0.323598 
#>  R2 =   0.317767 
#>  R2 =   0.320678 
#>  R2 =   0.31757 
#>  R2 =   0.318027 
#>  R2 =   0.317205

plot of chunk KC_search_method

Synthetic discrimination parameter testing (experimental)

# discrimination parameters normally control how strongly ability affects correctness at an item level
# the "synthetic" version here proposes something similar: that the student ability interacts with the item difficulty
# here a running estimate of student ability (logitdec) is multiplied by a value for the item to indicate the importance of student ability for answering that item
# consider the results, which suggest the first example is highly correlated with overall learning..., and the second example where it is slightly less than baseline value (.69 is the baseline for the influence of the student on all items)
# A model like this means that some items are discriminated better by higher or lower student ability (more or less sensitive to ability)


# Model specifications for the synthetic discrimination comparison.
# mnames, compl, featl and connl are parallel: position i in each describes
# the same model.
mnames <- c(
  "IRT",
  "IRT ad inter",
  "AFM",
  "IRT ad inter with AFM",
  "IRT ad"
)
# Results table: one row per model name, r2s filled in as models are fitted.
r2s <- data.frame(name = mnames, r2s = NA)
# Component (data column) for each feature of each model.
compl <- list(
  c("Anon.Student.Id", "KC..Default."),
  c("Anon.Student.Id", "KC..Default."),
  c("Anon.Student.Id", "KC..Default.", "KC..Default."),
  c("Anon.Student.Id", "KC..Default.", "KC..Default."),
  c("Anon.Student.Id", "KC..Default.")
)
# Feature applied to the component in the same position of compl.
featl <- list(
  c("intercept", "intercept"),
  c("logitdec", "intercept"),
  c("logitdec", "intercept", "lineafm"),
  c("logitdec", "intercept", "lineafm"),
  c("logitdec", "intercept")
)
# Connectors between consecutive features; "*" joins the student logitdec
# to the KC intercept in the "inter" models.
connl <- list(
  c("+"),
  c("*"),
  c("+", "+"),
  c("*", "+"),
  c("+")
)
# Fit the first four specifications and record the coefficient count and R2.
# NOTE(review): only models 1-4 are fitted, so the fifth entry ("IRT ad")
# keeps NA in r2s - confirm this is intentional.
for (i in 1:4) {
  # Plain `<-` rather than `<<-`: the loop runs at top level, and `<<-` would
  # assign outside the current environment when the chunk is not evaluated in
  # the global environment, leaving the lines below reading a stale modelob.
  modelob <- LKT(
    data = val,
    components = compl[[i]],
    features = featl[[i]],
    connectors = connl[[i]],
    fixedpars = c(.925),
    interc = TRUE,
    verbose = FALSE
  )
  cat("coefs", length(modelob$coefs))
  cat(" R2 =  ", modelob$r2, "\n")
  r2s$r2s[i] <- modelob$r2
}
#> coefs 549 R2 =   0.17526 
#> coefs 144 R2 =   0.170469 
#> coefs 74 R2 =   0.247155 
#> coefs 145 R2 =   0.249731

Credibility intervals


# Model specification reused by the bootstrap below: a KC intercept plus
# "linesuc$" and "linefail$" on the KC, with no fixed parameters.
components <- c("KC..Default.", "KC..Default.", "KC..Default.")
features <- c("intercept", "linesuc$", "linefail$")
fixedpars <- NA

# or

#components = c("Anon.Student.Id", "KC..Default.", "KC..Default.", "KC..Default.")
#features = c("logit", "logit", "linesuc","linefail")

# Fit the model once on the full data set.
mod1 <- LKT(
  setDT(val),
  interc = TRUE,
  components = components,
  features = features,
  fixedpars = fixedpars,
  seedpars = c(NA)
)
#> intercept KC..Default.      
#> linesuc$ KC..Default.      
#> linefail$ KC..Default.      
#> linefailKC..Default.:e$data$KC..Default.+linesucKC..Default.:e$data$KC..Default.+interceptKC..Default.+1 
#> McFadden's R2 logistic: 0.21369 
#> LogLike logistic: -29866.78674638


# Settings passed to LKT_HDI(), which is timed with system.time().
n_students <- 400
n_boot <- 100
system.time({
  boot_res <- LKT_HDI(
    val, n_boot, n_students,
    comps = components,
    feats = features,
    fixeds = fixedpars
  )
})
#> 0%...2%...3%...4%...5%...6%...7%...8%...9%...10%...11%...12%...13%...14%...15%...16%...17%...18%...19%...20%...21%...22%...23%...24%...25%...26%...27%...28%...29%...30%...31%...32%...33%...34%...35%...36%...37%...38%...39%...40%...41%...42%...43%...44%...45%...46%...47%...48%...49%...50%...51%...52%...53%...54%...55%...56%...57%...58%...59%...60%...61%...62%...63%...64%...65%...66%...67%...68%...69%...70%...71%...72%...73%...74%...75%...76%...77%...78%...79%...80%...81%...82%...83%...84%...85%...86%...87%...88%...89%...90%...91%...92%...93%...94%...95%...96%...97%...98%...99%...100%
#>    user  system elapsed 
#>   10.69    0.70   71.70



# Names of the coefficients whose interval includes zero
# (rows of coef_hdi flagged by includes_zero).
zero_idx <- with(boot_res$coef_hdi, which(includes_zero == TRUE))
boot_res$coef_hdi$coef_name[zero_idx]
#>  [1] "interceptKC..Default.A statistic describes a characteristic of a __________."                                                                                                                                                                                        
#>  [2] "interceptKC..Default.Standard deviation refers to how individual observations vary from the mean, while standard error refers to how __________ observations (i.e. samples) vary from the mean."                                                                     
#>  [3] "interceptKC..Default.The __________ for an observation is the squared difference from the mean."                                                                                                                                                                     
#>  [4] "interceptKC..Default.The __________ is a statistic that describes typical variability for a set of observations."                                                                                                                                                    
#>  [5] "interceptKC..Default.The normal distribution has __________ percent of its observations in the range between -1 and 1 standard deviations from the mean."                                                                                                            
#>  [6] "interceptKC..Default.The normal distribution has __________ percent of its observations in the range between -2 and 2 standard deviations from the mean."                                                                                                            
#>  [7] "interceptKC..Default.The sample mean, or average, is a __________ that represents what we expect observations in the sample to be centered around."                                                                                                                  
#>  [8] "interceptKC..Default.The sample mean, or average, is a statistic that __________ what we expect observations in the sample to be centered around."                                                                                                                   
#>  [9] "interceptKC..Default.The standard deviation is a __________ that describes typical variability for a set of observations."                                                                                                                                           
#> [10] "linefailKC..Default.:e$data$KC..Default.A normal distribution, the most common natural distribution, is symmetric, so for example, the __________ of observing a value in the range between -1 and 0 is equal to the __________ of the range between 0 and 1."       
#> [11] "linefailKC..Default.:e$data$KC..Default.A normal distribution, the most common natural distribution, is symmetric, so for example, the probability of observing a value in the range between -1 and 0 is __________ to the probability of the range between 0 and 1."
#> [12] "linefailKC..Default.:e$data$KC..Default.Although multiple __________ are variable they are intended to represent the population from which they come."                                                                                                               
#> [13] "linefailKC..Default.:e$data$KC..Default.Standard deviation refers to how __________ observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean."                                                    
#> [14] "linefailKC..Default.:e$data$KC..Default.The __________ distribution has 68 percent of its observations in the range between -1 and 1 standard deviations from the mean."                                                                                             
#> [15] "linefailKC..Default.:e$data$KC..Default.The __________ distribution has 95 percent of its observations in the range between -2 and 2 standard deviations from the mean."                                                                                             
#> [16] "linefailKC..Default.:e$data$KC..Default.The __________ distribution has 99.7 percent of its observations in the range between -3 and 3 standard deviations from the mean."                                                                                           
#> [17] "linefailKC..Default.:e$data$KC..Default.The __________ for an observation is the squared difference from the mean."                                                                                                                                                  
#> [18] "linefailKC..Default.:e$data$KC..Default.The density of a distribution between a __________ of values represents the probability that a new observation will fall within that __________ also."                                                                       
#> [19] "linefailKC..Default.:e$data$KC..Default.The density of a distribution between a range of values represents the probability that a new __________ will fall within that range also."                                                                                  
#> [20] "linefailKC..Default.:e$data$KC..Default.The normal distribution has 68 percent of its observations in the __________ between -1 and 1 standard deviations from the mean."                                                                                            
#> [21] "linefailKC..Default.:e$data$KC..Default.The normal distribution has 95 percent of its observations in the __________ between -2 and 2 standard deviations from the mean."                                                                                            
#> [22] "linefailKC..Default.:e$data$KC..Default.The variance for a __________ is computed by dividing the sum of the observation variances by 1 less than the total observations."                                                                                           
#> [23] "linefailKC..Default.:e$data$KC..Default.Variability refers to how __________ the observations are from each other and sometimes from the mean."                                                                                                                      
#> [24] "linefailKC..Default.:e$data$KC..Default.Variability refers to how different the observations are from each other and sometimes from the __________."                                                                                                                 
#> [25] "linefailKC..Default.:e$data$KC..Default.When a study __________ human subjects, the sample is the group of people who participated in the study."                                                                                                                    
#> [26] "linefailKC..Default.:e$data$KC..Default.When a study involves human subjects, the sample is the group of people who __________ in the study."


# First 20 rows of the full report
print(boot_res$coef_hdi[seq_len(20), ])
#>                                                                                                                                                                                                                                            coef_name
#> 1                                                                                                                                                                                                                                        (Intercept)
#> 2                                                                                                                          interceptKC..Default.__________ refers to how different the observations are from each other and sometimes from the mean.
#> 3                                                              interceptKC..Default.__________ refers to how individual observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean.
#> 4                                                                                                                                                                          interceptKC..Default.A __________ describes a characteristic of a sample.
#> 5                                                                                                                                interceptKC..Default.A __________ describes the likelihood that observations will occur within any range of values.
#> 6   interceptKC..Default.A __________ distribution, the most common natural distribution, is symmetric, so for example, the probability of observing a value in the range between -1 and 0 is equal to the probability of the range between 0 and 1.
#> 7                                                                                                                              interceptKC..Default.A distribution describes the __________ that observations will occur within any range of values.
#> 8                                                                                                                                interceptKC..Default.A distribution describes the likelihood that __________ will occur within any range of values.
#> 9                                                                                                                         interceptKC..Default.A distribution describes the likelihood that observations will occur within any __________ of values.
#> 10     interceptKC..Default.A normal distribution, the most common natural distribution, is __________, so for example, the probability of observing a value in the range between -1 and 0 is equal to the probability of the range between 0 and 1.
#> 11        interceptKC..Default.A normal distribution, the most common natural distribution, is symmetric, so for example, the __________ of observing a value in the range between -1 and 0 is equal to the __________ of the range between 0 and 1.
#> 12 interceptKC..Default.A normal distribution, the most common natural distribution, is symmetric, so for example, the probability of observing a value in the range between -1 and 0 is __________ to the probability of the range between 0 and 1.
#> 13                                                                                                                                                                         interceptKC..Default.A statistic __________ a characteristic of a sample.
#> 14                                                                                                                                                                              interceptKC..Default.A statistic describes a __________ of a sample.
#> 15                                                                                                                                                                      interceptKC..Default.A statistic describes a characteristic of a __________.
#> 16                                                                                                                interceptKC..Default.Although multiple __________ are variable they are intended to represent the population from which they come.
#> 17                                                                                                                 interceptKC..Default.Although multiple samples are __________ they are intended to represent the population from which they come.
#> 18                                                                                                                  interceptKC..Default.Although multiple samples are variable they are intended to __________ the population from which they come.
#> 19                                                                                                                   interceptKC..Default.Although multiple samples are variable they are intended to represent the __________ from which they come.
#> 20                                                                                                                                 interceptKC..Default.Standard deviation is the __________ of the variance, also known as root mean squared error.
#>           lower       upper includes_zero credMass
#> 1  -0.776855037 -0.59171048         FALSE     0.95
#> 2  -0.719232010 -0.43196963         FALSE     0.95
#> 3   0.161850365  0.40390795         FALSE     0.95
#> 4   0.168099905  0.43184094         FALSE     0.95
#> 5  -1.413323014 -1.11748022         FALSE     0.95
#> 6   0.704606849  0.95048193         FALSE     0.95
#> 7   0.582447517  0.84438787         FALSE     0.95
#> 8  -0.774016704 -0.45712724         FALSE     0.95
#> 9   1.282567358  1.57571596         FALSE     0.95
#> 10 -1.004087198 -0.67439583         FALSE     0.95
#> 11 -0.514210871 -0.26269855         FALSE     0.95
#> 12  0.495067022  0.72139029         FALSE     0.95
#> 13  0.866342468  1.15006434         FALSE     0.95
#> 14 -0.468863915 -0.17434038         FALSE     0.95
#> 15 -0.003690688  0.25126859          TRUE     0.95
#> 16  0.255209434  0.49380662         FALSE     0.95
#> 17 -0.347731084 -0.11273262         FALSE     0.95
#> 18  1.224123892  1.50561037         FALSE     0.95
#> 19 -0.280749118 -0.04743209         FALSE     0.95
#> 20 -0.587209182 -0.32412091         FALSE     0.95

Recency tracing with RPFA propdec2 feature (the one in the original Galyardt and Goldin paper)

    # Intercepts for student and KC, plus propdec2 and recency on the KC.
    # fixedpars = c(NA, NA) supplies no fixed values for the two nonlinear
    # parameters (propdec2, recency); the printed trace shows both being
    # searched starting from 0.5.
    # NOTE(review): the KC..Default. intercept is listed twice - confirm whether
    # a different component was intended for one of them.
    modelob <- LKT(
      data = val,
      interc = TRUE,
      components = c("Anon.Student.Id", rep("KC..Default.", 4)),
      features = c(
        "intercept",
        "intercept",
        "intercept",
        "propdec2",
        "recency"
      ),
      fixedpars = c(NA, NA)
    )
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.5     
#> recency KC..Default. 0.5     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.348705 
#> LogLike logistic: -24738.45370715 
#> step par values =0.5,0.5
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.501     
#> recency KC..Default. 0.5     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.348706 
#> LogLike logistic: -24738.41073511 
#> step par values =0.501,0.5
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.499     
#> recency KC..Default. 0.5     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.348703 
#> LogLike logistic: -24738.50684569 
#> step par values =0.499,0.5
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.5     
#> recency KC..Default. 0.501     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.348676 
#> LogLike logistic: -24739.54599459 
#> step par values =0.5,0.501
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.5     
#> recency KC..Default. 0.499     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.348733 
#> LogLike logistic: -24737.37477104 
#> step par values =0.5,0.499
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.99999     
#> recency KC..Default. 1e-05     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.322684 
#> LogLike logistic: -25726.81721576 
#> step par values =0.99999,1e-05
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.99999     
#> recency KC..Default. 1e-05     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.322684 
#> LogLike logistic: -25726.81721576 
#> step par values =0.99999,1e-05
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.99899     
#> recency KC..Default. 1e-05     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.322692 
#> LogLike logistic: -25726.5050818 
#> step par values =0.99899,1e-05
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.99999     
#> recency KC..Default. 0.00101     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.322937 
#> LogLike logistic: -25717.20836833 
#> step par values =0.99999,0.00101
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.99999     
#> recency KC..Default. 1e-05     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.322684 
#> LogLike logistic: -25726.81721576 
#> step par values =0.99999,1e-05
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.651218348246111     
#> recency KC..Default. 0.348781651753889     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351149 
#> LogLike logistic: -24645.615571 
#> step par values =0.6512183,0.3487817
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.652218348246111     
#> recency KC..Default. 0.348781651753889     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351145 
#> LogLike logistic: -24645.76149327 
#> step par values =0.6522183,0.3487817
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.650218348246111     
#> recency KC..Default. 0.348781651753889     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351153 
#> LogLike logistic: -24645.47157992 
#> step par values =0.6502183,0.3487817
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.651218348246111     
#> recency KC..Default. 0.349781651753889     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351144 
#> LogLike logistic: -24645.79931004 
#> step par values =0.6512183,0.3497817
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.651218348246111     
#> recency KC..Default. 0.347781651753889     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351154 
#> LogLike logistic: -24645.43964141 
#> step par values =0.6512183,0.3477817
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.607763143584877     
#> recency KC..Default. 0.333699014292316     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351335 
#> LogLike logistic: -24638.53625491 
#> step par values =0.6077631,0.333699
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.608763143584877     
#> recency KC..Default. 0.333699014292316     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351333 
#> LogLike logistic: -24638.63269962 
#> step par values =0.6087631,0.333699
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.606763143584877     
#> recency KC..Default. 0.333699014292316     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351338 
#> LogLike logistic: -24638.44099279 
#> step par values =0.6067631,0.333699
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.607763143584877     
#> recency KC..Default. 0.334699014292316     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351334 
#> LogLike logistic: -24638.60117648 
#> step par values =0.6077631,0.334699
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.607763143584877     
#> recency KC..Default. 0.332699014292316     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351337 
#> LogLike logistic: -24638.47932095 
#> step par values =0.6077631,0.332699
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.52023430625712     
#> recency KC..Default. 0.327086012293456     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351435 
#> LogLike logistic: -24634.74557987 
#> step par values =0.5202343,0.327086
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.52123430625712     
#> recency KC..Default. 0.327086012293456     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351436 
#> LogLike logistic: -24634.72967931 
#> step par values =0.5212343,0.327086
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.51923430625712     
#> recency KC..Default. 0.327086012293456     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351435 
#> LogLike logistic: -24634.76282649 
#> step par values =0.5192343,0.327086
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.52023430625712     
#> recency KC..Default. 0.328086012293456     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351435 
#> LogLike logistic: -24634.75658647 
#> step par values =0.5202343,0.328086
#> 
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> intercept KC..Default.      
#> propdec2 KC..Default. 0.52023430625712     
#> recency KC..Default. 0.326086012293456     
#> recencyKC..Default.+propdec2KC..Default.+interceptKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.351435 
#> LogLike logistic: -24634.74277483 
#> step par values =0.5202343,0.326086

brpropdec (experimental feature; named `baseratepropdec` in the code)

    # Model using baseratepropdec with two logitdec terms and recency.
    # All four nonlinear parameters are supplied through fixedpars, one per
    # feature in the order listed. dualfit = TRUE is set; the printed output
    # additionally reports latency statistics.
    modelob <- LKT(
      data = val,
      interc = TRUE,
      dualfit = TRUE,
      components = c(
        "KC..Default.",
        "Anon.Student.Id",
        "KC..Default.",
        "KC..Default."
      ),
      features = c("baseratepropdec", "logitdec", "logitdec", "recency"),
      fixedpars = c(0.988209, 0.9690458, 0.9004974, 0.2603806)
    )
#> baseratepropdec KC..Default. 0.988209     
#> logitdec Anon.Student.Id 0.9690458     
#> logitdec KC..Default. 0.9004974     
#> recency KC..Default. 0.2603806     
#> recencyKC..Default.+logitdecKC..Default.+logitdecAnon.Student.Id+baseratepropdecKC..Default.+1 
#> R2 (cor squared) latency: 0.0782915425247155
#> McFadden's R2 logistic: 0.240331 
#> LogLike logistic: -28854.86021562 
#> Failure latency:  26.490591139291 
#> Latency Scalar: 0.774555522136028
#> Latency Intercept: 6.42015387155179

# Show the fitted coefficients of the model above.
print(modelob[["coefs"]])
#>                             coefficient
#> (Intercept)                  -0.7356173
#> recencyKC..Default.           6.1099085
#> logitdecKC..Default.          0.9436032
#> logitdecAnon.Student.Id       0.6307401
#> baseratepropdecKC..Default.   5.4948921

Simple adaptive model for practice optimization



# Adaptive practice model fitted without a global intercept (interc = FALSE;
# the printed formula ends in "+0"). The two fixedpars values are the
# parameters for logitdec (0.98) and recency (0.24), as echoed in the output.
modelob <- LKT(
  data = val,
  interc = FALSE,
  dualfit = FALSE,
  factrv = 1e11,
  components = c("Anon.Student.Id", rep("KC..Default.", 3)),
  features = c("logitdec", "logsuc", "recency", "intercept"),
  fixedpars = c(0.98, 0.24)
)
#> logitdec Anon.Student.Id 0.98     
#> logsuc KC..Default.      
#> recency KC..Default. 0.24     
#> intercept KC..Default.      
#> interceptKC..Default.+recencyKC..Default.+logsucKC..Default.+logitdecAnon.Student.Id+0 
#> McFadden's R2 logistic: 0.317203 
#> LogLike logistic: -25935.00011818

Test of new feature to trace KC intercepts across time (not within subjects)

    # Re-sort all trials by time only (no longer grouped by student) before
    # fitting the model that uses the logitdecevol feature.
    val <- val[order(val$CF..Time.), ]

    # fixedpars supplies the parameters for logitdec (0.98), recency (0.24)
    # and logitdecevol (0.99), as echoed in the printed output.
    modelob <- LKT(
      data = val,
      interc = TRUE,
      dualfit = FALSE,
      factrv = 1e11,
      components = c("Anon.Student.Id", rep("KC..Default.", 3)),
      features = c("logitdec", "logsuc", "recency", "logitdecevol"),
      fixedpars = c(0.98, 0.24, .99)
    )
#> logitdec Anon.Student.Id 0.98     
#> logsuc KC..Default.      
#> recency KC..Default. 0.24     
#> logitdecevol KC..Default. 0.99     
#> logitdecevolKC..Default.+recencyKC..Default.+logsucKC..Default.+logitdecAnon.Student.Id+1 
#> McFadden's R2 logistic: 0.305249 
#> LogLike logistic: -26389.05800396

Astonishing model (theory analysis)

# Student and KC intercepts plus two lineafm terms on the KC: "lineafm$" and
# a plain lineafm that interacts with Anon.Student.Id through `interacts`
# (the printed formula shows lineafmKC..Default.:Anon.Student.Id).
modelob <- LKT(
  data = val,
  interc = TRUE,
  dualfit = TRUE,
  factrv = 1e7,
  components = c("Anon.Student.Id", rep("KC..Default.", 3)),
  features = c("intercept", "intercept", "lineafm$", "lineafm"),
  interacts = c(NA, NA, NA, "Anon.Student.Id")
)
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> lineafm$ KC..Default.      
#> lineafm KC..Default.      
#> lineafmKC..Default.:Anon.Student.Id+lineafmKC..Default.:e$data$KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> R2 (cor squared) latency: 0.0416815097940554
#> McFadden's R2 logistic: 0.309291 
#> LogLike logistic: -26235.522445 
#> Failure latency:  26.490591139291 
#> Latency Scalar: 0.373132561162114
#> Latency Intercept: 6.78509151370996



# Student and KC intercepts plus two lineafm terms on the KC, the last one
# interacting with Anon.Student.Id.
# NOTE(review): this call has the same arguments as the preceding LKT call in
# this section and prints the same results - confirm whether the repeat is
# intentional.
modelob <- LKT(
  data = val,
  interc = TRUE,
  dualfit = TRUE,
  factrv = 1e7,
  components = c("Anon.Student.Id", rep("KC..Default.", 3)),
  features = c("intercept", "intercept", "lineafm$", "lineafm"),
  interacts = c(NA, NA, NA, "Anon.Student.Id")
)
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> lineafm$ KC..Default.      
#> lineafm KC..Default.      
#> lineafmKC..Default.:Anon.Student.Id+lineafmKC..Default.:e$data$KC..Default.+interceptKC..Default.+interceptAnon.Student.Id+1 
#> R2 (cor squared) latency: 0.0416815097940554
#> McFadden's R2 logistic: 0.309291 
#> LogLike logistic: -26235.522445 
#> Failure latency:  26.490591139291 
#> Latency Scalar: 0.373132561162114
#> Latency Intercept: 6.78509151370996

Build LKT with special feature


# Stepwise model search (forward and backward both enabled), starting from
# an empty model. Candidate terms are every allfeatures x allcomponents
# pair plus the special term "numer" on CF..End.Latency., as listed in the
# "trying to add" output.
# NOTE(review): forv/bacv look like the add/remove thresholds and maxitv the
# optimiser iteration cap - confirm against the buildLKTModel documentation.
q <- buildLKTModel(
  data = val,
  interc = TRUE,
  specialcomponents = "CF..End.Latency.",
  specialfeatures = "numer",
  allcomponents = c("Anon.Student.Id", "KC..Default."),
  currentcomponents = c(),
  forv = 100,
  bacv = 80,
  allfeatures = c(
    "lineafm", "logafm", "logsuc",
    "logfail", "linesuc", "linefail"
  ),
  currentfeatures = c(),
  currentfixedpars = c(),
  forward = TRUE,
  backward = TRUE,
  maxitv = 1,
  verbose = FALSE
)
#> 
#> Step  1 start
#> 
#> trying to add
#> numer-CF..End.Latency. 2 63996.87 
#> lineafm-Anon.Student.Id 2 75499.33 
#> logafm-Anon.Student.Id 2 74904.98 
#> logsuc-Anon.Student.Id 2 73388.72 
#> logfail-Anon.Student.Id 2 75962.09 
#> linesuc-Anon.Student.Id 2 73580.92 
#> linefail-Anon.Student.Id 2 75732.58 
#> lineafm-KC..Default. 2 69816.85 
#> logafm-KC..Default. 2 68286.98 
#> logsuc-KC..Default. 2 64218.11 
#> logfail-KC..Default. 2 75599.86 
#> linesuc-KC..Default. 2 65835.7 
#> linefail-KC..Default. 2 75861.2 
#> added 
#> numer CF..End.Latency. 
#> 
#> Step 1 results - pars  2  current BIC 63996.87 current AIC 63979.04 current AUC 0.775248 current RMSE 0.4413198  McFadden's R2 0.157857 
#> numer 
#>  CF..End.Latency. 
#> 
#> Step  2 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 3 63736.29 
#> logafm-Anon.Student.Id 3 64585.22 
#> logsuc-Anon.Student.Id 3 64112.91 
#> logfail-Anon.Student.Id 3 64806.65 
#> linesuc-Anon.Student.Id 3 62792.94 
#> linefail-Anon.Student.Id 3 63697.03 
#> lineafm-KC..Default. 3 60764.31 
#> logafm-KC..Default. 3 59943.6 
#> logsuc-KC..Default. 3 57563.54 
#> logfail-KC..Default. 3 63895.99 
#> linesuc-KC..Default. 3 58876.49 
#> linefail-KC..Default. 3 63760.33 
#> added 
#> logsuc KC..Default. 
#> 
#> trying to remove
#> numer-CF..End.Latency. 2 64218.11 
#> logsuc-KC..Default. 2 63996.87 
#> 
#> Step 2 results - pars  3  current BIC 57563.54 current AIC 57536.79 current AUC 0.8193983 current RMSE 0.4158709  McFadden's R2 0.242687 
#> numer logsuc 
#>  CF..End.Latency. KC..Default. 
#> 
#> Step  3 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 4 57509.63 
#> logafm-Anon.Student.Id 4 58195.29 
#> logsuc-Anon.Student.Id 4 57400.04 
#> logfail-Anon.Student.Id 4 57477.94 
#> linesuc-Anon.Student.Id 4 57350.18 
#> linefail-Anon.Student.Id 4 56860.92 
#> lineafm-KC..Default. 4 57472.3 
#> logafm-KC..Default. 4 58067.81 
#> logfail-KC..Default. 4 57562.05 
#> linesuc-KC..Default. 4 57525.03 
#> linefail-KC..Default. 4 57548.66 
#> added 
#> linefail Anon.Student.Id 
#> 
#> trying to remove
#> numer-CF..End.Latency. 3 63431.1 
#> logsuc-KC..Default. 3 63697.03 
#> linefail-Anon.Student.Id 3 57563.54 
#> 
#> Step 3 results - pars  4  current BIC 56860.92 current AIC 56825.25 current AUC 0.8232049 current RMSE 0.4129523  McFadden's R2 0.252079 
#> numer logsuc linefail 
#>  CF..End.Latency. KC..Default. Anon.Student.Id 
#> 
#> Step  4 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 5 55831.78 
#> logafm-Anon.Student.Id 5 55722.02 
#> logsuc-Anon.Student.Id 5 55654.87 
#> logfail-Anon.Student.Id 5 56478.34 
#> linesuc-Anon.Student.Id 5 55822.7 
#> lineafm-KC..Default. 5 56831.06 
#> logafm-KC..Default. 5 56632.82 
#> logfail-KC..Default. 5 56813.31 
#> linesuc-KC..Default. 5 57307.66 
#> linefail-KC..Default. 5 56854.58 
#> added 
#> logsuc Anon.Student.Id 
#> 
#> trying to remove
#> numer-CF..End.Latency. 4 61258.47 
#> logsuc-KC..Default. 4 61397.2 
#> linefail-Anon.Student.Id 4 57400.04 
#> logsuc-Anon.Student.Id 4 56860.92 
#> 
#> Step 4 results - pars  5  current BIC 55654.87 current AIC 55610.28 current AUC 0.8313389 current RMSE 0.4080648  McFadden's R2 0.268099 
#> numer logsuc linefail logsuc 
#>  CF..End.Latency. KC..Default. Anon.Student.Id Anon.Student.Id 
#> 
#> Step  5 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 6 55752.5 
#> logafm-Anon.Student.Id 6 55658.97 
#> logfail-Anon.Student.Id 6 55721.81 
#> linesuc-Anon.Student.Id 6 55599.14 
#> lineafm-KC..Default. 6 55653.68 
#> logafm-KC..Default. 6 55637.54 
#> logfail-KC..Default. 6 55547.47 
#> linesuc-KC..Default. 6 56382.36 
#> linefail-KC..Default. 6 55636.05 
#> added 
#> logfail KC..Default. 
#> 
#> trying to remove
#> numer-CF..End.Latency. 5 61110.52 
#> logsuc-KC..Default. 5 60516.65 
#> linefail-Anon.Student.Id 5 57418.43 
#> logsuc-Anon.Student.Id 5 56813.31 
#> logfail-KC..Default. 5 55654.87 
#> 
#> Step 5 results - pars  6  current BIC 55547.47 current AIC 55493.96 current AUC 0.8327398 current RMSE 0.4072946  McFadden's R2 0.269657 
#> numer logsuc linefail logsuc logfail 
#>  CF..End.Latency. KC..Default. Anon.Student.Id Anon.Student.Id KC..Default. 
#> 
#> Step  6 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 7 55672.47 
#> logafm-Anon.Student.Id 7 55547.57 
#> logfail-Anon.Student.Id 7 55612.33 
#> linesuc-Anon.Student.Id 7 55698.12 
#> lineafm-KC..Default. 7 55628.32 
#> logafm-KC..Default. 7 55117.84 
#> linesuc-KC..Default. 7 56378.06 
#> linefail-KC..Default. 7 55393.31 
#> added 
#> logafm KC..Default. 
#> 
#> trying to remove
#> numer-CF..End.Latency. 6 59999.48 
#> logsuc-KC..Default. 6 54880.17 
#> linefail-Anon.Student.Id 6 57164.78 
#> logsuc-Anon.Student.Id 6 56408.04 
#> logfail-KC..Default. 6 55637.54 
#> logafm-KC..Default. 6 55547.47 
#> removed 
#> logsuc KC..Default. 
#> 
#> Step 6 results - pars  6  current BIC 54880.17 current AIC 54826.67 current AUC 0.8372245 current RMSE 0.404731  McFadden's R2 0.278441 
#> numer linefail logsuc logfail logafm 
#>  CF..End.Latency. Anon.Student.Id Anon.Student.Id KC..Default. KC..Default. 
#> 
#> Step  7 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 7 54847.56 
#> logafm-Anon.Student.Id 7 55057.73 
#> logfail-Anon.Student.Id 7 54858.77 
#> linesuc-Anon.Student.Id 7 54827.1 
#> lineafm-KC..Default. 7 56187.78 
#> logsuc-KC..Default. 7 55117.84 
#> linesuc-KC..Default. 7 56122.85 
#> linefail-KC..Default. 7 54686.1 
#> added 
#> linefail KC..Default. 
#> 
#> trying to remove
#> numer-CF..End.Latency. 6 59775.74 
#> linefail-Anon.Student.Id 6 56529.44 
#> logsuc-Anon.Student.Id 6 55846.92 
#> logfail-KC..Default. 6 54729.02 
#> logafm-KC..Default. 6 60776.93 
#> linefail-KC..Default. 6 54880.17 
#> removed 
#> logfail KC..Default. 
#> 
#> Step 7 results - pars  6  current BIC 54729.02 current AIC 54675.51 current AUC 0.8378522 current RMSE 0.4041251  McFadden's R2 0.28043 
#> numer linefail logsuc logafm linefail 
#>  CF..End.Latency. Anon.Student.Id Anon.Student.Id KC..Default. KC..Default. 
#> 
#> Step  8 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 7 54765.95 
#> logafm-Anon.Student.Id 7 54764.62 
#> logfail-Anon.Student.Id 7 54740.19 
#> linesuc-Anon.Student.Id 7 54730.23 
#> lineafm-KC..Default. 7 55235.34 
#> logsuc-KC..Default. 7 54793.31 
#> logfail-KC..Default. 7 54686.1 
#> linesuc-KC..Default. 7 54921.59 
#> 
#> trying to remove
#> numer-CF..End.Latency. 5 59864.59 
#> linefail-Anon.Student.Id 5 56542.02 
#> logsuc-Anon.Student.Id 5 56472.8 
#> logafm-KC..Default. 5 60740.66 
#> linefail-KC..Default. 5 56801.57 
#> 
#> Step 8 results - pars  6  current BIC 54729.02 current AIC 54675.51 current AUC 0.8378522 current RMSE 0.4041251  McFadden's R2 0.28043 
#> numer linefail logsuc logafm linefail 
#>  CF..End.Latency. Anon.Student.Id Anon.Student.Id KC..Default. KC..Default.

plot of chunk buildLKT_with_offset

AFMstartMATHia with CV

# Two-fold cross-validation of a stepwise LKT search on val2 that starts from
# the model given in currentfeatures/currentcomponents (student intercept,
# KC..MATHia. intercept, KC..MATHia. lineafm$).
# On pass i the other fold is handed to buildLKTModel() via usefolds and the
# metrics below are computed only on rows whose fold equals i.
LLs <- numeric(2)    # held-out log-likelihood, one entry per fold
AUCs <- numeric(2)   # held-out AUC
RMSEs <- numeric(2)  # held-out RMSE
R2s <- numeric(2)    # held-out McFadden's R2
for (i in 1:2) {
  print((1:2)[-i])

  # removecomp/removefeat are paired entries: the (component, feature)
  # combinations listed there are not offered to the search.
  # NOTE(review): forv/bacv/maxitv are passed through unchanged; see the
  # buildLKTModel documentation for their exact meaning.
  modelob <- buildLKTModel(
    usefolds = (1:2)[-i], data = val2, interc = TRUE, verbose = FALSE,
    removecomp = c("Anon.Student.Id", "Anon.Student.Id", "Anon.Student.Id"),
    removefeat = c("intercept", "logsuc$", "logfail$"),
    allcomponents = c("Anon.Student.Id", "KC..MATHia.", "Problem.Name"),
    forv = 1200, bacv = 1200,
    allfeatures = c(
      "intercept", "lineafm", "logafm", "logsuc", "logfail",
      "linesuc", "linefail", "logitdec", "propdec", "recency",
      "logsuc$", "logfail$"
    ),
    maxitv = 8,
    currentfeatures = c("intercept", "intercept", "lineafm$"),
    currentcomponents = c("Anon.Student.Id", "KC..MATHia.", "KC..MATHia.")
  )

  # First element is kept for saving later; it is overwritten on every pass,
  # so after the loop it holds the value from the last fold only.
  modelobtable <- modelob[[1]]
  # Second element carries predictors, coefs and newdata used below.
  modelob <- modelob[[2]]

  # Rows of the held-out fold and their observed 0/1 outcomes (computed once
  # instead of re-evaluating the same mask for every metric).
  heldout_rows <- modelob$newdata$fold %in% i
  heldout_y <- modelob$newdata$CF..ansbin.[heldout_rows]

  # Predicted probabilities for all rows, clamped away from 0 and 1 so the
  # log-likelihood stays finite, then restricted to the held-out rows.
  pred <- as.vector(pmin(pmax(inv.logit(
    as.matrix(modelob$predictors %*% modelob$coefs)[, ]
  ), .00001), .99999)[heldout_rows])

  LLs[i] <- sum(log(ifelse(heldout_y == 1, pred, 1 - pred)))
  AUCs[i] <- suppressMessages(auc(heldout_y, pred)[1])
  print(AUCs[i])

  # Intercept-only logistic model on the held-out outcomes: the baseline for
  # McFadden's R2 = 1 - LL(model) / LL(null).
  nullmodel <- glm(heldout_y ~ 1, family = binomial(logit))
  R2s[i] <- round(1 - LLs[i] / logLik(nullmodel)[1], 6)

  RMSEs[i] <- sqrt(mean((heldout_y - pred)^2))
  print(R2s[i])
}
#> [1] 2
#> 
#> Step 0 results - pars  517  current BIC 14347.07 current AIC 9792.225 current AUC 0.8083933 current RMSE 0.3873773  McFadden's R2 0.216632 
#> intercept intercept lineafm$ 
#>  Anon.Student.Id KC..MATHia. KC..MATHia. 
#> 
#> Step  1 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 518 14352.77 
#> logafm-Anon.Student.Id 518 14233.88 
#> logsuc-Anon.Student.Id 518 14253.92 
#> logfail-Anon.Student.Id 518 14205.38 
#> linesuc-Anon.Student.Id 518 14356.46 
#> linefail-Anon.Student.Id 518 14352.26 
#> logitdec-Anon.Student.Id 519 14327.89 
#> propdec-Anon.Student.Id 519 14312.66 
#> recency-Anon.Student.Id 519 14338.33 
#> lineafm-KC..MATHia. 517 14347.08 
#> logafm-KC..MATHia. 518 14256.42 
#> logsuc-KC..MATHia. 518 14242.87 
#> logfail-KC..MATHia. 518 14353.91 
#> linesuc-KC..MATHia. 518 14291.36 
#> linefail-KC..MATHia. 518 14291.36 
#> logitdec-KC..MATHia. 519 14250.69 
#> propdec-KC..MATHia. 519 14260.43 
#> recency-KC..MATHia. 519 14145.08 
#> logsuc$-KC..MATHia. 526 14309.97 
#> logfail$-KC..MATHia. 526 14427.97 
#> intercept-Problem.Name 615 15139.31 
#> lineafm-Problem.Name 518 14249.51 
#> logafm-Problem.Name 518 14297.28 
#> logsuc-Problem.Name 518 14235.24 
#> logfail-Problem.Name 518 14357.88 
#> linesuc-Problem.Name 518 14195.75 
#> linefail-Problem.Name 518 14356.89 
#> logitdec-Problem.Name 519 14315.84 
#> propdec-Problem.Name 519 14318.59 
#> recency-Problem.Name 519 14362.88 
#> logsuc$-Problem.Name 616 15088.17 
#> logfail$-Problem.Name 616 15229.74 
#> 
#> trying to remove
#> intercept-Anon.Student.Id 18 9949.243 
#> intercept-KC..MATHia. 509 14759.59 
#> lineafm$-KC..MATHia. 508 14639.02 
#> removed 
#> intercept Anon.Student.Id 
#> 
#> Step 1 results - pars  18  current BIC 9949.243 current AIC 9790.66 current AUC 0.7399834 current RMSE 0.4131985  McFadden's R2 0.127508 
#> intercept lineafm$ 
#>  KC..MATHia. KC..MATHia. 
#> 
#> Step  2 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 19 9959.247 
#> logafm-Anon.Student.Id 19 9826.039 
#> logsuc-Anon.Student.Id 19 9502.002 
#> logfail-Anon.Student.Id 19 9882.405 
#> linesuc-Anon.Student.Id 19 9634.275 
#> linefail-Anon.Student.Id 19 9474.936 
#> logitdec-Anon.Student.Id 20 9197.332 
#> propdec-Anon.Student.Id 20 9186.146 
#> recency-Anon.Student.Id 20 9910.435 
#> lineafm-KC..MATHia. 18 9949.243 
#> logafm-KC..MATHia. 19 9822.276 
#> logsuc-KC..MATHia. 19 9379.824 
#> logfail-KC..MATHia. 19 9692.893 
#> linesuc-KC..MATHia. 19 9440.8 
#> linefail-KC..MATHia. 19 9440.802 
#> logitdec-KC..MATHia. 20 9309.649 
#> propdec-KC..MATHia. 20 9317.477 
#> recency-KC..MATHia. 20 9557.742 
#> logsuc$-KC..MATHia. 27 9436.545 
#> logfail$-KC..MATHia. 27 9760.995 
#> intercept-Problem.Name 116 10691.83 
#> lineafm-Problem.Name 19 9880.353 
#> logafm-Problem.Name 19 9917.152 
#> logsuc-Problem.Name 19 9623.6 
#> logfail-Problem.Name 19 9832.055 
#> linesuc-Problem.Name 19 9562.115 
#> linefail-Problem.Name 19 9797.845 
#> logitdec-Problem.Name 20 9606.056 
#> propdec-Problem.Name 20 9609.37 
#> recency-Problem.Name 20 9931.174 
#> logsuc$-Problem.Name 117 10470.27 
#> logfail$-Problem.Name 117 10696.25 
#> 
#> trying to remove
#> intercept-KC..MATHia. 10 10306.25 
#> lineafm$-KC..MATHia. 9 9981.208 
#> removed 
#> lineafm$ KC..MATHia. 
#> 
#> Step 2 results - pars  9  current BIC 9981.208 current AIC 9901.916 current AUC 0.7145804 current RMSE 0.4167144  McFadden's R2 0.115947 
#> intercept 
#>  KC..MATHia. 
#> 
#> Step  3 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 10 9909.24 
#> logafm-Anon.Student.Id 10 9773.03 
#> logsuc-Anon.Student.Id 10 9555.038 
#> logfail-Anon.Student.Id 10 9985.973 
#> linesuc-Anon.Student.Id 10 9712.646 
#> linefail-Anon.Student.Id 10 9960.693 
#> logitdec-Anon.Student.Id 11 9128.654 
#> propdec-Anon.Student.Id 11 9116.544 
#> recency-Anon.Student.Id 11 9937.883 
#> lineafm-KC..MATHia. 10 9890.759 
#> logafm-KC..MATHia. 10 9793.949 
#> logsuc-KC..MATHia. 10 9482.071 
#> logfail-KC..MATHia. 10 9977.862 
#> linesuc-KC..MATHia. 10 9550.599 
#> linefail-KC..MATHia. 10 9948.712 
#> logitdec-KC..MATHia. 11 9259.838 
#> propdec-KC..MATHia. 11 9268.035 
#> recency-KC..MATHia. 11 9522.264 
#> logsuc$-KC..MATHia. 18 9549.792 
#> logfail$-KC..MATHia. 18 10024.54 
#> intercept-Problem.Name 107 10676.92 
#> lineafm-Problem.Name 10 9908.015 
#> logafm-Problem.Name 10 9946.513 
#> logsuc-Problem.Name 10 9625.221 
#> logfail-Problem.Name 10 9842.644 
#> linesuc-Problem.Name 10 9558.324 
#> linefail-Problem.Name 10 9804.46 
#> logitdec-Problem.Name 11 9591.671 
#> propdec-Problem.Name 11 9594.732 
#> recency-Problem.Name 11 9966.425 
#> logsuc$-Problem.Name 108 10454.92 
#> logfail$-Problem.Name 108 10682.2 
#> 
#> Step 3 results - pars  9  current BIC 9981.208 current AIC 9901.916 current AUC 0.7145804 current RMSE 0.4167144  McFadden's R2 0.115947 
#> intercept 
#>  KC..MATHia.

plot of chunk AFMstartMATHiaCV

#> [1] 0.7226126
#> [1] 0.120224
#> [1] 1
#> 
#> Step 0 results - pars  517  current BIC 14805.76 current AIC 10250.91 current AUC 0.8084342 current RMSE 0.3900935  McFadden's R2 0.219593 
#> intercept intercept lineafm$ 
#>  Anon.Student.Id KC..MATHia. KC..MATHia. 
#> 
#> Step  1 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 518 14814.12 
#> logafm-Anon.Student.Id 518 14697.48 
#> logsuc-Anon.Student.Id 518 14706.98 
#> logfail-Anon.Student.Id 518 14660.64 
#> linesuc-Anon.Student.Id 518 14816.45 
#> linefail-Anon.Student.Id 518 14801.49 
#> logitdec-Anon.Student.Id 519 14769.06 
#> propdec-Anon.Student.Id 519 14752.14 
#> recency-Anon.Student.Id 519 14813.86 
#> lineafm-KC..MATHia. 517 14805.77 
#> logafm-KC..MATHia. 518 14706.7 
#> logsuc-KC..MATHia. 518 14666.32 
#> logfail-KC..MATHia. 518 14808.77 
#> linesuc-KC..MATHia. 518 14756.34 
#> linefail-KC..MATHia. 518 14756.34 
#> logitdec-KC..MATHia. 519 14672.76 
#> propdec-KC..MATHia. 519 14700.12 
#> recency-KC..MATHia. 519 14566.82 
#> logsuc$-KC..MATHia. 526 14721.75 
#> logfail$-KC..MATHia. 526 14884.19 
#> intercept-Problem.Name 615 15649.73 
#> lineafm-Problem.Name 518 14692.17 
#> logafm-Problem.Name 518 14753.54 
#> logsuc-Problem.Name 518 14669.73 
#> logfail-Problem.Name 518 14816.17 
#> linesuc-Problem.Name 518 14620.79 
#> linefail-Problem.Name 518 14815.72 
#> logitdec-Problem.Name 519 14760.02 
#> propdec-Problem.Name 519 14761.88 
#> recency-Problem.Name 519 14815.32 
#> logsuc$-Problem.Name 616 15571.8 
#> logfail$-Problem.Name 616 15679.04 
#> 
#> trying to remove
#> intercept-Anon.Student.Id 18 10496.68 
#> intercept-KC..MATHia. 509 15279.69 
#> lineafm$-KC..MATHia. 508 15017.83 
#> removed 
#> intercept Anon.Student.Id 
#> 
#> Step 1 results - pars  18  current BIC 10496.68 current AIC 10338.09 current AUC 0.7366867 current RMSE 0.4175097  McFadden's R2 0.127709 
#> intercept lineafm$ 
#>  KC..MATHia. KC..MATHia. 
#> 
#> Step  2 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 19 10504.95 
#> logafm-Anon.Student.Id 19 10403.65 
#> logsuc-Anon.Student.Id 19 10037.29 
#> logfail-Anon.Student.Id 19 10376.39 
#> linesuc-Anon.Student.Id 19 10219.51 
#> linefail-Anon.Student.Id 19 10025.46 
#> logitdec-Anon.Student.Id 20 9632.309 
#> propdec-Anon.Student.Id 20 9621.993 
#> recency-Anon.Student.Id 20 10467.88 
#> lineafm-KC..MATHia. 18 10496.68 
#> logafm-KC..MATHia. 19 10386.96 
#> logsuc-KC..MATHia. 19 9825.761 
#> logfail-KC..MATHia. 19 10166.58 
#> linesuc-KC..MATHia. 19 9990.289 
#> linefail-KC..MATHia. 19 9990.289 
#> logitdec-KC..MATHia. 20 9764.859 
#> propdec-KC..MATHia. 20 9796.996 
#> recency-KC..MATHia. 20 10090.98 
#> logsuc$-KC..MATHia. 27 9882.072 
#> logfail$-KC..MATHia. 27 10245.09 
#> intercept-Problem.Name 116 11311.41 
#> lineafm-Problem.Name 19 10412.28 
#> logafm-Problem.Name 19 10459.4 
#> logsuc-Problem.Name 19 10143.2 
#> logfail-Problem.Name 19 10394.11 
#> linesuc-Problem.Name 19 10068.88 
#> linefail-Problem.Name 19 10345.74 
#> logitdec-Problem.Name 20 10111.37 
#> propdec-Problem.Name 20 10117.37 
#> recency-Problem.Name 20 10477.54 
#> logsuc$-Problem.Name 117 11034.67 
#> logfail$-Problem.Name 117 11200.36 
#> 
#> trying to remove
#> intercept-KC..MATHia. 10 10908.84 
#> lineafm$-KC..MATHia. 9 10467.63 
#> removed 
#> lineafm$ KC..MATHia. 
#> 
#> Step 2 results - pars  9  current BIC 10467.63 current AIC 10388.34 current AUC 0.7230999 current RMSE 0.419285  McFadden's R2 0.121931 
#> intercept 
#>  KC..MATHia. 
#> 
#> Step  3 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 10 10436.61 
#> logafm-Anon.Student.Id 10 10338.55 
#> logsuc-Anon.Student.Id 10 10122.39 
#> logfail-Anon.Student.Id 10 10477.01 
#> linesuc-Anon.Student.Id 10 10283.68 
#> linefail-Anon.Student.Id 10 10427.49 
#> logitdec-Anon.Student.Id 11 9544.34 
#> propdec-Anon.Student.Id 11 9534.359 
#> recency-Anon.Student.Id 11 10435.29 
#> lineafm-KC..MATHia. 10 10429.45 
#> logafm-KC..MATHia. 10 10349.96 
#> logsuc-KC..MATHia. 10 10010.72 
#> logfail-KC..MATHia. 10 10427.02 
#> linesuc-KC..MATHia. 10 10141.08 
#> linefail-KC..MATHia. 10 10400.04 
#> logitdec-KC..MATHia. 11 9721.357 
#> propdec-KC..MATHia. 11 9743.969 
#> recency-KC..MATHia. 11 10020.22 
#> logsuc$-KC..MATHia. 18 10072.14 
#> logfail$-KC..MATHia. 18 10489.41 
#> intercept-Problem.Name 107 11261.98 
#> lineafm-Problem.Name 10 10383.15 
#> logafm-Problem.Name 10 10429.19 
#> logsuc-Problem.Name 10 10099.82 
#> logfail-Problem.Name 10 10353.24 
#> linesuc-Problem.Name 10 10023.82 
#> linefail-Problem.Name 10 10302.4 
#> logitdec-Problem.Name 11 10056.72 
#> propdec-Problem.Name 11 10061.53 
#> recency-Problem.Name 11 10446.54 
#> logsuc$-Problem.Name 108 10981.5 
#> logfail$-Problem.Name 108 11149.86 
#> 
#> Step 3 results - pars  9  current BIC 10467.63 current AIC 10388.34 current AUC 0.7230999 current RMSE 0.419285  McFadden's R2 0.121931 
#> intercept 
#>  KC..MATHia.

plot of chunk AFMstartMATHiaCV

#> [1] 0.7134662
#> [1] 0.114235

# Held-out AUC per fold (position = index of the held-out fold).
print(AUCs)
#> [1] 0.7226126 0.7134662

# Held-out McFadden's R2 per fold.
print(R2s)
#> [1] 0.120224 0.114235

# Held-out RMSE per fold.
print(RMSEs)
#> [1] 0.4198516 0.4172793

# Bundle the current modelobtable with the per-fold AUC, R2 and RMSE vectors
# and write the list to an .RData file in the working directory.
savedata <- list(modelobtable, AUCs, R2s, RMSEs)
save(savedata, file = "AFMMATHia.RData")

EmptystartMATHia with CV

# Two-fold cross-validation of a stepwise LKT search on val2 that starts with
# no current features (no currentfeatures/currentcomponents are supplied).
# On pass i the other fold is handed to buildLKTModel() via usefolds and the
# metrics below are computed only on rows whose fold equals i.
LLs <- numeric(2)    # held-out log-likelihood, one entry per fold
AUCs <- numeric(2)   # held-out AUC
RMSEs <- numeric(2)  # held-out RMSE
R2s <- numeric(2)    # held-out McFadden's R2
for (i in 1:2) {
  print((1:2)[-i])

  # removecomp/removefeat are paired entries: the (component, feature)
  # combinations listed there are not offered to the search.
  # NOTE(review): forv/bacv/maxitv are passed through unchanged; see the
  # buildLKTModel documentation for their exact meaning.
  modelob <- buildLKTModel(
    usefolds = (1:2)[-i], data = val2, interc = TRUE, verbose = FALSE,
    removecomp = c("Anon.Student.Id", "Anon.Student.Id", "Anon.Student.Id"),
    removefeat = c("intercept", "logsuc$", "logfail$"),
    allcomponents = c("Anon.Student.Id", "KC..MATHia.", "Problem.Name"),
    forv = 1200, bacv = 1200,
    allfeatures = c(
      "intercept", "lineafm", "logafm", "logsuc", "logfail",
      "linesuc", "linefail", "logitdec", "propdec", "recency",
      "logsuc$", "logfail$"
    ),
    maxitv = 8
  )

  # First element is kept for saving later; it is overwritten on every pass,
  # so after the loop it holds the value from the last fold only.
  modelobtable <- modelob[[1]]
  # Second element carries predictors, coefs and newdata used below.
  modelob <- modelob[[2]]

  # Rows of the held-out fold and their observed 0/1 outcomes (computed once
  # instead of re-evaluating the same mask for every metric).
  heldout_rows <- modelob$newdata$fold %in% i
  heldout_y <- modelob$newdata$CF..ansbin.[heldout_rows]

  # Predicted probabilities for all rows, clamped away from 0 and 1 so the
  # log-likelihood stays finite, then restricted to the held-out rows.
  pred <- as.vector(pmin(pmax(inv.logit(
    as.matrix(modelob$predictors %*% modelob$coefs)[, ]
  ), .00001), .99999)[heldout_rows])

  LLs[i] <- sum(log(ifelse(heldout_y == 1, pred, 1 - pred)))
  AUCs[i] <- suppressMessages(auc(heldout_y, pred)[1])
  print(AUCs[i])

  # Intercept-only logistic model on the held-out outcomes: the baseline for
  # McFadden's R2 = 1 - LL(model) / LL(null).
  nullmodel <- glm(heldout_y ~ 1, family = binomial(logit))
  R2s[i] <- round(1 - LLs[i] / logLik(nullmodel)[1], 6)

  RMSEs[i] <- sqrt(mean((heldout_y - pred)^2))
  print(R2s[i])
}
#> [1] 2
#> 
#> Step  1 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 2 11133.52 
#> logafm-Anon.Student.Id 2 11018.67 
#> logsuc-Anon.Student.Id 2 10844.6 
#> logfail-Anon.Student.Id 2 11196.46 
#> linesuc-Anon.Student.Id 2 10967.21 
#> linefail-Anon.Student.Id 2 11174.23 
#> logitdec-Anon.Student.Id 3 10525.26 
#> propdec-Anon.Student.Id 3 10524.45 
#> recency-Anon.Student.Id 3 10953.82 
#> intercept-KC..MATHia. 9 9981.208 
#> lineafm-KC..MATHia. 2 11179.33 
#> logafm-KC..MATHia. 2 11067.77 
#> logsuc-KC..MATHia. 2 10513.05 
#> logfail-KC..MATHia. 2 10877.56 
#> linesuc-KC..MATHia. 2 10646.3 
#> linefail-KC..MATHia. 2 10899.19 
#> logitdec-KC..MATHia. 3 9499.341 
#> propdec-KC..MATHia. 3 9511.302 
#> recency-KC..MATHia. 3 10818.73 
#> logsuc$-KC..MATHia. 10 9761.247 
#> logfail$-KC..MATHia. 10 10597.12 
#> intercept-Problem.Name 99 11911.38 
#> lineafm-Problem.Name 2 11198.29 
#> logafm-Problem.Name 2 11200.39 
#> logsuc-Problem.Name 2 11162.76 
#> logfail-Problem.Name 2 11044.96 
#> linesuc-Problem.Name 2 11159.23 
#> linefail-Problem.Name 2 11004.29 
#> logitdec-Problem.Name 3 11017.56 
#> propdec-Problem.Name 3 11018.05 
#> recency-Problem.Name 3 11014.3 
#> logsuc$-Problem.Name 100 12028.45 
#> logfail$-Problem.Name 100 11923.73 
#> added 
#> logitdec KC..MATHia. 
#> 
#> Step 1 results - pars  3  current BIC 9499.341 current AIC 9472.91 current AUC 0.7661558 current RMSE 0.4062256  McFadden's R2 0.153245 
#> logitdec 
#>  KC..MATHia. 
#> pars 0.9052715 
#> 
#> Step  2 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 4 9510.091 
#> logafm-Anon.Student.Id 4 9491.307 
#> logsuc-Anon.Student.Id 4 9480.244 
#> logfail-Anon.Student.Id 4 9506.821 
#> linesuc-Anon.Student.Id 4 9508.23 
#> linefail-Anon.Student.Id 4 9508.052 
#> logitdec-Anon.Student.Id 5 9455.96 
#> propdec-Anon.Student.Id 5 9449.379 
#> recency-Anon.Student.Id 5 9473.898 
#> intercept-KC..MATHia. 11 9268.779 
#> lineafm-KC..MATHia. 4 9508.709 
#> logafm-KC..MATHia. 4 9504.937 
#> logsuc-KC..MATHia. 4 9509.77 
#> logfail-KC..MATHia. 4 9509.846 
#> linesuc-KC..MATHia. 4 9508.585 
#> linefail-KC..MATHia. 4 9509.09 
#> propdec-KC..MATHia. 5 9515.875 
#> recency-KC..MATHia. 5 9383.647 
#> logsuc$-KC..MATHia. 12 9398.836 
#> logfail$-KC..MATHia. 12 9480.257 
#> intercept-Problem.Name 101 10291.66 
#> lineafm-Problem.Name 4 9502.482 
#> logafm-Problem.Name 4 9505.914 
#> logsuc-Problem.Name 4 9480.988 
#> logfail-Problem.Name 4 9495.765 
#> linesuc-Problem.Name 4 9466.111 
#> linefail-Problem.Name 4 9482.268 
#> logitdec-Problem.Name 5 9469.132 
#> propdec-Problem.Name 5 9471.58 
#> recency-Problem.Name 5 9492.881 
#> logsuc$-Problem.Name 102 10344.13 
#> logfail$-Problem.Name 102 10372.93 
#> 
#> Step 2 results - pars  3  current BIC 9499.341 current AIC 9472.91 current AUC 0.7661558 current RMSE 0.4062256  McFadden's R2 0.153245 
#> logitdec 
#>  KC..MATHia. 
#> pars 0.9052715

plot of chunk EmptystartMATHiaCV

#> [1] 0.7651378
#> [1] 0.156892
#> [1] 1
#> 
#> Step  1 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 2 11799.97 
#> logafm-Anon.Student.Id 2 11722.32 
#> logsuc-Anon.Student.Id 2 11553.71 
#> logfail-Anon.Student.Id 2 11830.43 
#> linesuc-Anon.Student.Id 2 11674.62 
#> linefail-Anon.Student.Id 2 11786.66 
#> logitdec-Anon.Student.Id 3 11124.07 
#> propdec-Anon.Student.Id 3 11125.62 
#> recency-Anon.Student.Id 3 11610.99 
#> intercept-KC..MATHia. 9 10467.63 
#> lineafm-KC..MATHia. 2 11825.8 
#> logafm-KC..MATHia. 2 11725.98 
#> logsuc-KC..MATHia. 2 11102.95 
#> logfail-KC..MATHia. 2 11412.06 
#> linesuc-KC..MATHia. 2 11337.37 
#> linefail-KC..MATHia. 2 11446.91 
#> logitdec-KC..MATHia. 3 9982.423 
#> propdec-KC..MATHia. 3 10005.83 
#> recency-KC..MATHia. 3 11452.57 
#> logsuc$-KC..MATHia. 10 10340 
#> logfail$-KC..MATHia. 10 11068.63 
#> intercept-Problem.Name 99 12639.33 
#> lineafm-Problem.Name 2 11826.9 
#> logafm-Problem.Name 2 11827.52 
#> logsuc-Problem.Name 2 11801.79 
#> logfail-Problem.Name 2 11689.36 
#> linesuc-Problem.Name 2 11794.93 
#> linefail-Problem.Name 2 11635.89 
#> logitdec-Problem.Name 3 11672.1 
#> propdec-Problem.Name 3 11673.35 
#> recency-Problem.Name 3 11637.15 
#> logsuc$-Problem.Name 100 12705.73 
#> logfail$-Problem.Name 100 12527.88 
#> added 
#> logitdec KC..MATHia. 
#> 
#> Step 1 results - pars  3  current BIC 9982.423 current AIC 9955.993 current AUC 0.7652242 current RMSE 0.4088065  McFadden's R2 0.157522 
#> logitdec 
#>  KC..MATHia. 
#> pars 0.924255 
#> 
#> Step  2 start
#> 
#> trying to add
#> lineafm-Anon.Student.Id 4 9990.7 
#> logafm-Anon.Student.Id 4 9988.031 
#> logsuc-Anon.Student.Id 4 9980.646 
#> logfail-Anon.Student.Id 4 9993.08 
#> linesuc-Anon.Student.Id 4 9992.58 
#> linefail-Anon.Student.Id 4 9987.351 
#> logitdec-Anon.Student.Id 5 9929.233 
#> propdec-Anon.Student.Id 5 9927.475 
#> recency-Anon.Student.Id 5 9955.036 
#> intercept-KC..MATHia. 11 9729.463 
#> lineafm-KC..MATHia. 4 9986.543 
#> logafm-KC..MATHia. 4 9990.717 
#> logsuc-KC..MATHia. 4 9993.225 
#> logfail-KC..MATHia. 4 9993.216 
#> linesuc-KC..MATHia. 4 9982.681 
#> linefail-KC..MATHia. 4 9990.333 
#> propdec-KC..MATHia. 5 9999.429 
#> recency-KC..MATHia. 5 9848.394 
#> logsuc$-KC..MATHia. 12 9899.419 
#> logfail$-KC..MATHia. 12 9963.977 
#> intercept-Problem.Name 101 10834.99 
#> lineafm-Problem.Name 4 9981.552 
#> logafm-Problem.Name 4 9988.183 
#> logsuc-Problem.Name 4 9959.042 
#> logfail-Problem.Name 4 9982.199 
#> linesuc-Problem.Name 4 9937.565 
#> linefail-Problem.Name 4 9964.93 
#> logitdec-Problem.Name 5 9951.006 
#> propdec-Problem.Name 5 9953.461 
#> recency-Problem.Name 5 9966.36 
#> logsuc$-Problem.Name 102 10848.5 
#> logfail$-Problem.Name 102 10859.71 
#> 
#> Step 2 results - pars  3  current BIC 9982.423 current AIC 9955.993 current AUC 0.7652242 current RMSE 0.4088065  McFadden's R2 0.157522 
#> logitdec 
#>  KC..MATHia. 
#> pars 0.924255

plot of chunk EmptystartMATHiaCV

#> [1] 0.7660516
#> [1] 0.152626

# Held-out AUC per fold (position = index of the held-out fold).
print(AUCs)
#> [1] 0.7651378 0.7660516

# Held-out McFadden's R2 per fold.
print(R2s)
#> [1] 0.156892 0.152626

# Held-out RMSE per fold.
print(RMSEs)
#> [1] 0.4089813 0.4062784

# Bundle the current modelobtable with the per-fold AUC, R2 and RMSE vectors
# and write the list to an .RData file in the working directory.
savedata <- list(modelobtable, AUCs, R2s, RMSEs)
save(savedata, file = "emptyMATHia.RData")

Example of LASSOLKT

# Build the candidate feature set for a LASSO LKT model on val.
# - gridpars: the values 0.1 ... 0.9 tried for each feature that takes a
#   parameter (logitdec, propdec, recency in the log below).
# - removecomp/removefeat: paired (component, feature) combinations left out;
#   here Anon.Student.Id loses intercept/logsuc$/logfail$ and KC..Default.
#   keeps only its intercept.
# - NOTE(review): "logsuc" and "logfail" appear twice in allfeatures (and in
#   removefeat), so they are generated twice per component; this looks
#   unintentional -- confirm before de-duplicating.
# - NOTE(review): target_n and test_fold are passed through unchanged; see the
#   LASSOLKTModel documentation for their meaning.
LASSO_Model <- LASSOLKTModel(
  data = setDT(val),
  gridpars = (1:9) / 10,
  removecomp = c(
    rep("Anon.Student.Id", 3),
    rep("KC..Default.", 13)
  ),
  removefeat = c(
    "intercept", "logsuc$", "logfail$",
    "lineafm", "logafm", "logsuc", "logfail", "logsuc", "logfail",
    "linesuc", "linefail", "logitdec", "propdec", "recency",
    "logsuc$", "logfail$"
  ),
  allcomponents = c("Anon.Student.Id", "KC..Cluster.", "KC..Default."),
  allfeatures = c(
    "intercept", "lineafm", "logafm", "logsuc", "logfail", "logsuc",
    "logfail", "linesuc", "linefail", "logitdec", "propdec", "recency",
    "logsuc$", "logfail$"
  ),
  target_n = 100,
  test_fold = 1
)
#> 
#> Start making data
#> lineafm Anon.Student.Id      
#> logafm Anon.Student.Id      
#> logsuc Anon.Student.Id      
#> logfail Anon.Student.Id      
#> logsuc Anon.Student.Id      
#> logfail Anon.Student.Id      
#> linesuc Anon.Student.Id      
#> linefail Anon.Student.Id      
#> logitdec Anon.Student.Id 0.1     
#> logitdec Anon.Student.Id 0.2     
#> logitdec Anon.Student.Id 0.3     
#> logitdec Anon.Student.Id 0.4     
#> logitdec Anon.Student.Id 0.5     
#> logitdec Anon.Student.Id 0.6     
#> logitdec Anon.Student.Id 0.7     
#> logitdec Anon.Student.Id 0.8     
#> logitdec Anon.Student.Id 0.9     
#> propdec Anon.Student.Id 0.1     
#> propdec Anon.Student.Id 0.2     
#> propdec Anon.Student.Id 0.3     
#> propdec Anon.Student.Id 0.4     
#> propdec Anon.Student.Id 0.5     
#> propdec Anon.Student.Id 0.6     
#> propdec Anon.Student.Id 0.7     
#> propdec Anon.Student.Id 0.8     
#> propdec Anon.Student.Id 0.9     
#> recency Anon.Student.Id 0.1     
#> recency Anon.Student.Id 0.2     
#> recency Anon.Student.Id 0.3     
#> recency Anon.Student.Id 0.4     
#> recency Anon.Student.Id 0.5     
#> recency Anon.Student.Id 0.6     
#> recency Anon.Student.Id 0.7     
#> recency Anon.Student.Id 0.8     
#> recency Anon.Student.Id 0.9     
#> intercept KC..Cluster.      
#> lineafm KC..Cluster.      
#> logafm KC..Cluster.      
#> logsuc KC..Cluster.      
#> logfail KC..Cluster.      
#> logsuc KC..Cluster.      
#> logfail KC..Cluster.      
#> linesuc KC..Cluster.      
#> linefail KC..Cluster.      
#> logitdec KC..Cluster. 0.1     
#> logitdec KC..Cluster. 0.2     
#> logitdec KC..Cluster. 0.3     
#> logitdec KC..Cluster. 0.4     
#> logitdec KC..Cluster. 0.5     
#> logitdec KC..Cluster. 0.6     
#> logitdec KC..Cluster. 0.7     
#> logitdec KC..Cluster. 0.8     
#> logitdec KC..Cluster. 0.9     
#> propdec KC..Cluster. 0.1     
#> propdec KC..Cluster. 0.2     
#> propdec KC..Cluster. 0.3     
#> propdec KC..Cluster. 0.4     
#> propdec KC..Cluster. 0.5     
#> propdec KC..Cluster. 0.6     
#> propdec KC..Cluster. 0.7     
#> propdec KC..Cluster. 0.8     
#> propdec KC..Cluster. 0.9     
#> recency KC..Cluster. 0.1     
#> recency KC..Cluster. 0.2     
#> recency KC..Cluster. 0.3     
#> recency KC..Cluster. 0.4     
#> recency KC..Cluster. 0.5     
#> recency KC..Cluster. 0.6     
#> recency KC..Cluster. 0.7     
#> recency KC..Cluster. 0.8     
#> recency KC..Cluster. 0.9     
#> logsuc$ KC..Cluster.      
#> logfail$ KC..Cluster.      
#> intercept KC..Default.      
#> interceptKC..Default.+logfailKC..Cluster.:e$data$KC..Cluster.+logsucKC..Cluster.:e$data$KC..Cluster.+recency0.9KC..Cluster.+recency0.8KC..Cluster.+recency0.7KC..Cluster.+recency0.6KC..Cluster.+recency0.5KC..Cluster.+recency0.4KC..Cluster.+recency0.3KC..Cluster.+recency0.2KC..Cluster.+recency0.1KC..Cluster.+propdec0.9KC..Cluster.+propdec0.8KC..Cluster.+propdec0.7KC..Cluster.+propdec0.6KC..Cluster.+propdec0.5KC..Cluster.+propdec0.4KC..Cluster.+propdec0.3KC..Cluster.+propdec0.2KC..Cluster.+propdec0.1KC..Cluster.+logitdec0.9KC..Cluster.+logitdec0.8KC..Cluster.+logitdec0.7KC..Cluster.+logitdec0.6KC..Cluster.+logitdec0.5KC..Cluster.+logitdec0.4KC..Cluster.+logitdec0.3KC..Cluster.+logitdec0.2KC..Cluster.+logitdec0.1KC..Cluster.+linefailKC..Cluster.+linesucKC..Cluster.+logfailKC..Cluster.+logsucKC..Cluster.+logfailKC..Cluster.+logsucKC..Cluster.+logafmKC..Cluster.+lineafmKC..Cluster.+interceptKC..Cluster.+recency0.9Anon.Student.Id+recency0.8Anon.Student.Id+recency0.7Anon.Student.Id+recency0.6Anon.Student.Id+recency0.5Anon.Student.Id+recency0.4Anon.Student.Id+recency0.3Anon.Student.Id+recency0.2Anon.Student.Id+recency0.1Anon.Student.Id+propdec0.9Anon.Student.Id+propdec0.8Anon.Student.Id+propdec0.7Anon.Student.Id+propdec0.6Anon.Student.Id+propdec0.5Anon.Student.Id+propdec0.4Anon.Student.Id+propdec0.3Anon.Student.Id+propdec0.2Anon.Student.Id+propdec0.1Anon.Student.Id+logitdec0.9Anon.Student.Id+logitdec0.8Anon.Student.Id+logitdec0.7Anon.Student.Id+logitdec0.6Anon.Student.Id+logitdec0.5Anon.Student.Id+logitdec0.4Anon.Student.Id+logitdec0.3Anon.Student.Id+logitdec0.2Anon.Student.Id+logitdec0.1Anon.Student.Id+linefailAnon.Student.Id+linesucAnon.Student.Id+logfailAnon.Student.Id+logsucAnon.Student.Id+logfailAnon.Student.Id+logsucAnon.Student.Id+logafmAnon.Student.Id+lineafmAnon.Student.Id+0 
#> Time difference of 15.35146 secs
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases
#> [1] 1
#> [1] 2
#> [1] 3
#> [1] 4
#> [1] 5
#> [1] 6
#> [1] 7
#> [1] 8
#> [1] 9
#> [1] 10
#> [1] 11
#> [1] 12
#> [1] 13
#> [1] 14
#> [1] 15
#> [1] 16
#> [1] 17
#> [1] 18
#> [1] 19
#> [1] 20
#> [1] 21
#> [1] 22
#> [1] 23
#> [1] 24
#> [1] 25
#> [1] 26
#> [1] 27
#> [1] 28
#> [1] 29
#> [1] 30
#> [1] 31
#> [1] 32
#> [1] 33
#> [1] 34
#> [1] 35
#> [1] 36
#> [1] 37
#> [1] 38
#> [1] 39
#> [1] 40
#> [1] 41
#> [1] 42
#> [1] 43
#> [1] 44
#> [1] 45
#> [1] 46
#> [1] 47
#> [1] 48
#> [1] 49
#> [1] 50
#> [1] 51
#> [1] 52
#> [1] 53
#> [1] 54
#> [1] 55
#> [1] 56
#> [1] 57
#> [1] 58
#> [1] 59
#> [1] 60
#> [1] 61
#> [1] 62
#> [1] 63
#> [1] 64
#> [1] 65
#> [1] 66
#> [1] 67
#> [1] 68
#> [1] 69
#> [1] 70
#> [1] 71
#> [1] 72
#> [1] 73
#> [1] 74
#> [1] 75
#> [1] 76
#> [1] 77
#> [1] 78
#> [1] 79
#> [1] 80
#> [1] 81
#> [1] 82
#> [1] 83
#> [1] 84
#> [1] 85


# Scatter LASSO_Model$auc_lambda against LASSO_Model$n_features, then overlay
# a red square on the entry selected by LASSO_Model$target_idx.
plot(
  x = LASSO_Model$n_features,
  y = LASSO_Model$auc_lambda,
  pch = 16
)
points(
  x = LASSO_Model$n_features[LASSO_Model$target_idx],
  y = LASSO_Model$auc_lambda[LASSO_Model$target_idx],
  col = "red",
  pch = 15
)

plot of chunk LASSOLKT

# Scatter LASSO_Model$rmse_lambda against LASSO_Model$n_features, then overlay
# a red square on the entry selected by LASSO_Model$target_idx.
plot(
  x = LASSO_Model$n_features,
  y = LASSO_Model$rmse_lambda,
  pch = 16
)
points(
  x = LASSO_Model$n_features[LASSO_Model$target_idx],
  y = LASSO_Model$rmse_lambda[LASSO_Model$target_idx],
  col = "red",
  pch = 15
)

plot of chunk LASSOLKT

# Scatter LASSO_Model$BIC_lambda against LASSO_Model$n_features, then overlay
# a red square on the entry selected by LASSO_Model$target_idx.
plot(
  x = LASSO_Model$n_features,
  y = LASSO_Model$BIC_lambda,
  pch = 16
)
points(
  x = LASSO_Model$n_features[LASSO_Model$target_idx],
  y = LASSO_Model$BIC_lambda[LASSO_Model$target_idx],
  col = "red",
  pch = 15
)

plot of chunk LASSOLKT


# Features kept in the model fitted for the target number of features.
# Auto-prints the model_features element of LASSO_Model; in the output below
# it is a table with one row per retained term (kept_features) and its
# coefficient (kept_coefs).
LASSO_Model$model_features
#>                                                                                                                                                                                                                                        kept_features
#> 1                                                                                                                                                                                                                                        (Intercept)
#> 2                                                                                                                          interceptKC..Default.__________ refers to how different the observations are from each other and sometimes from the mean.
#> 3                                                                                                                                interceptKC..Default.A __________ describes the likelihood that observations will occur within any range of values.
#> 4   interceptKC..Default.A __________ distribution, the most common natural distribution, is symmetric, so for example, the probability of observing a value in the range between -1 and 0 is equal to the probability of the range between 0 and 1.
#> 5                                                                                                                              interceptKC..Default.A distribution describes the __________ that observations will occur within any range of values.
#> 6                                                                                                                                interceptKC..Default.A distribution describes the likelihood that __________ will occur within any range of values.
#> 7                                                                                                                         interceptKC..Default.A distribution describes the likelihood that observations will occur within any __________ of values.
#> 8      interceptKC..Default.A normal distribution, the most common natural distribution, is __________, so for example, the probability of observing a value in the range between -1 and 0 is equal to the probability of the range between 0 and 1.
#> 9         interceptKC..Default.A normal distribution, the most common natural distribution, is symmetric, so for example, the __________ of observing a value in the range between -1 and 0 is equal to the __________ of the range between 0 and 1.
#> 10 interceptKC..Default.A normal distribution, the most common natural distribution, is symmetric, so for example, the probability of observing a value in the range between -1 and 0 is __________ to the probability of the range between 0 and 1.
#> 11                                                                                                                                                                         interceptKC..Default.A statistic __________ a characteristic of a sample.
#> 12                                                                                                                                                                              interceptKC..Default.A statistic describes a __________ of a sample.
#> 13                                                                                                                 interceptKC..Default.Although multiple samples are __________ they are intended to represent the population from which they come.
#> 14                                                                                                                  interceptKC..Default.Although multiple samples are variable they are intended to __________ the population from which they come.
#> 15                                                                                                                   interceptKC..Default.Although multiple samples are variable they are intended to represent the __________ from which they come.
#> 16                                                                                                                                 interceptKC..Default.Standard deviation is the __________ of the variance, also known as root mean squared error.
#> 17                                                                                                                              interceptKC..Default.Standard deviation is the square root of the __________, also known as root mean squared error.
#> 18                                                                                                                                             interceptKC..Default.Standard deviation is the square root of the variance, also known as __________.
#> 19                                                     interceptKC..Default.Standard deviation refers to how __________ observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean.
#> 20                                                         interceptKC..Default.Standard deviation refers to how individual observations vary from the mean, while __________ refers to how multiple observations (i.e. samples) vary from the mean.
#> 21                                                   interceptKC..Default.Standard deviation refers to how individual observations vary from the mean, while standard error refers to how __________ observations (i.e. samples) vary from the mean.
#> 22                                                                                              interceptKC..Default.The __________ distribution has 68 percent of its observations in the range between -1 and 1 standard deviations from the mean.
#> 23                                                                                              interceptKC..Default.The __________ distribution has 95 percent of its observations in the range between -2 and 2 standard deviations from the mean.
#> 24                                                                                            interceptKC..Default.The __________ distribution has 99.7 percent of its observations in the range between -3 and 3 standard deviations from the mean.
#> 25                                                                                            interceptKC..Default.The __________ for a sample is computed by dividing the sum of the observation __________s by 1 less than the total observations.
#> 26                                                                                                                                                   interceptKC..Default.The __________ for an observation is the squared difference from the mean.
#> 27                                                                                                                interceptKC..Default.The __________ for the standard deviation statistic are the same __________ used to measure the observations.
#> 28                                                                                                                                  interceptKC..Default.The __________ is a statistic that describes typical variability for a set of observations.
#> 29                                                                                                    interceptKC..Default.The __________ is computed by taking the sum of all the numbers in a set, divided by the count of the numbers in the set.
#> 30                                                                               interceptKC..Default.The __________ of a distribution between a range of values represents the probability that a new observation will fall within that range also.
#> 31                                                                                                  interceptKC..Default.The __________, or average, is a statistic that represents what we expect observations in the sample to be centered around.
#> 32                                                                        interceptKC..Default.The density of a distribution between a __________ of values represents the probability that a new observation will fall within that __________ also.
#> 33                                                                                   interceptKC..Default.The density of a distribution between a range of values represents the __________ that a new observation will fall within that range also.
#> 34                                                                                   interceptKC..Default.The density of a distribution between a range of values represents the probability that a new __________ will fall within that range also.
#> 35                                                                                                   interceptKC..Default.The mean is computed by taking the __________ of all the numbers in a set, divided by the count of the numbers in the set.
#> 36                                                                                                       interceptKC..Default.The mean is computed by taking the sum of all the numbers in a set, __________ by the count of the numbers in the set.
#> 37                                                                                                     interceptKC..Default.The mean is computed by taking the sum of all the numbers in a set, divided by the __________ of the numbers in the set.
#> 38                                                                                             interceptKC..Default.The normal distribution has 68 percent of its observations in the __________ between -1 and 1 standard deviations from the mean.
#> 39                                                                                interceptKC..Default.The normal distribution has 68 percent of its observations in the range between -__________ and __________ standard deviations from the mean.
#> 40                                                                                             interceptKC..Default.The normal distribution has 95 percent of its observations in the __________ between -2 and 2 standard deviations from the mean.
#> 41                                                                                interceptKC..Default.The normal distribution has 95 percent of its observations in the range between -__________ and __________ standard deviations from the mean.
#> 42                                                                                           interceptKC..Default.The normal distribution has 99.7 percent of its observations in the __________ between -3 and 3 standard deviations from the mean.
#> 43                                                                              interceptKC..Default.The normal distribution has 99.7 percent of its observations in the range between -__________ and __________ standard deviations from the mean.
#> 44                                                                                                interceptKC..Default.The sample mean, or average, is a __________ that represents what we expect observations in the sample to be centered around.
#> 45                                                                                             interceptKC..Default.The sample mean, or average, is a statistic that represents what we __________ observations in the sample to be centered around.
#> 46                                                                                                                           interceptKC..Default.The standard deviation is a statistic that describes typical __________ for a set of observations.
#> 47                                                                                                                            interceptKC..Default.The standard deviation is a statistic that describes typical variability for a set of __________.
#> 48                                                                                                                                  interceptKC..Default.The units for the __________ statistic are the same units used to measure the observations.
#> 49                                                                                                                    interceptKC..Default.The units for the standard deviation statistic are the __________ units used to measure the observations.
#> 50                                                                                                                       interceptKC..Default.The units for the standard deviation statistic are the same units used to __________ the observations.
#> 51                                                                                            interceptKC..Default.The variance for a __________ is computed by dividing the sum of the observation variances by 1 less than the total observations.
#> 52                                                                                              interceptKC..Default.The variance for a sample is computed by __________ the sum of the observation variances by 1 less than the total observations.
#> 53                                                                                       interceptKC..Default.The variance for a sample is computed by dividing the sum of the observation variances by __________ less than the total observations.
#> 54                                                                                                                                                  interceptKC..Default.The variance for an observation is the __________ difference from the mean.
#> 55                                                                                                                                                     interceptKC..Default.The variance for an observation is the squared __________ from the mean.
#> 56                                                                                                                                               interceptKC..Default.The variance for an observation is the squared difference from the __________.
#> 57                                                                                                                       interceptKC..Default.Variability refers to how __________ the observations are from each other and sometimes from the mean.
#> 58                                                                                                                          interceptKC..Default.Variability refers to how different the __________ are from each other and sometimes from the mean.
#> 59                                                                                                                  interceptKC..Default.Variability refers to how different the observations are from each other and sometimes from the __________.
#> 60                                                                                                                     interceptKC..Default.When a study __________ human subjects, the sample is the group of people who participated in the study.
#> 61                                                                                                                    interceptKC..Default.When a study involves human __________s, the sample is the group of people who participated in the study.
#> 62                                                                                                                         interceptKC..Default.When a study involves human subjects, the sample is the group of people who __________ in the study.
#> 63                                                                                                                                                                                                                            recency0.3KC..Cluster.
#> 64                                                                                                                                                                                                                            recency0.2KC..Cluster.
#> 65                                                                                                                                                                                                                            propdec0.4KC..Cluster.
#> 66                                                                                                                                                                                                                           logitdec0.1KC..Cluster.
#> 67                                                                                                                                                                                                                                logsucKC..Cluster.
#> 68                                                                                                                                                                                                                                logafmKC..Cluster.
#> 69                                                                                             interceptKC..Cluster.10 The normal distribution has 99.7 percent of its observations in the range between -3 and 3 standard deviations from the mean.
#> 70                                                                                                                       interceptKC..Cluster.12 The standard deviation is a statistic that describes typical variability for a set of observations.
#> 71                                                                                                                    interceptKC..Cluster.17 When a study involves human subjects, the sample is the group of people who participated in the study.
#> 72                                                                                                                interceptKC..Cluster.21 Although multiple samples are variable they are intended to represent the population from which they come.
#> 73                                                                                                                             interceptKC..Cluster.22 Standard deviation is the square root of the variance, also known as root mean squared error.
#> 74                                                  interceptKC..Cluster.23 Standard deviation refers to how individual observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean.
#> 75                                                                                               interceptKC..Cluster.27 The normal distribution has 95 percent of its observations in the range between -2 and 2 standard deviations from the mean.
#> 76                                                                                                                       interceptKC..Cluster.30 The standard deviation is a statistic that describes typical variability for a set of observations.
#> 77                                                                                                                    interceptKC..Cluster.35 When a study involves human subjects, the sample is the group of people who participated in the study.
#> 78                                                                                                                              interceptKC..Cluster.4 Standard deviation is the square root of the variance, also known as root mean squared error.
#> 79                                                   interceptKC..Cluster.5 Standard deviation refers to how individual observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean.
#> 80                                                                                                        interceptKC..Cluster.7 The mean is computed by taking the sum of all the numbers in a set, divided by the count of the numbers in the set.
#> 81                                                                                                interceptKC..Cluster.9 The normal distribution has 95 percent of its observations in the range between -2 and 2 standard deviations from the mean.
#> 82                                                                                                                                                                                                                         recency0.9Anon.Student.Id
#> 83                                                                                                                                                                                                                         recency0.1Anon.Student.Id
#> 84                                                                                                                                                                                                                         propdec0.9Anon.Student.Id
#> 85                                                                                                                                                                                                                        logitdec0.9Anon.Student.Id
#> 86                                                                                                                                                                                                                           linefailAnon.Student.Id
#> 87                                                                                                                                                                                                                            linesucAnon.Student.Id
#> 88                                                                                                                                                                                                                             logsucAnon.Student.Id
#> 89                                                                                              logfailKC..Cluster.:e$data$KC..Cluster.21 Although multiple samples are variable they are intended to represent the population from which they come.
#> 90                                                                             logfailKC..Cluster.:e$data$KC..Cluster.26 The normal distribution has 68 percent of its observations in the range between -1 and 1 standard deviations from the mean.
#> 91                                                                           logfailKC..Cluster.:e$data$KC..Cluster.28 The normal distribution has 99.7 percent of its observations in the range between -3 and 3 standard deviations from the mean.
#> 92                                                              logfailKC..Cluster.:e$data$KC..Cluster.6 The density of a distribution between a range of values represents the probability that a new observation will fall within that range also.
#> 93                                                                                      logfailKC..Cluster.:e$data$KC..Cluster.7 The mean is computed by taking the sum of all the numbers in a set, divided by the count of the numbers in the set.
#> 94                                                                              logfailKC..Cluster.:e$data$KC..Cluster.8 The normal distribution has 68 percent of its observations in the range between -1 and 1 standard deviations from the mean.
#> 95                                                                              logfailKC..Cluster.:e$data$KC..Cluster.9 The normal distribution has 95 percent of its observations in the range between -2 and 2 standard deviations from the mean.
#> 96                                                                             e$data$KC..Cluster.11 The sample mean, or average, is a statistic that represents what we expect observations in the sample to be centered around.:logsucKC..Cluster.
#> 97                                                                                                      e$data$KC..Cluster.12 The standard deviation is a statistic that describes typical variability for a set of observations.:logsucKC..Cluster.
#> 98                                                                                                                                 e$data$KC..Cluster.15 The variance for an observation is the squared difference from the mean.:logsucKC..Cluster.
#> 99                                                                                                                                 e$data$KC..Cluster.33 The variance for an observation is the squared difference from the mean.:logsucKC..Cluster.
#>      kept_coefs
#> 1  -1.816826389
#> 2  -0.911123701
#> 3  -1.498668874
#> 4   0.688278758
#> 5   0.121006737
#> 6  -0.793616513
#> 7   0.828945147
#> 8  -0.806109153
#> 9  -0.532510436
#> 10  0.258683284
#> 11  0.463811152
#> 12 -0.496619498
#> 13 -0.385441554
#> 14  1.120294150
#> 15 -0.227145693
#> 16 -0.113991696
#> 17 -0.431914852
#> 18 -1.551580455
#> 19 -0.255718272
#> 20 -0.138747855
#> 21 -0.071578590
#> 22  0.342598246
#> 23  0.210858466
#> 24  0.324670243
#> 25 -0.555573661
#> 26 -0.592673546
#> 27 -0.312019227
#> 28 -0.305971277
#> 29  1.037760962
#> 30 -1.346992118
#> 31  1.825101833
#> 32  1.165152396
#> 33  0.556865002
#> 34 -0.646382304
#> 35  0.533280899
#> 36  0.923574082
#> 37 -0.579962082
#> 38  0.831382390
#> 39  0.466909051
#> 40  0.841298314
#> 41  0.110776090
#> 42  0.959427040
#> 43  0.300997442
#> 44 -0.442927263
#> 45 -0.068528950
#> 46 -1.460167523
#> 47 -0.254359887
#> 48 -1.023589541
#> 49  0.251960060
#> 50  0.409158508
#> 51 -0.519686467
#> 52  0.287016892
#> 53  0.141816467
#> 54 -0.837247651
#> 55 -0.665271667
#> 56  0.603521705
#> 57  0.741991024
#> 58 -0.732473446
#> 59  0.698578805
#> 60  0.351053421
#> 61  0.242872100
#> 62  0.475472058
#> 63  1.008638023
#> 64  3.126992751
#> 65  0.603335279
#> 66  0.023475155
#> 67  0.110862834
#> 68  0.233971454
#> 69  0.117105566
#> 70 -0.152717694
#> 71  0.440187857
#> 72 -0.103523771
#> 73 -0.089118604
#> 74 -0.188481959
#> 75  0.095987311
#> 76 -0.150733075
#> 77  0.541034249
#> 78 -0.111814735
#> 79 -0.273469148
#> 80  0.061091158
#> 81  0.069973196
#> 82 -0.030496971
#> 83  0.548870546
#> 84  0.029286700
#> 85  0.317994860
#> 86 -0.016550235
#> 87  0.006324641
#> 88  0.038868303
#> 89 -0.002204814
#> 90  0.055460301
#> 91  0.108243841
#> 92 -0.113197517
#> 93  0.063151094
#> 94  0.182769186
#> 95  0.137566230
#> 96 -0.010039018
#> 97 -0.024184304
#> 98 -0.003734224
#> 99 -0.014543091


# New model features from a different lambda penalty term:
# find the entry of LASSO_Model$n_features closest to 150; its index is used
# below to pick the matching value out of LASSO_Model$fit$lambda.
new_lambda <- which.min(abs(LASSO_Model$n_features - 150))
# Coefficients of the fit evaluated at that lambda value.
target_coefs <- coef(LASSO_Model$fit, s = LASSO_Model$fit$lambda[new_lambda])
# Positions of the non-zero coefficients, computed once and reused for both
# the feature names and the coefficient values.
nonzero_idx <- which(!(target_coefs == 0))
kept_features <- rownames(target_coefs)[nonzero_idx]
kept_coefs <- target_coefs[nonzero_idx]
# Print the names of the retained features.
kept_features
#>   [1] "(Intercept)"                                                                                                                                                                                                                                                      
#>   [2] "interceptKC..Default.__________ refers to how different the observations are from each other and sometimes from the mean."                                                                                                                                        
#>   [3] "interceptKC..Default.A __________ describes a characteristic of a sample."                                                                                                                                                                                        
#>   [4] "interceptKC..Default.A __________ describes the likelihood that observations will occur within any range of values."                                                                                                                                              
#>   [5] "interceptKC..Default.A __________ distribution, the most common natural distribution, is symmetric, so for example, the probability of observing a value in the range between -1 and 0 is equal to the probability of the range between 0 and 1."                 
#>   [6] "interceptKC..Default.A distribution describes the __________ that observations will occur within any range of values."                                                                                                                                            
#>   [7] "interceptKC..Default.A distribution describes the likelihood that __________ will occur within any range of values."                                                                                                                                              
#>   [8] "interceptKC..Default.A distribution describes the likelihood that observations will occur within any __________ of values."                                                                                                                                       
#>   [9] "interceptKC..Default.A normal distribution, the most common natural distribution, is __________, so for example, the probability of observing a value in the range between -1 and 0 is equal to the probability of the range between 0 and 1."                    
#>  [10] "interceptKC..Default.A normal distribution, the most common natural distribution, is symmetric, so for example, the __________ of observing a value in the range between -1 and 0 is equal to the __________ of the range between 0 and 1."                       
#>  [11] "interceptKC..Default.A normal distribution, the most common natural distribution, is symmetric, so for example, the probability of observing a value in the range between -1 and 0 is __________ to the probability of the range between 0 and 1."                
#>  [12] "interceptKC..Default.A statistic __________ a characteristic of a sample."                                                                                                                                                                                        
#>  [13] "interceptKC..Default.A statistic describes a __________ of a sample."                                                                                                                                                                                             
#>  [14] "interceptKC..Default.Although multiple samples are __________ they are intended to represent the population from which they come."                                                                                                                                
#>  [15] "interceptKC..Default.Although multiple samples are variable they are intended to __________ the population from which they come."                                                                                                                                 
#>  [16] "interceptKC..Default.Although multiple samples are variable they are intended to represent the __________ from which they come."                                                                                                                                  
#>  [17] "interceptKC..Default.Standard deviation is the __________ of the variance, also known as root mean squared error."                                                                                                                                                
#>  [18] "interceptKC..Default.Standard deviation is the square root of the __________, also known as root mean squared error."                                                                                                                                             
#>  [19] "interceptKC..Default.Standard deviation is the square root of the variance, also known as __________."                                                                                                                                                            
#>  [20] "interceptKC..Default.Standard deviation refers to how __________ observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean."                                                                    
#>  [21] "interceptKC..Default.Standard deviation refers to how individual observations vary from the mean, while __________ refers to how multiple observations (i.e. samples) vary from the mean."                                                                        
#>  [22] "interceptKC..Default.Standard deviation refers to how individual observations vary from the mean, while standard error refers to how __________ observations (i.e. samples) vary from the mean."                                                                  
#>  [23] "interceptKC..Default.The __________ distribution has 68 percent of its observations in the range between -1 and 1 standard deviations from the mean."                                                                                                             
#>  [24] "interceptKC..Default.The __________ distribution has 95 percent of its observations in the range between -2 and 2 standard deviations from the mean."                                                                                                             
#>  [25] "interceptKC..Default.The __________ distribution has 99.7 percent of its observations in the range between -3 and 3 standard deviations from the mean."                                                                                                           
#>  [26] "interceptKC..Default.The __________ for a sample is computed by dividing the sum of the observation __________s by 1 less than the total observations."                                                                                                           
#>  [27] "interceptKC..Default.The __________ for an observation is the squared difference from the mean."                                                                                                                                                                  
#>  [28] "interceptKC..Default.The __________ for the standard deviation statistic are the same __________ used to measure the observations."                                                                                                                               
#>  [29] "interceptKC..Default.The __________ is a statistic that describes typical variability for a set of observations."                                                                                                                                                 
#>  [30] "interceptKC..Default.The __________ is computed by taking the sum of all the numbers in a set, divided by the count of the numbers in the set."                                                                                                                   
#>  [31] "interceptKC..Default.The __________ of a distribution between a range of values represents the probability that a new observation will fall within that range also."                                                                                              
#>  [32] "interceptKC..Default.The __________, or average, is a statistic that represents what we expect observations in the sample to be centered around."                                                                                                                 
#>  [33] "interceptKC..Default.The density of a distribution between a __________ of values represents the probability that a new observation will fall within that __________ also."                                                                                       
#>  [34] "interceptKC..Default.The density of a distribution between a range of values represents the __________ that a new observation will fall within that range also."                                                                                                  
#>  [35] "interceptKC..Default.The density of a distribution between a range of values represents the probability that a new __________ will fall within that range also."                                                                                                  
#>  [36] "interceptKC..Default.The mean is computed by taking the __________ of all the numbers in a set, divided by the count of the numbers in the set."                                                                                                                  
#>  [37] "interceptKC..Default.The mean is computed by taking the sum of all the numbers in a set, __________ by the count of the numbers in the set."                                                                                                                      
#>  [38] "interceptKC..Default.The mean is computed by taking the sum of all the numbers in a set, divided by the __________ of the numbers in the set."                                                                                                                    
#>  [39] "interceptKC..Default.The normal distribution has __________ percent of its observations in the range between -1 and 1 standard deviations from the mean."                                                                                                         
#>  [40] "interceptKC..Default.The normal distribution has 68 percent of its observations in the __________ between -1 and 1 standard deviations from the mean."                                                                                                            
#>  [41] "interceptKC..Default.The normal distribution has 68 percent of its observations in the range between -__________ and __________ standard deviations from the mean."                                                                                               
#>  [42] "interceptKC..Default.The normal distribution has 95 percent of its observations in the __________ between -2 and 2 standard deviations from the mean."                                                                                                            
#>  [43] "interceptKC..Default.The normal distribution has 95 percent of its observations in the range between -__________ and __________ standard deviations from the mean."                                                                                               
#>  [44] "interceptKC..Default.The normal distribution has 99.7 percent of its observations in the __________ between -3 and 3 standard deviations from the mean."                                                                                                          
#>  [45] "interceptKC..Default.The normal distribution has 99.7 percent of its observations in the range between -__________ and __________ standard deviations from the mean."                                                                                             
#>  [46] "interceptKC..Default.The sample mean, or average, is a __________ that represents what we expect observations in the sample to be centered around."                                                                                                               
#>  [47] "interceptKC..Default.The sample mean, or average, is a statistic that represents what we __________ observations in the sample to be centered around."                                                                                                            
#>  [48] "interceptKC..Default.The standard deviation is a statistic that describes typical __________ for a set of observations."                                                                                                                                          
#>  [49] "interceptKC..Default.The standard deviation is a statistic that describes typical variability for a set of __________."                                                                                                                                           
#>  [50] "interceptKC..Default.The units for the __________ statistic are the same units used to measure the observations."                                                                                                                                                 
#>  [51] "interceptKC..Default.The units for the standard deviation statistic are the __________ units used to measure the observations."                                                                                                                                   
#>  [52] "interceptKC..Default.The units for the standard deviation statistic are the same units used to __________ the observations."                                                                                                                                      
#>  [53] "interceptKC..Default.The variance for a __________ is computed by dividing the sum of the observation variances by 1 less than the total observations."                                                                                                           
#>  [54] "interceptKC..Default.The variance for a sample is computed by __________ the sum of the observation variances by 1 less than the total observations."                                                                                                             
#>  [55] "interceptKC..Default.The variance for a sample is computed by dividing the sum of the observation variances by __________ less than the total observations."                                                                                                      
#>  [56] "interceptKC..Default.The variance for an observation is the __________ difference from the mean."                                                                                                                                                                 
#>  [57] "interceptKC..Default.The variance for an observation is the squared __________ from the mean."                                                                                                                                                                    
#>  [58] "interceptKC..Default.The variance for an observation is the squared difference from the __________."                                                                                                                                                              
#>  [59] "interceptKC..Default.Variability refers to how __________ the observations are from each other and sometimes from the mean."                                                                                                                                      
#>  [60] "interceptKC..Default.Variability refers to how different the __________ are from each other and sometimes from the mean."                                                                                                                                         
#>  [61] "interceptKC..Default.Variability refers to how different the observations are from each other and sometimes from the __________."                                                                                                                                 
#>  [62] "interceptKC..Default.When a study __________ human subjects, the sample is the group of people who participated in the study."                                                                                                                                    
#>  [63] "interceptKC..Default.When a study involves human __________s, the sample is the group of people who participated in the study."                                                                                                                                   
#>  [64] "interceptKC..Default.When a study involves human subjects, the sample is the group of people who __________ in the study."                                                                                                                                        
#>  [65] "recency0.9KC..Cluster."                                                                                                                                                                                                                                           
#>  [66] "recency0.3KC..Cluster."                                                                                                                                                                                                                                           
#>  [67] "recency0.2KC..Cluster."                                                                                                                                                                                                                                           
#>  [68] "propdec0.5KC..Cluster."                                                                                                                                                                                                                                           
#>  [69] "propdec0.4KC..Cluster."                                                                                                                                                                                                                                           
#>  [70] "logitdec0.1KC..Cluster."                                                                                                                                                                                                                                          
#>  [71] "linefailKC..Cluster."                                                                                                                                                                                                                                             
#>  [72] "logfailKC..Cluster."                                                                                                                                                                                                                                              
#>  [73] "logafmKC..Cluster."                                                                                                                                                                                                                                               
#>  [74] "interceptKC..Cluster.10 The normal distribution has 99.7 percent of its observations in the range between -3 and 3 standard deviations from the mean."                                                                                                            
#>  [75] "interceptKC..Cluster.12 The standard deviation is a statistic that describes typical variability for a set of observations."                                                                                                                                      
#>  [76] "interceptKC..Cluster.17 When a study involves human subjects, the sample is the group of people who participated in the study."                                                                                                                                   
#>  [77] "interceptKC..Cluster.20 A statistic describes a characteristic of a sample."                                                                                                                                                                                      
#>  [78] "interceptKC..Cluster.21 Although multiple samples are variable they are intended to represent the population from which they come."                                                                                                                               
#>  [79] "interceptKC..Cluster.22 Standard deviation is the square root of the variance, also known as root mean squared error."                                                                                                                                            
#>  [80] "interceptKC..Cluster.23 Standard deviation refers to how individual observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean."                                                                 
#>  [81] "interceptKC..Cluster.24 The density of a distribution between a range of values represents the probability that a new observation will fall within that range also."                                                                                              
#>  [82] "interceptKC..Cluster.27 The normal distribution has 95 percent of its observations in the range between -2 and 2 standard deviations from the mean."                                                                                                              
#>  [83] "interceptKC..Cluster.28 The normal distribution has 99.7 percent of its observations in the range between -3 and 3 standard deviations from the mean."                                                                                                            
#>  [84] "interceptKC..Cluster.29 The sample mean, or average, is a statistic that represents what we expect observations in the sample to be centered around."                                                                                                             
#>  [85] "interceptKC..Cluster.30 The standard deviation is a statistic that describes typical variability for a set of observations."                                                                                                                                      
#>  [86] "interceptKC..Cluster.32 The variance for a sample is computed by dividing the sum of the observation variances by 1 less than the total observations."                                                                                                            
#>  [87] "interceptKC..Cluster.33 The variance for an observation is the squared difference from the mean."                                                                                                                                                                 
#>  [88] "interceptKC..Cluster.35 When a study involves human subjects, the sample is the group of people who participated in the study."                                                                                                                                   
#>  [89] "interceptKC..Cluster.4 Standard deviation is the square root of the variance, also known as root mean squared error."                                                                                                                                             
#>  [90] "interceptKC..Cluster.5 Standard deviation refers to how individual observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean."                                                                  
#>  [91] "interceptKC..Cluster.7 The mean is computed by taking the sum of all the numbers in a set, divided by the count of the numbers in the set."                                                                                                                       
#>  [92] "interceptKC..Cluster.8 The normal distribution has 68 percent of its observations in the range between -1 and 1 standard deviations from the mean."                                                                                                               
#>  [93] "interceptKC..Cluster.9 The normal distribution has 95 percent of its observations in the range between -2 and 2 standard deviations from the mean."                                                                                                               
#>  [94] "recency0.8Anon.Student.Id"                                                                                                                                                                                                                                        
#>  [95] "recency0.7Anon.Student.Id"                                                                                                                                                                                                                                        
#>  [96] "recency0.6Anon.Student.Id"                                                                                                                                                                                                                                        
#>  [97] "recency0.1Anon.Student.Id"                                                                                                                                                                                                                                        
#>  [98] "logitdec0.9Anon.Student.Id"                                                                                                                                                                                                                                       
#>  [99] "logitdec0.8Anon.Student.Id"                                                                                                                                                                                                                                       
#> [100] "linefailAnon.Student.Id"                                                                                                                                                                                                                                          
#> [101] "linesucAnon.Student.Id"                                                                                                                                                                                                                                           
#> [102] "logfailAnon.Student.Id"                                                                                                                                                                                                                                           
#> [103] "logsucAnon.Student.Id"                                                                                                                                                                                                                                            
#> [104] "logfailKC..Cluster.:e$data$KC..Cluster.14 The variance for a sample is computed by dividing the sum of the observation variances by 1 less than the total observations."                                                                                          
#> [105] "logfailKC..Cluster.:e$data$KC..Cluster.15 The variance for an observation is the squared difference from the mean."                                                                                                                                               
#> [106] "logfailKC..Cluster.:e$data$KC..Cluster.16 Variability refers to how different the observations are from each other and sometimes from the mean."                                                                                                                  
#> [107] "logfailKC..Cluster.:e$data$KC..Cluster.18 A distribution describes the likelihood that observations will occur within any range of values."                                                                                                                       
#> [108] "logfailKC..Cluster.:e$data$KC..Cluster.19 A normal distribution, the most common natural distribution, is symmetric, so for example, the probability of observing a value in the range between -1 and 0 is equal to the probability of the range between 0 and 1."
#> [109] "logfailKC..Cluster.:e$data$KC..Cluster.2 A statistic describes a characteristic of a sample."                                                                                                                                                                     
#> [110] "logfailKC..Cluster.:e$data$KC..Cluster.20 A statistic describes a characteristic of a sample."                                                                                                                                                                    
#> [111] "logfailKC..Cluster.:e$data$KC..Cluster.21 Although multiple samples are variable they are intended to represent the population from which they come."                                                                                                             
#> [112] "logfailKC..Cluster.:e$data$KC..Cluster.24 The density of a distribution between a range of values represents the probability that a new observation will fall within that range also."                                                                            
#> [113] "logfailKC..Cluster.:e$data$KC..Cluster.25 The mean is computed by taking the sum of all the numbers in a set, divided by the count of the numbers in the set."                                                                                                    
#> [114] "logfailKC..Cluster.:e$data$KC..Cluster.26 The normal distribution has 68 percent of its observations in the range between -1 and 1 standard deviations from the mean."                                                                                            
#> [115] "logfailKC..Cluster.:e$data$KC..Cluster.28 The normal distribution has 99.7 percent of its observations in the range between -3 and 3 standard deviations from the mean."                                                                                          
#> [116] "logfailKC..Cluster.:e$data$KC..Cluster.3 Although multiple samples are variable they are intended to represent the population from which they come."                                                                                                              
#> [117] "logfailKC..Cluster.:e$data$KC..Cluster.31 The units for the standard deviation statistic are the same units used to measure the observations."                                                                                                                    
#> [118] "logfailKC..Cluster.:e$data$KC..Cluster.32 The variance for a sample is computed by dividing the sum of the observation variances by 1 less than the total observations."                                                                                          
#> [119] "logfailKC..Cluster.:e$data$KC..Cluster.34 Variability refers to how different the observations are from each other and sometimes from the mean."                                                                                                                  
#> [120] "logfailKC..Cluster.:e$data$KC..Cluster.35 When a study involves human subjects, the sample is the group of people who participated in the study."                                                                                                                 
#> [121] "logfailKC..Cluster.:e$data$KC..Cluster.4 Standard deviation is the square root of the variance, also known as root mean squared error."                                                                                                                           
#> [122] "logfailKC..Cluster.:e$data$KC..Cluster.5 Standard deviation refers to how individual observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean."                                                
#> [123] "logfailKC..Cluster.:e$data$KC..Cluster.6 The density of a distribution between a range of values represents the probability that a new observation will fall within that range also."                                                                             
#> [124] "logfailKC..Cluster.:e$data$KC..Cluster.7 The mean is computed by taking the sum of all the numbers in a set, divided by the count of the numbers in the set."                                                                                                     
#> [125] "logfailKC..Cluster.:e$data$KC..Cluster.8 The normal distribution has 68 percent of its observations in the range between -1 and 1 standard deviations from the mean."                                                                                             
#> [126] "logfailKC..Cluster.:e$data$KC..Cluster.9 The normal distribution has 95 percent of its observations in the range between -2 and 2 standard deviations from the mean."                                                                                             
#> [127] "e$data$KC..Cluster.1 A normal distribution, the most common natural distribution, is symmetric, so for example, the probability of observing a value in the range between -1 and 0 is equal to the probability of the range between 0 and 1.:logsucKC..Cluster."  
#> [128] "e$data$KC..Cluster.10 The normal distribution has 99.7 percent of its observations in the range between -3 and 3 standard deviations from the mean.:logsucKC..Cluster."                                                                                           
#> [129] "e$data$KC..Cluster.11 The sample mean, or average, is a statistic that represents what we expect observations in the sample to be centered around.:logsucKC..Cluster."                                                                                            
#> [130] "e$data$KC..Cluster.12 The standard deviation is a statistic that describes typical variability for a set of observations.:logsucKC..Cluster."                                                                                                                     
#> [131] "e$data$KC..Cluster.14 The variance for a sample is computed by dividing the sum of the observation variances by 1 less than the total observations.:logsucKC..Cluster."                                                                                           
#> [132] "e$data$KC..Cluster.15 The variance for an observation is the squared difference from the mean.:logsucKC..Cluster."                                                                                                                                                
#> [133] "e$data$KC..Cluster.17 When a study involves human subjects, the sample is the group of people who participated in the study.:logsucKC..Cluster."                                                                                                                  
#> [134] "e$data$KC..Cluster.2 A statistic describes a characteristic of a sample.:logsucKC..Cluster."                                                                                                                                                                      
#> [135] "e$data$KC..Cluster.20 A statistic describes a characteristic of a sample.:logsucKC..Cluster."                                                                                                                                                                     
#> [136] "e$data$KC..Cluster.22 Standard deviation is the square root of the variance, also known as root mean squared error.:logsucKC..Cluster."                                                                                                                           
#> [137] "e$data$KC..Cluster.23 Standard deviation refers to how individual observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean.:logsucKC..Cluster."                                                
#> [138] "e$data$KC..Cluster.25 The mean is computed by taking the sum of all the numbers in a set, divided by the count of the numbers in the set.:logsucKC..Cluster."                                                                                                     
#> [139] "e$data$KC..Cluster.26 The normal distribution has 68 percent of its observations in the range between -1 and 1 standard deviations from the mean.:logsucKC..Cluster."                                                                                             
#> [140] "e$data$KC..Cluster.27 The normal distribution has 95 percent of its observations in the range between -2 and 2 standard deviations from the mean.:logsucKC..Cluster."                                                                                             
#> [141] "e$data$KC..Cluster.29 The sample mean, or average, is a statistic that represents what we expect observations in the sample to be centered around.:logsucKC..Cluster."                                                                                            
#> [142] "e$data$KC..Cluster.30 The standard deviation is a statistic that describes typical variability for a set of observations.:logsucKC..Cluster."                                                                                                                     
#> [143] "e$data$KC..Cluster.31 The units for the standard deviation statistic are the same units used to measure the observations.:logsucKC..Cluster."                                                                                                                     
#> [144] "e$data$KC..Cluster.32 The variance for a sample is computed by dividing the sum of the observation variances by 1 less than the total observations.:logsucKC..Cluster."                                                                                           
#> [145] "e$data$KC..Cluster.33 The variance for an observation is the squared difference from the mean.:logsucKC..Cluster."                                                                                                                                                
#> [146] "e$data$KC..Cluster.34 Variability refers to how different the observations are from each other and sometimes from the mean.:logsucKC..Cluster."                                                                                                                   
#> [147] "e$data$KC..Cluster.4 Standard deviation is the square root of the variance, also known as root mean squared error.:logsucKC..Cluster."                                                                                                                            
#> [148] "e$data$KC..Cluster.5 Standard deviation refers to how individual observations vary from the mean, while standard error refers to how multiple observations (i.e. samples) vary from the mean.:logsucKC..Cluster."

Example of LASSOLKT with a preset

    # Fit LASSOLKTModel with the "PFA" preset over the student and KC
    # components, using a grid of 0.1 ... 0.9 for `gridpars`.
    # The former `interc = F` argument was removed: LASSOLKTModel() rejects it
    # with "unused argument (interc = F)", so the call never ran. Re-knit the
    # vignette to refresh the recorded output for this chunk.
    # NOTE(review): if the intercept must be excluded, confirm which
    # LASSOLKTModel() argument controls that in the installed LKT version.
    modelob <- LASSOLKTModel(
      data = val,
      gridpars = (1:9) / 10,
      allcomponents = c("Anon.Student.Id", "KC..Default."),
      preset = "PFA",
      target_n = 5
    )
#> Error in LASSOLKTModel(data = val, gridpars = (1:9)/10, interc = F, allcomponents = c("Anon.Student.Id", : unused argument (interc = F)



# Print the nested structure of whatever `modelob` currently holds.
str(modelob)
#> List of 12
#>  $ model        :List of 6
#>   ..$ TypeDetail: chr "L2-regularized logistic regression primal (L2R_LR)"
#>   ..$ Type      : num 0
#>   ..$ W         : num [1, 1:2] 0.591 0.883
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : NULL
#>   .. .. ..$ : chr [1:2] "W1" "W2"
#>   ..$ Bias      : num 0
#>   ..$ ClassNames: int [1:2] 0 1
#>   ..$ NbClass   : int 2
#>   ..- attr(*, "class")= chr "LiblineaR"
#>  $ coefs        : num [1:2, 1] 0.591 0.883
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:2] "(Intercept)" "logitdecKC..MATHia."
#>   .. ..$ : chr "coefficient"
#>  $ r2           : num 0.158
#>  $ prediction   : num [1:49521] NA NA NA NA NA NA NA NA NA NA ...
#>  $ nullmodel    :List of 30
#>   ..$ coefficients     : Named num 0.931
#>   .. ..- attr(*, "names")= chr "(Intercept)"
#>   ..$ residuals        : Named num [1:9915] -3.54 1.39 -3.54 -3.54 1.39 ...
#>   .. ..- attr(*, "names")= chr [1:9915] "1" "2" "3" "4" ...
#>   ..$ fitted.values    : Named num [1:9915] 0.717 0.717 0.717 0.717 0.717 ...
#>   .. ..- attr(*, "names")= chr [1:9915] "1" "2" "3" "4" ...
#>   ..$ effects          : Named num [1:9915] -41.732 0.644 -1.577 -1.577 0.644 ...
#>   .. ..- attr(*, "names")= chr [1:9915] "(Intercept)" "" "" "" ...
#>   ..$ R                : num [1, 1] -44.8
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr "(Intercept)"
#>   .. .. ..$ : chr "(Intercept)"
#>   ..$ rank             : int 1
#>   ..$ qr               :List of 5
#>   .. ..$ qr   : num [1:9915, 1] -44.84 0.01 0.01 0.01 0.01 ...
#>   .. .. ..- attr(*, "dimnames")=List of 2
#>   .. .. .. ..$ : chr [1:9915] "1" "2" "3" "4" ...
#>   .. .. .. ..$ : chr "(Intercept)"
#>   .. ..$ rank : int 1
#>   .. ..$ qraux: num 1.01
#>   .. ..$ pivot: int 1
#>   .. ..$ tol  : num 1e-11
#>   .. ..- attr(*, "class")= chr "qr"
#>   ..$ family           :List of 13
#>   .. ..$ family    : chr "binomial"
#>   .. ..$ link      : chr "logit"
#>   .. ..$ linkfun   :function (mu)  
#>   .. ..$ linkinv   :function (eta)  
#>   .. ..$ variance  :function (mu)  
#>   .. ..$ dev.resids:function (y, mu, wt)  
#>   .. ..$ aic       :function (y, n, mu, wt, dev)  
#>   .. ..$ mu.eta    :function (eta)  
#>   .. ..$ initialize: language {     if (NCOL(y) == 1) { ...
#>   .. ..$ validmu   :function (mu)  
#>   .. ..$ valideta  :function (eta)  
#>   .. ..$ simulate  :function (object, nsim)  
#>   .. ..$ dispersion: num 1
#>   .. ..- attr(*, "class")= chr "family"
#>   ..$ linear.predictors: Named num [1:9915] 0.931 0.931 0.931 0.931 0.931 ...
#>   .. ..- attr(*, "names")= chr [1:9915] "1" "2" "3" "4" ...
#>   ..$ deviance         : num 11810
#>   ..$ aic              : num 11812
#>   ..$ null.deviance    : num 11810
#>   ..$ iter             : int 4
#>   ..$ weights          : Named num [1:9915] 0.203 0.203 0.203 0.203 0.203 ...
#>   .. ..- attr(*, "names")= chr [1:9915] "1" "2" "3" "4" ...
#>   ..$ prior.weights    : Named num [1:9915] 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..- attr(*, "names")= chr [1:9915] "1" "2" "3" "4" ...
#>   ..$ df.residual      : int 9914
#>   ..$ df.null          : int 9914
#>   ..$ y                : Named num [1:9915] 0 1 0 0 1 1 1 0 0 0 ...
#>   .. ..- attr(*, "names")= chr [1:9915] "1" "2" "3" "4" ...
#>   ..$ converged        : logi TRUE
#>   ..$ boundary         : logi FALSE
#>   ..$ model            :'data.frame':    9915 obs. of  1 variable:
#>   .. ..$ CF..ansbin.: num [1:9915] 0 1 0 0 1 1 1 0 0 0 ...
#>   .. ..- attr(*, "terms")=Classes 'terms', 'formula'  language CF..ansbin. ~ 1
#>   .. .. .. ..- attr(*, "variables")= language list(CF..ansbin.)
#>   .. .. .. ..- attr(*, "factors")= int(0) 
#>   .. .. .. ..- attr(*, "term.labels")= chr(0) 
#>   .. .. .. ..- attr(*, "order")= int(0) 
#>   .. .. .. ..- attr(*, "intercept")= int 1
#>   .. .. .. ..- attr(*, "response")= int 1
#>   .. .. .. ..- attr(*, ".Environment")=<environment: 0x00000279a5d46f90> 
#>   .. .. .. ..- attr(*, "predvars")= language list(CF..ansbin.)
#>   .. .. .. ..- attr(*, "dataClasses")= Named chr "numeric"
#>   .. .. .. .. ..- attr(*, "names")= chr "CF..ansbin."
#>   ..$ call             : language glm(formula = as.formula(paste("CF..ansbin.~ 1", sep = "")), family = binomial(logit),      data = e$data[e$data$| __truncated__
#>   ..$ formula          :Class 'formula'  language CF..ansbin. ~ 1
#>   .. .. ..- attr(*, ".Environment")=<environment: 0x00000279a5d46f90> 
#>   ..$ terms            :Classes 'terms', 'formula'  language CF..ansbin. ~ 1
#>   .. .. ..- attr(*, "variables")= language list(CF..ansbin.)
#>   .. .. ..- attr(*, "factors")= int(0) 
#>   .. .. ..- attr(*, "term.labels")= chr(0) 
#>   .. .. ..- attr(*, "order")= int(0) 
#>   .. .. ..- attr(*, "intercept")= int 1
#>   .. .. ..- attr(*, "response")= int 1
#>   .. .. ..- attr(*, ".Environment")=<environment: 0x00000279a5d46f90> 
#>   .. .. ..- attr(*, "predvars")= language list(CF..ansbin.)
#>   .. .. ..- attr(*, "dataClasses")= Named chr "numeric"
#>   .. .. .. ..- attr(*, "names")= chr "CF..ansbin."
#>   ..$ data             :Classes 'data.table' and 'data.frame':   9915 obs. of  72 variables:
#>   .. ..$ Row                           : int [1:9915] 1598 1600 1601 1609 1618 1619 1620 1621 1632 1634 ...
#>   .. ..$ Sample.Name                   : chr [1:9915] "All Data" "All Data" "All Data" "All Data" ...
#>   .. ..$ Transaction.Id                : chr [1:9915] "f0235f6869afc0b60e629f909b4e05c3" "68fd9e921521e24dc7373f445aeb5b92" "81fa4f800a583bf8be3d4a1b2e7b9d60" "58801273327f7d702031ae3ed91ce112" ...
#>   .. ..$ Anon.Student.Id               : chr [1:9915] "01dad350-985b-4ff0-8182-4a7d4606c5ec" "01dad350-985b-4ff0-8182-4a7d4606c5ec" "01dad350-985b-4ff0-8182-4a7d4606c5ec" "01dad350-985b-4ff0-8182-4a7d4606c5ec" ...
#>   .. ..$ Session.Id                    : chr [1:9915] "no_session_tracking" "no_session_tracking" "no_session_tracking" "no_session_tracking" ...
#>   .. ..$ Time                          : chr [1:9915] "2020-01-06 11:34:13" "2020-01-06 11:34:34" "2020-01-06 11:35:27" "2020-01-06 11:38:11" ...
#>   .. ..$ Time.Zone                     : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ Duration..sec.                : chr [1:9915] "0" "7" "13.25" "47" ...
#>   .. ..$ Student.Response.Type         : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ Student.Response.Subtype      : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ Tutor.Response.Type           : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ Tutor.Response.Subtype        : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ Level..Workspace.Id.          : chr [1:9915] "worksheet_grapher_a1_patterns_2step_expr" "worksheet_grapher_a1_patterns_2step_expr" "worksheet_grapher_a1_patterns_2step_expr" "worksheet_grapher_a1_patterns_2step_expr" ...
#>   .. ..$ Problem.Name                  : chr [1:9915] "worksheet_grapher_a1_patterns_2step_expr_088" "worksheet_grapher_a1_patterns_2step_expr_088" "worksheet_grapher_a1_patterns_2step_expr_088" "worksheet_grapher_a1_patterns_2step_expr_088" ...
#>   .. ..$ Problem.View                  : int [1:9915] 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ Problem.Start.Time            : chr [1:9915] "2020-01-06 11:34:13" "2020-01-06 11:34:13" "2020-01-06 11:34:13" "2020-01-06 11:34:13" ...
#>   .. ..$ Step.Name                     : chr [1:9915] "Unit-Dep" "Unit-Indep" "Q1-Indep" "Q1-Dep" ...
#>   .. ..$ Attempt.At.Step               : int [1:9915] 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ Is.Last.Attempt               : int [1:9915] 0 1 0 0 1 1 1 0 0 0 ...
#>   .. ..$ Outcome                       : chr [1:9915] "INCORRECT" "CORRECT" "INCORRECT" "INCORRECT" ...
#>   .. ..$ Selection                     : chr [1:9915] "" "" "" "" ...
#>   .. ..$ Action                        : chr [1:9915] "Attempt" "Attempt" "Hint Request" "Attempt" ...
#>   .. ..$ Input                         : chr [1:9915] "\"{\"\"escape-in-messages\"\" : \"\"false\"\", \"\"value\"\" : \"\"2,235\"\"}\"" "\"{\"\"escape-in-messages\"\" : \"\"false\"\", \"\"value\"\" : \"\"hour\"\"}\"" "" "\"{\"\"escape-in-messages\"\" : \"\"false\"\", \"\"value\"\" : \"\"32\"\"}\"" ...
#>   .. ..$ Feedback.Text                 : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ Feedback.Classification       : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ Help.Level                    : int [1:9915] 0 0 1 0 0 0 0 0 0 1 ...
#>   .. ..$ Total.Num.Hints               : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ KC..MATHia.                   : chr [1:9915] "identifying units-1" "identifying units-1" "enter given, reading numerals-1" "find y, any form-1" ...
#>   .. ..$ KC.Category..MATHia.          : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ KC..Single.KC.                : chr [1:9915] "Single-KC" "Single-KC" "Single-KC" "Single-KC" ...
#>   .. ..$ KC.Category..Single.KC.       : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ KC..Unique.step.              : chr [1:9915] "KC433" "KC140" "KC97" "KC272" ...
#>   .. ..$ KC.Category..Unique.step.     : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ School                        : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ Class                         : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ CF..Etalon.                   : chr [1:9915] "miles" "hours" "2" "45" ...
#>   .. ..$ CF..Ruleid.                   : chr [1:9915] "Hints" "" "numeric value given - requires unit conversion" "forgot intercept" ...
#>   .. ..$ CF..Semantic.Event.Id.        : chr [1:9915] "ac0eea0c-b10d-4ed6-8bd2-d1cea9b2f9fb" "3ccc2099-25de-4de5-b2ec-5eb0af815d70" "7c9eb2e9-eab9-45c5-93dc-98a7a1cb3c4f" "6e7c5c1f-d6d1-4200-9a08-0828307bc2f3" ...
#>   .. ..$ CF..Skill.New.p.Known.        : num [1:9915] 0.244 0.601 0.1 0.28 0.46 ...
#>   .. ..$ CF..Skill.Previous.p.Known.   : num [1:9915] 0.25 0.244 0.1 0.1 0.1 ...
#>   .. ..$ CF..Workspace.Progress.Status.: chr [1:9915] "GRADUATED" "GRADUATED" "GRADUATED" "GRADUATED" ...
#>   .. ..$ Event.Type                    : logi [1:9915] NA NA NA NA NA NA ...
#>   .. ..$ CF..Time.                     : num [1:9915] 1.58e+09 1.58e+09 1.58e+09 1.58e+09 1.58e+09 ...
#>   .. ..$ CF..ansbin.                   : num [1:9915] 0 1 0 0 1 1 1 0 0 0 ...
#>   .. ..$ fold                          : num [1:9915] 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ CF..reltime.                  : num [1:9915] 0 0 7 20.2 67.2 ...
#>   .. ..$ index                         : chr [1:9915] "identifying units-101dad350-985b-4ff0-8182-4a7d4606c5ec" "identifying units-101dad350-985b-4ff0-8182-4a7d4606c5ec" "enter given, reading numerals-101dad350-985b-4ff0-8182-4a7d4606c5ec" "find y, any form-101dad350-985b-4ff0-8182-4a7d4606c5ec" ...
#>   .. ..$ KC..MATHia.spacing            : num [1:9915] 0 21 0 0 0 ...
#>   .. ..$ KC..MATHia.relspacing         : num [1:9915] 0 0 0 0 0 72 0 0 46 669 ...
#>   .. ..$ KC..MATHia.prev               : num [1:9915] 0 0 0 0 0 0 0 0 1 1 ...
#>   .. ..$ KC..MATHia.meanspacing        : num [1:9915] 0 -1 0 0 0 -1 0 0 159 -1 ...
#>   .. ..$ KC..MATHia.relmeanspacing     : num [1:9915] 0 -1 0 0 0 -1 0 0 72 -1 ...
#>   .. ..$ KC..MATHia.spacinglagged      : num [1:9915] 0 0 0 0 0 0 0 0 159 0 ...
#>   .. ..$ Problem.Namespacing           : num [1:9915] 0 21 53 164 139 ...
#>   .. ..$ Problem.Namerelspacing        : num [1:9915] 0 0 7 13.2 47 ...
#>   .. ..$ Problem.Nameprev              : num [1:9915] 0 0 1 0 0 1 1 1 0 0 ...
#>   .. ..$ Problem.Namemeanspacing       : num [1:9915] 0 -1 21 37 79.3 ...
#>   .. ..$ Problem.Namerelmeanspacing    : num [1:9915] 0 -1 0 3.5 6.75 ...
#>   .. ..$ Problem.Namespacinglagged     : num [1:9915] 0 0 21 53 164 139 20 24 0 0 ...
#>   .. ..$ Anon.Student.Idspacing        : num [1:9915] 0 21 53 164 139 ...
#>   .. ..$ Anon.Student.Idrelspacing     : num [1:9915] 0 0 7 13.2 47 ...
#>   .. ..$ Anon.Student.Idprev           : num [1:9915] 0 0 1 0 0 1 1 1 0 0 ...
#>   .. ..$ Anon.Student.Idmeanspacing    : num [1:9915] 0 -1 21 37 79.3 ...
#>   .. ..$ Anon.Student.Idrelmeanspacing : num [1:9915] 0 -1 0 3.5 6.75 ...
#>   .. ..$ Anon.Student.Idspacinglagged  : num [1:9915] 0 0 21 53 164 139 20 24 4 669 ...
#>   .. ..$ indexcomp                     : chr [1:9915] "identifying units-1" "identifying units-1" "enter given, reading numerals-1" "find y, any form-1" ...
#>   .. ..$ temp                          : int [1:9915] 0 1 0 0 0 1 0 0 1 0 ...
#>   .. ..$ cor                           : int [1:9915] 0 0 0 0 0 0 0 0 1 1 ...
#>   .. ..$ icor                          : int [1:9915] 0 1 0 0 0 1 0 0 1 0 ...
#>   .. ..$ logitdecKC..MATHia.           : num [1:9915] 0 -0.734 0 0 0 ...
#>   .. ..$ pred                          : num [1:9915] 0.644 0.486 0.644 0.644 0.644 ...
#>   .. ..$ curvefeat                     : num [1:9915] 0.644 0.486 0.644 0.644 0.644 ...
#>   .. ..- attr(*, ".internal.selfref")=<externalptr> 
#>   ..$ offset           : NULL
#>   ..$ control          :List of 3
#>   .. ..$ epsilon: num 1e-08
#>   .. ..$ maxit  : num 25
#>   .. ..$ trace  : logi FALSE
#>   ..$ method           : chr "glm.fit"
#>   ..$ contrasts        : NULL
#>   ..$ xlevels          : NULL
#>   ..- attr(*, "class")= chr [1:2] "glm" "lm"
#>  $ latencymodel : NULL
#>  $ optimizedpars:List of 5
#>   ..$ par        : num 0.924
#>   ..$ value      : num 4975
#>   ..$ counts     : Named int [1:2] 6 6
#>   .. ..- attr(*, "names")= chr [1:2] "function" "gradient"
#>   ..$ convergence: int 0
#>   ..$ message    : chr "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
#>  $ studentRMSE  :'data.frame':   500 obs. of  2 variables:
#>   ..$ Group.1: chr [1:500] "0046f7a1-59e2-49ba-aeb6-777376485104" "00c9fac8-4c0e-4054-9c1d-ebb7935b1d51" "0134ae92-1e66-4a00-b184-37013f0bf33f" "01479966-09c3-496a-a4db-5cbeddb6fc72" ...
#>   ..$ x      : num [1:500] NA NA NA NA NA ...
#>  $ newdata      :Classes 'data.table' and 'data.frame':  49521 obs. of  72 variables:
#>   ..$ Row                           : int [1:49521] 1 2 3 13 14 20 21 28 35 36 ...
#>   ..$ Sample.Name                   : chr [1:49521] "All Data" "All Data" "All Data" "All Data" ...
#>   ..$ Transaction.Id                : chr [1:49521] "7fe5b81e02c5d2000c55c1baea2f36bc" "9e87045f35ae9b12fe22f0e3baaf92fe" "c7ea53b83d42ad154b32a3734934b1ae" "0c62c9039338e624756dfa35900f419f" ...
#>   ..$ Anon.Student.Id               : chr [1:49521] "0046f7a1-59e2-49ba-aeb6-777376485104" "0046f7a1-59e2-49ba-aeb6-777376485104" "0046f7a1-59e2-49ba-aeb6-777376485104" "0046f7a1-59e2-49ba-aeb6-777376485104" ...
#>   ..$ Session.Id                    : chr [1:49521] "no_session_tracking" "no_session_tracking" "no_session_tracking" "no_session_tracking" ...
#>   ..$ Time                          : chr [1:49521] "2020-02-19 22:21:19" "2020-02-19 22:21:38" "2020-02-19 22:21:55" "2020-02-19 22:23:05" ...
#>   ..$ Time.Zone                     : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ Duration..sec.                : chr [1:49521] "0" "19" "17" "7" ...
#>   ..$ Student.Response.Type         : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ Student.Response.Subtype      : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ Tutor.Response.Type           : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ Tutor.Response.Subtype        : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ Level..Workspace.Id.          : chr [1:49521] "worksheet_grapher_a1_patterns_2step_expr" "worksheet_grapher_a1_patterns_2step_expr" "worksheet_grapher_a1_patterns_2step_expr" "worksheet_grapher_a1_patterns_2step_expr" ...
#>   ..$ Problem.Name                  : chr [1:49521] "worksheet_grapher_a1_patterns_2step_expr_085" "worksheet_grapher_a1_patterns_2step_expr_085" "worksheet_grapher_a1_patterns_2step_expr_085" "worksheet_grapher_a1_patterns_2step_expr_085" ...
#>   ..$ Problem.View                  : int [1:49521] 1 1 1 1 1 1 1 1 1 1 ...
#>   ..$ Problem.Start.Time            : chr [1:49521] "2020-02-19 22:21:19" "2020-02-19 22:21:19" "2020-02-19 22:21:19" "2020-02-19 22:21:19" ...
#>   ..$ Step.Name                     : chr [1:49521] "Unit-Indep" "Q1-Dep" "Q1-Indep" "Q2-Indep" ...
#>   ..$ Attempt.At.Step               : int [1:49521] 1 1 1 1 1 1 1 1 1 1 ...
#>   ..$ Is.Last.Attempt               : int [1:49521] 0 0 1 1 1 1 0 0 0 1 ...
#>   ..$ Outcome                       : chr [1:49521] "INCORRECT" "INCORRECT" "CORRECT" "CORRECT" ...
#>   ..$ Selection                     : chr [1:49521] "" "" "" "" ...
#>   ..$ Action                        : chr [1:49521] "Attempt" "Attempt" "Attempt" "Attempt" ...
#>   ..$ Input                         : chr [1:49521] "\"{\"\"escape-in-messages\"\" : \"\"false\"\", \"\"value\"\" : \"\"5\"\"}\"" "\"{\"\"escape-in-messages\"\" : \"\"false\"\", \"\"value\"\" : \"\"-15\"\"}\"" "\"{\"\"escape-in-messages\"\" : \"\"false\"\", \"\"value\"\" : \"\"8\"\"}\"" "\"{\"\"escape-in-messages\"\" : \"\"false\"\", \"\"value\"\" : \"\"13\"\"}\"" ...
#>   ..$ Feedback.Text                 : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ Feedback.Classification       : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ Help.Level                    : int [1:49521] 0 0 0 0 0 0 1 0 0 0 ...
#>   ..$ Total.Num.Hints               : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ KC..MATHia.                   : chr [1:49521] "identifying units-1" "find y, any form-1" "enter given, reading words-1" "enter given, reading words-1" ...
#>   ..$ KC.Category..MATHia.          : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ KC..Single.KC.                : chr [1:49521] "Single-KC" "Single-KC" "Single-KC" "Single-KC" ...
#>   ..$ KC.Category..Single.KC.       : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ KC..Unique.step.              : chr [1:49521] "KC573" "KC369" "KC199" "KC18" ...
#>   ..$ KC.Category..Unique.step.     : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ School                        : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ Class                         : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ CF..Etalon.                   : chr [1:49521] "days" "-65" "8" "13" ...
#>   ..$ CF..Ruleid.                   : chr [1:49521] "Hints" "any student answer" "" "" ...
#>   ..$ CF..Semantic.Event.Id.        : chr [1:49521] "995bdf0a-eefd-4bc0-bdf3-ccc0480933a2" "ed474e13-b35a-4ea8-97ad-e1cd403a9e27" "8242e1e0-f560-46d1-a406-643a3686c5c7" "17cb5a3e-0137-4579-83c0-833009af09b8" ...
#>   ..$ CF..Skill.New.p.Known.        : num [1:49521] 0.244 0.28 0.46 0.812 0.682 ...
#>   ..$ CF..Skill.Previous.p.Known.   : num [1:49521] 0.25 0.1 0.1 0.46 0.28 ...
#>   ..$ CF..Workspace.Progress.Status.: chr [1:49521] "GRADUATED" "GRADUATED" "GRADUATED" "GRADUATED" ...
#>   ..$ Event.Type                    : logi [1:49521] NA NA NA NA NA NA ...
#>   ..$ CF..Time.                     : num [1:49521] 1.58e+09 1.58e+09 1.58e+09 1.58e+09 1.58e+09 ...
#>   ..$ CF..ansbin.                   : num [1:49521] 0 0 1 1 1 1 0 0 0 1 ...
#>   ..$ fold                          : num [1:49521] 3 3 3 3 3 3 3 3 3 3 ...
#>   ..$ CF..reltime.                  : num [1:49521] 0 0 19 36 43 ...
#>   ..$ index                         : chr [1:49521] "identifying units-10046f7a1-59e2-49ba-aeb6-777376485104" "find y, any form-10046f7a1-59e2-49ba-aeb6-777376485104" "enter given, reading words-10046f7a1-59e2-49ba-aeb6-777376485104" "enter given, reading words-10046f7a1-59e2-49ba-aeb6-777376485104" ...
#>   ..$ KC..MATHia.spacing            : num [1:49521] 0 0 0 70 97 314 0 0 67 8 ...
#>   ..$ KC..MATHia.relspacing         : num [1:49521] 0 0 0 17 43 53 0 0 45.5 35 ...
#>   ..$ KC..MATHia.prev               : num [1:49521] 0 0 0 1 0 0 0 0 1 0 ...
#>   ..$ KC..MATHia.meanspacing        : num [1:49521] 0 0 0 -1 -1 ...
#>   ..$ KC..MATHia.relmeanspacing     : num [1:49521] 0 0 0 -1 -1 ...
#>   ..$ KC..MATHia.spacinglagged      : num [1:49521] 0 0 0 0 0 0 0 0 314 67 ...
#>   ..$ Problem.Namespacing           : num [1:49521] 0 19 17 70 10 198 4 12 0 8 ...
#>   ..$ Problem.Namerelspacing        : num [1:49521] 0 0 19 17 7 10 44 1 0 35 ...
#>   ..$ Problem.Nameprev              : num [1:49521] 0 0 0 1 1 1 1 0 0 0 ...
#>   ..$ Problem.Namemeanspacing       : num [1:49521] 0 -1 19 18 35.3 ...
#>   ..$ Problem.Namerelmeanspacing    : num [1:49521] 0 -1 0 9.5 12 ...
#>   ..$ Problem.Namespacinglagged     : num [1:49521] 0 0 19 17 70 10 198 4 0 0 ...
#>   ..$ Anon.Student.Idspacing        : num [1:49521] 0 19 17 70 10 198 4 12 51 8 ...
#>   ..$ Anon.Student.Idrelspacing     : num [1:49521] 0 0 19 17 7 10 44 1 0.5 35 ...
#>   ..$ Anon.Student.Idprev           : num [1:49521] 0 0 0 1 1 1 1 0 0 0 ...
#>   ..$ Anon.Student.Idmeanspacing    : num [1:49521] 0 -1 19 18 35.3 ...
#>   ..$ Anon.Student.Idrelmeanspacing : num [1:49521] 0 -1 0 9.5 12 ...
#>   ..$ Anon.Student.Idspacinglagged  : num [1:49521] 0 0 19 17 70 10 198 4 12 51 ...
#>   ..$ indexcomp                     : chr [1:49521] "identifying units-1" "find y, any form-1" "enter given, reading words-1" "enter given, reading words-1" ...
#>   ..$ temp                          : int [1:49521] 0 0 0 0 1 1 0 0 1 2 ...
#>   ..$ cor                           : int [1:49521] 0 0 0 1 0 0 0 0 1 1 ...
#>   ..$ icor                          : int [1:49521] 0 0 0 0 1 1 0 0 1 2 ...
#>   ..$ logitdecKC..MATHia.           : num [1:49521] 0 0 0 0.734 -0.734 ...
#>   ..$ pred                          : num [1:49521] NA NA NA NA NA NA NA NA NA NA ...
#>   ..$ curvefeat                     : num [1:49521] NA NA NA NA NA NA NA NA NA NA ...
#>   ..- attr(*, ".internal.selfref")=<externalptr> 
#>   ..- attr(*, "index")= int(0) 
#>   .. ..- attr(*, "__Outcome")= int [1:49521] 3 4 5 6 10 12 17 18 19 21 ...
#>  $ predictors   :Formal class 'matrix.csr' [package "SparseM"] with 4 slots
#>   .. ..@ ra       : num [1:94629] 1 1 1 1 0.734 ...
#>   .. ..@ ja       : int [1:94629] 1 1 1 1 2 1 2 1 2 1 ...
#>   .. ..@ ia       : int [1:49522] 1 2 3 4 6 8 10 11 12 14 ...
#>   .. ..@ dimension: int [1:2] 49521 2
#>  $ loglike      : num -4975
#>  $ automat      : list()

Example of the nosolve parameter, which makes LKT return the model matrix

    # Build the student-intercept + KC-intercept + KC-lineafm specification
    # with nosolve = TRUE (no global intercept requested via interc = FALSE).
    modelob <- LKT(
      data = val,
      interc = FALSE,
      components = c("Anon.Student.Id", "KC..Default.", "KC..Default."),
      features = c("intercept", "intercept", "lineafm"),
      nosolve = TRUE
    )
#> intercept Anon.Student.Id      
#> intercept KC..Default.      
#> lineafm KC..Default.      
#> lineafmKC..Default.+interceptKC..Default.+interceptAnon.Student.Id+0

ASSISTments data CV example (true cross-validation of the entire search process)

# Five-fold cross-validation in which the problem difficulty levels are
# re-derived from the training folds on every pass, so the held-out fold never
# contributes to its own difficulty estimate.
cv_folds <- 1:5

# Held-out metrics, one slot per fold (preallocated rather than grown).
LLs <- numeric(length(cv_folds))
AUCs <- numeric(length(cv_folds))
R2s <- numeric(length(cv_folds))
RMSEs <- numeric(length(cv_folds))

# Drop the rows of val3 (95% held out) whose Anon.Student.Id also appears in
# val4 (the 5% fit previously) - from the EDM 2023 journal paper
#val3 <- val3 %>%
#  anti_join(val4, by = "Anon.Student.Id")

# Uncomment the lines above and the section in Load MATHia to load the big file
# into val3, then load the small 5% file into val4; these lines will then
# cross-validate for the held-out subset.
# Currently this code (without the lines above) simply cross-validates with the
# 5% sample using the difficulty levels described in the EDM 2023 conference
# journal paper (submitted by invitation after the conference).

val3$problem_id <- as.character(val3$problem_id)
for (i in cv_folds) {
  print(cv_folds[-i])

  # Remove the previous pass's problem means before re-joining. Guarded because
  # the column does not exist on the first pass, and deleting a missing
  # data.table column raises a warning.
  if ("mean_correct" %in% names(val3)) {
    val3$mean_correct <- NULL
  }

  # Mean correctness per problem_id, computed on the training folds only.
  mean_data_not_i <- val3 %>%
    filter(fold != i) %>%
    group_by(problem_id) %>%
    summarise(mean_correct = mean(correct)) %>%
    ungroup()

  # Cluster the problem means into four groups with k-means.
  kmeans_result_not_i <- kmeans(mean_data_not_i$mean_correct, centers = 4)

  # Min, the four sorted cluster centers, and max give six breaks, i.e. five
  # bins for cut() below.
  breaks_not_i <- c(
    min(mean_data_not_i$mean_correct),
    sort(kmeans_result_not_i$centers),
    max(mean_data_not_i$mean_correct)
  )

  # Merge the training-fold means back onto every row (problems that occur
  # only in the held-out fold get NA).
  val3 <- left_join(val3, mean_data_not_i, by = "problem_id")

  # Label each row with one of five difficulty bins ("a" to "e"); rows whose
  # mean is NA or falls outside the breaks default to the middle label "c".
  val3 <- val3 %>%
    mutate(
      difficuty_level = as.character(cut(
        mean_correct,
        breaks = breaks_not_i,
        labels = c("a", "b", "c", "d", "e"),
        include.lowest = TRUE
      )),
      difficuty_level = if_else(is.na(difficuty_level), "c", difficuty_level)
    )

  # Restore a valid data.table before using `:=`; the dplyr verbs above rebuild
  # the object, so setDT() has to run first.
  val3 <- setDT(val3)

  # Problems with four or fewer rows also get the middle label "c".
  # `.N` is a scalar per group, hence if/else rather than ifelse().
  val3[
    ,
    difficuty_level := if (.N <= 4) "c" else difficuty_level,
    by = problem_id
  ]

  # Reference values for the nonlinear parameters (rounded in `fixedpars`):
  #   features   propdec   logitdec  recency
  #   components skill     type      skill
  #   pars       0.8027777 0.9387416 0.2549787
  modelob <- LKT(
    usefolds = cv_folds[-i],
    data = val3,
    interc = TRUE,
    fixedpars = c(.8, .94, .25),
    cost = 512,
    components = c(
      "skill",
      "type",
      "skill",
      "difficuty_level"
    ),
    features = c("propdec", "logitdec", "recency", "intercept")
  )

  # Rows of the held-out fold and their observed binary outcomes.
  heldout <- modelob$newdata$fold %in% i
  y_heldout <- modelob$newdata$CF..ansbin.[heldout]

  # Predicted probabilities for all rows, clamped away from 0 and 1 so the
  # log-likelihood stays finite, then restricted to the held-out fold.
  linear_pred <- as.matrix(modelob$predictors %*% modelob$coefs)[, ]
  pred <- as.vector(
    pmin(pmax(inv.logit(linear_pred), .00001), .99999)[heldout]
  )

  LLs[i] <- sum(log(ifelse(y_heldout == 1, pred, 1 - pred)))
  AUCs[i] <- suppressMessages(auc(y_heldout, pred)[1])
  print(AUCs[i])

  # Intercept-only model on the held-out outcomes, used as the McFadden R2
  # baseline.
  nullmodel <- glm(y_heldout ~ 1, family = binomial(logit))
  R2s[i] <- round(1 - LLs[i] / logLik(nullmodel)[1], 6)
  RMSEs[i] <- sqrt(mean((y_heldout - pred)^2))
  print(R2s[i])
  print(RMSEs[i])
}
#> [1] 2 3 4 5
#> Warning in set(x, j = name, value = value): Column 'mean_correct' does not
#> exist to remove
#> propdec skill 0.8     
#> logitdec type 0.94     
#> recency skill 0.25     
#> intercept difficuty_level      
#> interceptdifficuty_level+recencyskill+logitdectype+propdecskill+1 
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#> 0.0001014 0.5763520 0.7339936 0.6964468 0.8517272 0.9999900 
#> McFadden's R2 logistic: 0.198595 
#> LogLike logistic: -46635.59557564 
#> [1] 0.6799578
#> [1] -0.241313
#> [1] 0.4487302
#> [1] 1 3 4 5
#> propdec skill 0.8     
#> logitdec type 0.94     
#> recency skill 0.25     
#> intercept difficuty_level      
#> interceptdifficuty_level+recencyskill+logitdectype+propdecskill+1 
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#> 0.0000854 0.5869383 0.7333530 0.7005640 0.8480261 0.9999900 
#> McFadden's R2 logistic: 0.186269 
#> LogLike logistic: -50372.56456185 
#> [1] 0.7059893
#> [1] -0.194551
#> [1] 0.4453246
#> [1] 1 2 4 5
#> propdec skill 0.8     
#> logitdec type 0.94     
#> recency skill 0.25     
#> intercept difficuty_level      
#> interceptdifficuty_level+recencyskill+logitdectype+propdecskill+1 
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#> 0.0000697 0.5795646 0.7211032 0.6942280 0.8360215 0.9999900 
#> McFadden's R2 logistic: 0.178751 
#> LogLike logistic: -48197.12274073 
#> [1] 0.6934229
#> [1] -0.23242
#> [1] 0.4388652
#> [1] 1 2 3 5
#> propdec skill 0.8     
#> logitdec type 0.94     
#> recency skill 0.25     
#> intercept difficuty_level      
#> interceptdifficuty_level+recencyskill+logitdectype+propdecskill+1 
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#> 0.0000714 0.5771851 0.7204136 0.6923188 0.8356020 0.9999900 
#> McFadden's R2 logistic: 0.178124 
#> LogLike logistic: -51150.5559661 
#> [1] 0.6861564
#> [1] -0.201047
#> [1] 0.437756
#> [1] 1 2 3 4
#> propdec skill 0.8     
#> logitdec type 0.94     
#> recency skill 0.25     
#> intercept difficuty_level      
#> interceptdifficuty_level+recencyskill+logitdectype+propdecskill+1 
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#> 0.0000674 0.5855747 0.7239428 0.6967907 0.8354551 0.9999900 
#> McFadden's R2 logistic: 0.174579 
#> LogLike logistic: -50261.79928407 
#> [1] 0.7030545
#> [1] -0.231515
#> [1] 0.4403767

Examples

Philip I. Pavlik Jr.

2024-07-01

Packages

Load data (shows modifications to create needed columns)

Load MATHia (example how to load a remote dataset)

Load Assistments 2012 skillbuilder (example how to load a remote dataset)

Additive Factors Model (AFM) fixed effect version

Performance Factors Analysis (PFA) fixed effect version

PFA using difficulty sensitive predictors (composite model requiring pred from prior model for estimation)

Recent Performance Factors Analysis (RPFA)

Recency tracing with logitdec

Recency tracing with logitdec and transfer from cluster

Performance Prediction Equation (PPE)

base4 example

Simple interactions

Individualized Additive Factors Model (iAFM) fixed effect version

Connectors (another way to do interactions)

AutoKC

Synthetic discrimination parameter testing (experimental)

Credibility intervals

Recency tracing with RPFA propdec2 feature (the one in the original Galyardt and Goldin paper)

brpropdec (experimental feature)

Simple adaptive model for practice optimization

Test of new feature to trace KC intercepts across time (not within subjects)

Astonishing model (theory analysis)

Build LKT with special feature

AFMstartMATHia with CV

EmptystartMATHia with CV

example of LASSOLKT

example of LASSOLKT with preset

example for the nosolve parameter to make it return the model matrix

Assistments data CV example (True crossvalidation of entire search process)