sigr formatting

John Mount

2023-08-19

Examples of sigr formatting. Inspired by APA format (American Psychological Association), but not fully compliant. Discussed here.

Please see the APA stat guidelines for more notes.

Simple formatting.

library("sigr")
sigr::getRenderingFormat()

[1] “html”

cat(render(wrapSignificance(1/300)))

p=0.003333

F-test examples (quality of a numeric model of a numeric outcome).

cat(render(wrapFTestImpl(numdf=2,dendf=55,FValue=5.56)))

F Test summary: (R²=0.1682, F(2,55)=5.56, p=0.006322).

d <- data.frame(x=0.2*(1:20))
d$y <- cos(d$x)
model <- lm(y~x,data=d)
d$prediction <- predict(model,newdata=d)
print(summary(model))

## 
## Call:
## lm(formula = y ~ x, data = d)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34441 -0.24493  0.00103  0.18320  0.65001 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.95686    0.12869   7.436 6.84e-07 ***
## x           -0.56513    0.05371 -10.521 4.06e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.277 on 18 degrees of freedom
## Multiple R-squared:  0.8601, Adjusted R-squared:  0.8524 
## F-statistic: 110.7 on 1 and 18 DF,  p-value: 4.062e-09

cat(render(wrapFTest(model),pSmallCutoff=1.0e-12))

F Test summary: (R²=0.8601, F(1,18)=110.7, p=4.062e-09).

cat(render(wrapFTest(d,'prediction','y'),
                              pSmallCutoff=1.0e-12))

F Test summary: (R²=0.8601, F(1,18)=110.7, p=4.062e-09).

Chi-squared test examples (quality of a probability model of a two category outcome).

d <- data.frame(x=c(1,2,3,4,5,6,7,7),
       y=c(TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE))
model <- glm(y~x,data=d,family=binomial)
model$converged

## [1] TRUE

summary(model)

## 
## Call:
## glm(formula = y ~ x, family = binomial, data = d)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.7455     1.6672  -0.447    0.655
## x             0.1702     0.3429   0.496    0.620
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 11.090  on 7  degrees of freedom
## Residual deviance: 10.837  on 6  degrees of freedom
## AIC: 14.837
## 
## Number of Fisher Scoring iterations: 4

d$pred <- predict(model,type='response',newdata=d)

cat(render(wrapChiSqTest(model),pLargeCutoff=1))

Chi-Square Test summary: pseudo-R²=0.02282 (χ²(1,N=8)=0.2531, p=0.6149).

cat(render(wrapChiSqTest(d,'pred','y'),pLargeCutoff=1))

Chi-Square Test summary: pseudo-R²=0.02282 (χ²(1,N=8)=0.2531, p=0.6149).

d <- data.frame(x=c(1,2,3,4,5,6,7,7),
               y=c(1,1,2,2,3,3,4,4))
ct <- cor.test(d$x,d$y)

cat(render(wrapCorTest(ct)))

Pearson’s product-moment correlation: (r=0.9767, p=3.094e-05).

d <- data.frame(x=c('b','a','a','a','b','b','b'),
                y=c('1','1','1','2','2','2','2'))
ft <- fisher.test(table(d))

cat(render(wrapFisherTest(ft),pLargeCutoff=1))

Fisher’s Exact Test for Count Data: (odds.ratio=4.45, p=0.4857).

d <- data.frame(x=c(1,2,3,4,5,6,7,7),
                y=c(1,1,2,2,3,3,4,4))
ft <- t.test(d$x,d$y)

cat(render(wrapTTest(ft),pLargeCutoff=1))

Welch Two Sample t-test, two.sided: (t=2.072, df=10.62, p=0.06349).

parallelCluster <- NULL
#parallelCluster <- parallel::makeCluster(parallel::detectCores())

set.seed(25325)
d <- data.frame(x1=c(1,2,3,4,5,6,7,7),
                y=c(FALSE,TRUE,FALSE,FALSE,
                    TRUE,TRUE,FALSE,TRUE))
d <- rbind(d,d,d,d)
sigr::resampleTestAUC(d,'x1','y',TRUE,
                nrep=200,
                parallelCluster=parallelCluster)

[1] “AUC test alt. hyp. AUC>0.5: (AUC=0.6562, s.d.=0.09311, p=n.s.).”

set.seed(25325)
d <- data.frame(x1=c(1,2,3,4,5,6,7,7),
                x2=1,
                y=c(FALSE,TRUE,FALSE,FALSE,
                    TRUE,TRUE,FALSE,TRUE))
d <- rbind(d,d,d,d)
sigr::testAUCpair(d,'x1','x2','y',TRUE,
                    nrep=200,
                    parallelCluster=parallelCluster)

[1] “AUC test resampled AUC1>AUC2: (AUCs=0.6562;0.5, s.d.=0.09694, e=n.s.).”

if(!is.null(parallelCluster)) {
  parallel::stopCluster(parallelCluster)
}

permutationScoreModel

set.seed(25325)
y <- 1:5
m <- c(1,1,2,2,2)
cor.test(m,y,alternative='greater')

## 
##  Pearson's product-moment correlation
## 
## data:  m and y
## t = 3, df = 3, p-value = 0.02883
## alternative hypothesis: true correlation is greater than 0
## 95 percent confidence interval:
##  0.1526678 1.0000000
## sample estimates:
##       cor 
## 0.8660254

f <- function(modelValues, yValues) { cor(modelValues, yValues) }
sigr::permutationScoreModel(m,y,f)

## [1] "<b>Studentized permutation test</b>: is observed score greater than permuted score,  summary: <i>p</i>=0.04162"

resampleScoreModel

set.seed(25325)
y <- 1:5
m1 <- c(1,1,2,2,2)
cor.test(m1,y,alternative='greater')

## 
##  Pearson's product-moment correlation
## 
## data:  m1 and y
## t = 3, df = 3, p-value = 0.02883
## alternative hypothesis: true correlation is greater than 0
## 95 percent confidence interval:
##  0.1526678 1.0000000
## sample estimates:
##       cor 
## 0.8660254

f <- function(modelValues,yValues) {
  if((sd(modelValues)<=0)||(sd(yValues)<=0)) {
    return(0)
  }
  cor(modelValues,yValues)
}
s <- sigr::resampleScoreModel(m1,y,f)
print(s)

## $fnName
## [1] "resampleScoreModel"
## 
## $observedScore
## [1] 0.8660254
## 
## $bias
## [1] -0.06201873
## 
## $sd
## [1] 0.2768382
## 
## $nNA
## [1] 0
## 
## $n
## [1] 5

z <- s$observedScore/s$sd  # always check size of z relative to bias!
pValue <- pt(z,df=length(y)-2,lower.tail=FALSE)
pValue

## [1] 0.0260677

resampleScoreModelPair

set.seed(25325)
y <- 1:5
m1 <- c(1,1,2,2,2)
m2 <- c(1,1,1,1,2)
cor(m1,y)

## [1] 0.8660254

cor(m2,y)

## [1] 0.7071068

f <- function(modelValues,yValues) {
  if((sd(modelValues)<=0)||(sd(yValues)<=0)) {
    return(0)
  }
  cor(modelValues,yValues)
}
sigr::render(sigr::resampleScoreModelPair(m1,m2,y,f),
             pLargeCutoff=1,format='ascii')

## [1] "Studentized empirical test: is difference greater than zero on re-samples,  summary: e=0.3331"