Model selection using AIC and WAIC.

library(rethinking)
data("Howell1")
myData<-Howell1
head(myData)
plot(myData$height,myData$weight)

Using R’s built in lm() to fit a linear and quadratic model.

lmFit<-lm(weight~height,data=myData)
lmFitQuad<-lm(weight~height + I(height^2),data=myData)
lmFit

Call:
lm(formula = weight ~ height, data = myData)

Coefficients:
(Intercept)       height  
   -33.7561       0.5017  
lmFitQuad

Call:
lm(formula = weight ~ height + I(height^2), data = myData)

Coefficients:
(Intercept)       height  I(height^2)  
  20.844760    -0.472953     0.004033  
broom::glance(lmFitQuad)$AIC
[1] 3041.213

Plotting the fit

myData2<-cbind(myData,predict(lmFit),predict(lmFitQuad))
head(myData2)
myData2ordered<-myData2[order(myData2$height),]
plot(myData2$height,myData2$weight)
lines(myData2ordered$height,myData2ordered[['predict(lmFit)']])
lines(myData2ordered$height,myData2ordered[['predict(lmFitQuad)']])

Extracting AIC from the lm() fits. Lower values of AIC are favored.

broom::glance(lmFit)$AIC - broom::glance(lmFitQuad)$AIC
[1] 256.1191
library(stats)
AIC(lmFit)
[1] 3297.332
AIC(lmFitQuad)
[1] 3041.213
AIC(lmFit)-AIC(lmFitQuad)
[1] 256.1191
extractAIC(lmFit)
[1]    2.000 1751.527
extractAIC(lmFitQuad)
[1]    3.000 1495.408
extractAIC(lmFit)-extractAIC(lmFitQuad)
[1]  -1.0000 256.1191

Thus, we would select the curved line over the straight line.

Next, writing our own likelihood functions to fit a linear and quadratic models.

nllRegressionLinear<-function(parmVector,weightDat,heightDat,print=F){

  intercept<-parmVector[1]
  B<-parmVector[2]
  mySD<-parmVector[3]
  
  w<-weightDat
  h<-heightDat
  mu<-intercept + B*h
  
  nllik<- -sum(dnorm(x=w,mean=mu,sd=mySD,log=TRUE))

  if (print) cat("nllik= ",nllik,sep=" ",fill=T)
  return(nllik)}
parVecLinear<-c(0.1,0.1,10) # Initial parameter values 
outRegLinear<-optim(par=parVecLinear,fn=nllRegressionLinear,method="L-BFGS-B",lower=c(-Inf,-Inf, 0.01),upper=c(Inf,Inf, Inf), weightDat=myData$weight, heightDat=myData$height,print=F)
outRegLinear$par 
[1] -33.7561543   0.5016994   4.9837499
outRegLinear$value
[1] 1645.666

Calculating AIC where AIC = 2k - 2ln(L) where L is the maximized likelihood. L is given by outRegLinear\(value from above. k is the number of parameters fit, which is equal to the length of parVecLinear. For our linear model, its an intercept, slope, and sd. Note since we calculated the negative log likelihood, we calculate AIC using 2*k + 2*outRegLinear\)value

lmAIC = 2*length(parVecLinear) + 2*outRegLinear$value
lmAIC
[1] 3297.332
broom::glance(lmFit)
nllRegressionQuad<-function(parmVector,weightDat,heightDat,print=F){

  intercept<-parmVector[1]
  B1<-parmVector[2]
  B2<-parmVector[3]
  mySD<-parmVector[4]
  
  w<-weightDat
  h<-heightDat
  mu<-intercept + B1*h + B2*h^2
  
  nllik<- -sum(dnorm(x=w,mean=mu,sd=mySD,log=TRUE))

  if (print) cat("nllik= ",nllik,sep=" ",fill=T)
  return(nllik)}
parVecQuad<-c(20.1,0.1,0.1,10) 
outRegQuad<-optim(par=parVecQuad,fn=nllRegressionQuad,method="L-BFGS-B",lower=c(-Inf,-Inf,-Inf, 0.01),upper=c(Inf,Inf,Inf,Inf), weightDat=myData$weight, heightDat=myData$height,print=F)
outRegQuad$par 
[1] 20.092051849 -0.460139727  0.003981269  3.931391664
outRegQuad$value
[1] 1516.635
broom::glance(lmFitQuad)

Calculating AIC for the quadratic model. Note that we have one additional parameter compared to the linear model.

quadAIC = 2*length(parVecQuad) + 2*outRegQuad$value
quadAIC
[1] 3041.271
DeltaAIC<-lmAIC-quadAIC
DeltaAIC
[1] 256.0611

Plotting the fit

plot(myData$height,myData$weight)
abline(a=outRegLinear$par[1],b=outRegLinear$par[2])
curve(outRegQuad$par[1] + outRegQuad$par[2]*x + outRegQuad$par[3]*x^2 , 55, 180,add=TRUE)

Finally, using the QUAP function in the Rethinking package.

library(rethinking)
lm.qa <- quap(
    alist(
        weight ~ dnorm(mu,sd) ,  # normal likelihood
        sd ~ dexp(1),     # exponential prior
        mu<-intercept + beta*height,
        intercept~dnorm(0,10),
        beta~dnorm(0,10)
    ) ,
    data=list(weight=myData$weight,height=myData$height) )

precis(lm.qa)
WAIC(lm.qa)
library(rethinking)
quad.qa <- quap(
    alist(
        weight ~ dnorm(mu,sd) ,  # normal likelihood
        sd ~ dexp(1),     # exponential prior
        mu<-intercept + beta1*height + beta2*height2,
        intercept~dnorm(0,10),
        beta1~dnorm(0,10),
        beta2~dnorm(0,10)
    ) ,
    data=list(weight=myData$weight,height=myData$height, height2=myData$height^2),
    start=list(sd=5))

precis(quad.qa,digits=4)
WAIC(quad.qa)
WAIC(lm.qa)$WAIC - WAIC(quad.qa)$WAIC
[1] 256.1963

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

plot(cars)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

LS0tCnRpdGxlOiAiTW9kZWwgU2VsZWN0aW9uIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpNb2RlbCBzZWxlY3Rpb24gdXNpbmcgQUlDIGFuZCBXQUlDLgoKYGBge3J9CmxpYnJhcnkocmV0aGlua2luZykKZGF0YSgiSG93ZWxsMSIpCm15RGF0YTwtSG93ZWxsMQpoZWFkKG15RGF0YSkKcGxvdChteURhdGEkaGVpZ2h0LG15RGF0YSR3ZWlnaHQpCmBgYAoKCgpVc2luZyBSJ3MgYnVpbHQgaW4gbG0oKSB0byBmaXQgYSBsaW5lYXIgYW5kIHF1YWRyYXRpYyBtb2RlbC4gCmBgYHtyfQpsbUZpdDwtbG0od2VpZ2h0fmhlaWdodCxkYXRhPW15RGF0YSkKbG1GaXRRdWFkPC1sbSh3ZWlnaHR+aGVpZ2h0ICsgSShoZWlnaHReMiksZGF0YT1teURhdGEpCmBgYAoKCmBgYHtyfQpsbUZpdApgYGAKCgpgYGB7cn0KbG1GaXRRdWFkCmBgYAoKClBsb3R0aW5nIHRoZSBmaXQKCmBgYHtyfQpteURhdGEyPC1jYmluZChteURhdGEscHJlZGljdChsbUZpdCkscHJlZGljdChsbUZpdFF1YWQpKQpoZWFkKG15RGF0YTIpCmBgYAoKCmBgYHtyfQpteURhdGEyb3JkZXJlZDwtbXlEYXRhMltvcmRlcihteURhdGEyJGhlaWdodCksXQpwbG90KG15RGF0YTIkaGVpZ2h0LG15RGF0YTIkd2VpZ2h0KQpsaW5lcyhteURhdGEyb3JkZXJlZCRoZWlnaHQsbXlEYXRhMm9yZGVyZWRbWydwcmVkaWN0KGxtRml0KSddXSkKbGluZXMobXlEYXRhMm9yZGVyZWQkaGVpZ2h0LG15RGF0YTJvcmRlcmVkW1sncHJlZGljdChsbUZpdFF1YWQpJ11dKQpgYGAKCgpFeHRyYWN0aW5nIEFJQyBmcm9tIHRoZSBsbSgpIGZpdHMuICBMb3dlciB2YWx1ZXMgb2YgQUlDIGFyZSBmYXZvcmVkLgoKYGBge3J9CmJyb29tOjpnbGFuY2UobG1GaXQpJEFJQyAtIGJyb29tOjpnbGFuY2UobG1GaXRRdWFkKSRBSUMKYGBgCgoKYGBge3J9CmxpYnJhcnkoc3RhdHMpCkFJQyhsbUZpdCkKQUlDKGxtRml0UXVhZCkKQUlDKGxtRml0KS1BSUMobG1GaXRRdWFkKQpgYGAKCgpgYGB7cn0KZXh0cmFjdEFJQyhsbUZpdCkKZXh0cmFjdEFJQyhsbUZpdFF1YWQpCmV4dHJhY3RBSUMobG1GaXQpLWV4dHJhY3RBSUMobG1GaXRRdWFkKQpgYGAKClRodXMsIHdlIHdvdWxkIHNlbGVjdCB0aGUgY3VydmVkIGxpbmUgb3ZlciB0aGUgc3RyYWlnaHQgbGluZS4KCgpOZXh0LCB3cml0aW5nIG91ciBvd24gbGlrZWxpaG9vZCBmdW5jdGlvbnMgdG8gZml0IGEgbGluZWFyIGFuZCBxdWFkcmF0aWMgbW9kZWxzLgoKYGBge3J9Cm5sbFJlZ3Jlc3Npb25MaW5lYXI8LWZ1bmN0aW9uKHBhcm1WZWN0b3Isd2VpZ2h0RGF0LGhlaWdodERhdCxwcmludD1GKXsKCiAgaW50ZXJjZXB0PC1wYXJtVmVjdG9yWzFdCiAgQjwtcGFybVZlY3RvclsyXQogIG15U0Q8LXBhcm1WZWN0b3JbM10KICAKICB3PC13ZWlnaHREYXQKICBoPC1oZWlnaHREYXQKICBtdTwtaW50ZXJjZXB0ICsgQipoCiAgCiAgbmxsaWs8LSAtc3VtKGRub3JtKHg9dyxtZWFuPW11LHNkPW15U0QsbG9nPVRSVUUpKQoKICBpZiAocHJpbnQpIGNhdCgibmxsaWs9ICIsbmxsaWssc2VwPSIgIixmaWxsPVQpCiAgcmV0dXJuKG5sbGlrKX0KCmBgYAoKCmBgYHtyfQpwYXJWZWNMaW5lYXI8LWMoMC4xLDAuMSwxMCkgIyBJbml0aWFsIHBhcmFtZXRlciB2YWx1ZXMgCm91dFJlZ0xpbmVhcjwtb3B0aW0ocGFyPXBhclZlY0xpbmVhcixmbj1ubGxSZWdyZXNzaW9uTGluZWFyLG1ldGhvZD0iTC1CRkdTLUIiLGxvd2VyPWMoLUluZiwtSW5mLCAwLjAxKSx1cHBlcj1jKEluZixJbmYsIEluZiksIHdlaWdodERhdD1teURhdGEkd2VpZ2h0LCBoZWlnaHREYXQ9bXlEYXRhJGhlaWdodCxwcmludD1GKQpvdXRSZWdMaW5lYXIkcGFyIApvdXRSZWdMaW5lYXIkdmFsdWUKYGBgCgpDYWxjdWxhdGluZyBBSUMgd2hlcmUgQUlDID0gMiprIC0gMipsbihMKSB3aGVyZSBMIGlzIHRoZSBtYXhpbWl6ZWQgbGlrZWxpaG9vZC4gTCBpcyBnaXZlbiBieSBvdXRSZWdMaW5lYXIkdmFsdWUgZnJvbSBhYm92ZS4gayBpcyB0aGUgbnVtYmVyIG9mIHBhcmFtZXRlcnMgZml0LAp3aGljaCBpcyBlcXVhbCB0byB0aGUgbGVuZ3RoIG9mIHBhclZlY0xpbmVhci4gIEZvciBvdXIgbGluZWFyIG1vZGVsLCBpdHMgYW4gaW50ZXJjZXB0LCBzbG9wZSwgYW5kIHNkLgpOb3RlIHNpbmNlIHdlIGNhbGN1bGF0ZWQgdGhlIG5lZ2F0aXZlIGxvZyBsaWtlbGlob29kLCB3ZSBjYWxjdWxhdGUgQUlDIHVzaW5nIDIqayArIDIqb3V0UmVnTGluZWFyJHZhbHVlCgpgYGB7cn0KbG1BSUMgPSAyKmxlbmd0aChwYXJWZWNMaW5lYXIpICsgMipvdXRSZWdMaW5lYXIkdmFsdWUKbG1BSUMKYGBgCmBgYHtyfQpicm9vbTo6Z2xhbmNlKGxtRml0KQpgYGAKCgpgYGB7cn0KbmxsUmVncmVzc2lvblF1YWQ8LWZ1bmN0aW9uKHBhcm1WZWN0b3Isd2VpZ2h0RGF0LGhlaWdodERhdCxwcmludD1GKXsKCiAgaW50ZXJjZXB0PC1wYXJtVmVjdG9yWzFdCiAgQjE8LXBhcm1WZWN0b3JbMl0KICBCMjwtcGFybVZlY3RvclszXQogIG15U0Q8LXBhcm1WZWN0b3JbNF0KICAKICB3PC13ZWlnaHREYXQKICBoPC1oZWlnaHREYXQKICBtdTwtaW50ZXJjZXB0ICsgQjEqaCArIEIyKmheMgogIAogIG5sbGlrPC0gLXN1bShkbm9ybSh4PXcsbWVhbj1tdSxzZD1teVNELGxvZz1UUlVFKSkKCiAgaWYgKHByaW50KSBjYXQoIm5sbGlrPSAiLG5sbGlrLHNlcD0iICIsZmlsbD1UKQogIHJldHVybihubGxpayl9CgpgYGAKCmBgYHtyfQpwYXJWZWNRdWFkPC1jKDIwLjEsMC4xLDAuMSwxMCkgCm91dFJlZ1F1YWQ8LW9wdGltKHBhcj1wYXJWZWNRdWFkLGZuPW5sbFJlZ3Jlc3Npb25RdWFkLG1ldGhvZD0iTC1CRkdTLUIiLGxvd2VyPWMoLUluZiwtSW5mLC1JbmYsIDAuMDEpLHVwcGVyPWMoSW5mLEluZixJbmYsSW5mKSwgd2VpZ2h0RGF0PW15RGF0YSR3ZWlnaHQsIGhlaWdodERhdD1teURhdGEkaGVpZ2h0LHByaW50PUYpCm91dFJlZ1F1YWQkcGFyIApvdXRSZWdRdWFkJHZhbHVlCmBgYAoKYGBge3J9CmJyb29tOjpnbGFuY2UobG1GaXRRdWFkKQpgYGAKCgpDYWxjdWxhdGluZyBBSUMgZm9yIHRoZSBxdWFkcmF0aWMgbW9kZWwuICBOb3RlIHRoYXQgd2UgaGF2ZSBvbmUgYWRkaXRpb25hbCBwYXJhbWV0ZXIgY29tcGFyZWQgdG8gdGhlIGxpbmVhciBtb2RlbC4KYGBge3J9CnF1YWRBSUMgPSAyKmxlbmd0aChwYXJWZWNRdWFkKSArIDIqb3V0UmVnUXVhZCR2YWx1ZQpxdWFkQUlDCmBgYAoKYGBge3J9CkRlbHRhQUlDPC1sbUFJQy1xdWFkQUlDCkRlbHRhQUlDCmBgYAoKClBsb3R0aW5nIHRoZSBmaXQKCmBgYHtyfQpwbG90KG15RGF0YSRoZWlnaHQsbXlEYXRhJHdlaWdodCkKYWJsaW5lKGE9b3V0UmVnTGluZWFyJHBhclsxXSxiPW91dFJlZ0xpbmVhciRwYXJbMl0pCmN1cnZlKG91dFJlZ1F1YWQkcGFyWzFdICsgb3V0UmVnUXVhZCRwYXJbMl0qeCArIG91dFJlZ1F1YWQkcGFyWzNdKnheMiAsIDU1LCAxODAsYWRkPVRSVUUpCmBgYAoKCkZpbmFsbHksIHVzaW5nIHRoZSBRVUFQIGZ1bmN0aW9uIGluIHRoZSBSZXRoaW5raW5nIHBhY2thZ2UuCgpgYGB7cn0KbGlicmFyeShyZXRoaW5raW5nKQpsbS5xYSA8LSBxdWFwKAogICAgYWxpc3QoCiAgICAgICAgd2VpZ2h0IH4gZG5vcm0obXUsc2QpICwgICMgbm9ybWFsIGxpa2VsaWhvb2QKICAgICAgICBzZCB+IGRleHAoMSksICAgICAjIGV4cG9uZW50aWFsIHByaW9yCiAgICAgICAgbXU8LWludGVyY2VwdCArIGJldGEqaGVpZ2h0LAogICAgICAgIGludGVyY2VwdH5kbm9ybSgwLDEwKSwKICAgICAgICBiZXRhfmRub3JtKDAsMTApCiAgICApICwKICAgIGRhdGE9bGlzdCh3ZWlnaHQ9bXlEYXRhJHdlaWdodCxoZWlnaHQ9bXlEYXRhJGhlaWdodCkgKQoKcHJlY2lzKGxtLnFhKQpgYGAKCmBgYHtyfQpXQUlDKGxtLnFhKQpgYGAKCgpgYGB7cn0KbGlicmFyeShyZXRoaW5raW5nKQpxdWFkLnFhIDwtIHF1YXAoCiAgICBhbGlzdCgKICAgICAgICB3ZWlnaHQgfiBkbm9ybShtdSxzZCkgLCAgIyBub3JtYWwgbGlrZWxpaG9vZAogICAgICAgIHNkIH4gZGV4cCgxKSwgICAgICMgZXhwb25lbnRpYWwgcHJpb3IKICAgICAgICBtdTwtaW50ZXJjZXB0ICsgYmV0YTEqaGVpZ2h0ICsgYmV0YTIqaGVpZ2h0MiwKICAgICAgICBpbnRlcmNlcHR+ZG5vcm0oMCwxMCksCiAgICAgICAgYmV0YTF+ZG5vcm0oMCwxMCksCiAgICAgICAgYmV0YTJ+ZG5vcm0oMCwxMCkKICAgICkgLAogICAgZGF0YT1saXN0KHdlaWdodD1teURhdGEkd2VpZ2h0LGhlaWdodD1teURhdGEkaGVpZ2h0LCBoZWlnaHQyPW15RGF0YSRoZWlnaHReMiksCiAgICBzdGFydD1saXN0KHNkPTUpKQoKcHJlY2lzKHF1YWQucWEsZGlnaXRzPTQpCmBgYAoKYGBge3J9CldBSUMocXVhZC5xYSkKYGBgCgoKYGBge3J9CldBSUMobG0ucWEpJFdBSUMgLSBXQUlDKHF1YWQucWEpJFdBSUMKYGBgCgoKCgoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgpgYGB7cn0KcGxvdChjYXJzKQpgYGAKCkFkZCBhIG5ldyBjaHVuayBieSBjbGlja2luZyB0aGUgKkluc2VydCBDaHVuayogYnV0dG9uIG9uIHRoZSB0b29sYmFyIG9yIGJ5IHByZXNzaW5nICpDbWQrT3B0aW9uK0kqLgoKV2hlbiB5b3Ugc2F2ZSB0aGUgbm90ZWJvb2ssIGFuIEhUTUwgZmlsZSBjb250YWluaW5nIHRoZSBjb2RlIGFuZCBvdXRwdXQgd2lsbCBiZSBzYXZlZCBhbG9uZ3NpZGUgaXQgKGNsaWNrIHRoZSAqUHJldmlldyogYnV0dG9uIG9yIHByZXNzICpDbWQrU2hpZnQrSyogdG8gcHJldmlldyB0aGUgSFRNTCBmaWxlKS4gCgpUaGUgcHJldmlldyBzaG93cyB5b3UgYSByZW5kZXJlZCBIVE1MIGNvcHkgb2YgdGhlIGNvbnRlbnRzIG9mIHRoZSBlZGl0b3IuIENvbnNlcXVlbnRseSwgdW5saWtlICpLbml0KiwgKlByZXZpZXcqIGRvZXMgbm90IHJ1biBhbnkgUiBjb2RlIGNodW5rcy4gSW5zdGVhZCwgdGhlIG91dHB1dCBvZiB0aGUgY2h1bmsgd2hlbiBpdCB3YXMgbGFzdCBydW4gaW4gdGhlIGVkaXRvciBpcyBkaXNwbGF5ZWQuCgo=