Multivariate regression. Calculate the AIC value for the regression of height vs. weight for the !Kung data, with and without sex, using lm() and your own likelihood function. Based on these AIC scores, which is the better model?
First using lm()
library(rethinking)
data(Howell1)
d <- Howell1
d2 <- d[ d$age >= 18 , ]
# head(d2)
# plot(height~weight, data=d2)
# Scaling the predictor variable weight to mean = 0, sd = 1
# (the response variable height is left on its original scale).
d3<-d2
d3$weight<-standardize(d3$weight)
Fit a linear model using lm():
height ~ Normal(mu, sigma), mu = intercept + betaWeight * weight
out<-lm(height~weight,data=d3)
summary(out,correlation=TRUE) # correlation between coefficients is ~0 after standardizing weight
##
## Call:
## lm(formula = height ~ weight, data = d3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.7464 -2.8835 0.0222 3.1424 14.7744
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 154.5971 0.2711 570.25 <2e-16 ***
## weight 5.8435 0.2715 21.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.086 on 350 degrees of freedom
## Multiple R-squared: 0.5696, Adjusted R-squared: 0.5684
## F-statistic: 463.3 on 1 and 350 DF, p-value: < 2.2e-16
##
## Correlation of Coefficients:
## (Intercept)
## weight 0.00
plot(height~weight, data=d3)
abline(a=out$coefficients[1],b=out$coefficients[2])
library(stats)
sigma(out)
## [1] 5.086336
AIC(out)
## [1] 2148.024
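As a sanity check on what AIC() computes (a self-contained sketch on simulated data, not the !Kung data): AIC is -2 times the log-likelihood plus twice the parameter count, and lm() counts the intercept, the slope, and sigma as parameters.

```r
# Sketch: AIC(fit) equals -2*logLik + 2k, where lm() counts k = 3
# parameters here (intercept, slope, sigma)
set.seed(1)
x <- rnorm(50)
y <- 2 + 3 * x + rnorm(50)
fit <- lm(y ~ x)
ll <- logLik(fit)              # log-likelihood at the MLE
k  <- attr(ll, "df")           # k = 3: intercept, slope, sigma
manualAIC <- -2 * as.numeric(ll) + 2 * k
all.equal(manualAIC, AIC(fit)) # TRUE
```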
Adding a model with sex
height ~ Normal(mu, sigma), mu = intercept + betaWeight * weight + betaMale * male
outSex<-lm(height~weight + male, data=d3)
summary(outSex,correlation=TRUE) # coefficients are now correlated, since male is not centered
##
## Call:
## lm(formula = height ~ weight + male, data = d3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.7859 -2.5506 0.4669 2.6278 14.1486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 151.5501 0.3391 446.98 <2e-16 ***
## weight 4.1399 0.2678 15.46 <2e-16 ***
## male 6.5003 0.5359 12.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.272 on 349 degrees of freedom
## Multiple R-squared: 0.6973, Adjusted R-squared: 0.6955
## F-statistic: 401.9 on 2 and 349 DF, p-value: < 2.2e-16
##
## Correlation of Coefficients:
## (Intercept) weight
## weight 0.39
## male -0.74 -0.52
library(stats)
sigma(outSex)
## [1] 4.272153
AIC(outSex)
## [1] 2026.211
BIC(outSex)
## [1] 2041.665
AIC of model with weight + sex: 2026.211. AIC of model with weight only: 2148.024. The model including sex has the lower AIC by more than 120 units, so it is the better model.
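To put the difference on a probability scale, the two AIC scores can be converted into Akaike weights (a standard transformation; the vector names here are just labels):

```r
# Akaike weights from the two AIC scores reported above
aic <- c(weightOnly = 2148.024, weightSex = 2026.211)
delta <- aic - min(aic)                          # AIC differences
w <- exp(-0.5 * delta) / sum(exp(-0.5 * delta))  # Akaike weights
round(w, 4)  # essentially all weight falls on the weight + sex model
```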
Fitting with my own likelihood function
Likelihood function with weight:
nllNormReg<-function(parVec,data=d3){
wt<-data$weight
ht<-data$height
intercept<-parVec[1]
betaW<-parVec[2]
mySd<-parVec[3]
htPred<-intercept+betaW*wt
nllik<- -sum(dnorm(x=ht,mean=htPred,sd=mySd,log=TRUE))
#browser()
return(nllik)
}
parVec<-c(1.0,2.0,2.0) # starting values (intercept, betaW, sd) for the gradient-based optimizer
outLM<-optim(par=parVec,fn=nllNormReg,method="L-BFGS-B",
lower=c(-Inf,-Inf,0.0001),upper=c(Inf,Inf,Inf))
#
AIC<- 2*outLM$value + 2*3 # AIC = 2*NLL + 2k, with k = 3 parameters
outLM$par
## [1] 154.597093 5.843509 5.071867
AIC
## [1] 2148.024
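Note that optim() returns sigma of about 5.072 while sigma(out) above reported 5.086. That is expected: the MLE of sigma divides the residual sum of squares by n, while lm()'s residual standard error divides by n - p. A minimal sketch on simulated data:

```r
# Sketch: the MLE of sigma uses RSS/n, while lm()'s residual
# standard error uses RSS/(n - p), so the MLE is slightly smaller
set.seed(1)
n <- 100
x <- rnorm(n)
y <- 1 + 2 * x + rnorm(n, sd = 2)
fit <- lm(y ~ x)
sigma_lm  <- sigma(fit)                       # sqrt(RSS / (n - 2))
sigma_mle <- sqrt(sum(residuals(fit)^2) / n)  # sqrt(RSS / n)
c(sigma_mle, sigma_lm)                        # MLE is the smaller of the two
```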
Likelihood function with weight and sex:
nllNormReg<-function(parVec,data=d3){
wt<-data$weight
ht<-data$height
male<-data$male
intercept<-parVec[1]
betaW<-parVec[2]
betaM<-parVec[3]
mySd<-parVec[4]
htPred<-intercept+betaW*wt + betaM*male
nllik<- -sum(dnorm(x=ht,mean=htPred,sd=mySd,log=TRUE))
#browser()
return(nllik)
}
parVec<-c(1.0,2.0,2.0,2.0) # starting values (intercept, betaW, betaM, sd) for the gradient-based optimizer
outLM<-optim(par=parVec,fn=nllNormReg,method="L-BFGS-B",
lower=c(-Inf,-Inf,-Inf,0.0001),upper=c(Inf,Inf,Inf,Inf))
#
AIC<- 2*outLM$value + 2*4 # AIC = 2*NLL + 2k, with k = 4 parameters
outLM$par
## [1] 151.550099 4.139819 6.500287 4.253915
AIC
## [1] 2026.211
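One more thing optim() can provide: with hessian=TRUE it returns the Hessian of the negative log-likelihood at the optimum, and the square roots of the diagonal of its inverse approximate the standard errors that lm() reports. A sketch on simulated data (variable names are illustrative):

```r
# Sketch: approximate standard errors from the inverse Hessian of the
# negative log-likelihood at the MLE
set.seed(2)
n <- 200
x <- rnorm(n)
y <- 5 + 1.5 * x + rnorm(n, sd = 2)
nll <- function(p) -sum(dnorm(y, mean = p[1] + p[2] * x, sd = p[3], log = TRUE))
fit <- optim(c(0, 0, 1), nll, method = "L-BFGS-B",
             lower = c(-Inf, -Inf, 1e-4), hessian = TRUE)
se <- sqrt(diag(solve(fit$hessian)))  # approximate standard errors
rbind(estimate = fit$par, se = se)
```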
7E1. State the three motivating criteria that define information entropy. Try to express each in your own words.
The three criteria for this measure of "uncertainty" are:
(1) Continuity. The measure should be continuous in the probabilities: a small change in a probability should produce only a small change in the measured uncertainty. It may still be bounded, with a minimum and a maximum.
(2) Increasing with the number of events. When more distinct things can happen, and all else is equal, there is more uncertainty than when fewer things can happen.
(3) Additivity. This is the hardest one for most people to grasp. Additivity is desirable because it means we can redefine event categories (split or combine them) without changing the total amount of uncertainty.
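Additivity can be checked numerically: for two independent sources of uncertainty, the entropy of the joint distribution equals the sum of the marginal entropies (a minimal sketch, with arbitrary example probabilities):

```r
# Sketch: H(joint) = H(X) + H(Y) when X and Y are independent
H <- function(p) -sum(p * log(p))
px <- c(0.3, 0.7)
py <- c(0.25, 0.25, 0.5)
pxy <- as.vector(outer(px, py))   # joint probabilities under independence
all.equal(H(pxy), H(px) + H(py))  # TRUE
```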
7E3. Suppose a four-sided die is loaded such that, when tossed onto a table, it shows “1” 20%, “2” 25%, “3” 25%, and “4” 30% of the time. What is the entropy of this die?
-(0.2*log(0.2) + 2*0.25*log(0.25) + 0.3*log(0.3))
## [1] 1.376227
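For comparison, a fair four-sided die has entropy log(4), slightly higher than the loaded die's 1.376: the uniform distribution maximizes entropy over a fixed number of outcomes.

```r
# The uniform die attains the maximum entropy for four outcomes: log(4)
H <- function(p) -sum(p * log(p))
H(rep(0.25, 4))             # log(4) = 1.386294
H(c(0.2, 0.25, 0.25, 0.3))  # 1.376227, the loaded die from above
```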
7H1. In 2007, The Wall Street Journal published an editorial (“We’re Number One, Alas”) with a graph of corporate tax rates in 29 countries plotted against tax revenue. A badly fit curve was drawn in (reconstructed at right), seemingly by hand, to make the argument that the relationship between tax rate and tax revenue increases and then declines, such that higher tax rates can actually produce less tax revenue. I want you to actually fit a curve to these data, found in data(Laffer). Consider models that use tax rate to predict tax revenue. Compare, using WAIC or PSIS, a straight-line model to any curved models you like. What do you conclude about the relationship between tax rate and tax revenue? (See the associated figure for this problem in the book.)
data(Laffer)
plot(tax_revenue~tax_rate, data=Laffer)
library(rethinking)
modelLM<- quap(
alist(
tax_revenue ~ dnorm( mu , sigma ) ,
mu <- intercept + beta*tax_rate,
intercept ~ dnorm( 100 , 100 ) ,
beta~dnorm(0,5),
sigma ~ dunif( 0 , 50 )
) , data=Laffer )
modelQuad <- quap(
alist(
tax_revenue ~ dnorm( mu , sigma ) ,
mu <- intercept + beta*tax_rate + beta2*tax_rate^2,
intercept ~ dnorm( 100 , 100 ) ,
beta~dnorm(0,5),
beta2~dnorm(0,5),
sigma ~ dunif( 0 , 50 )
) , data=Laffer )
# precis(modelQuad)
# WAIC(modelQuad)
compare(modelLM, modelQuad , func=WAIC)
8H1. Return to the data(tulips) example in the chapter. Now include the bed variable as a predictor in the interaction model. Don’t interact bed with the other predictors; just include it as a main effect. Note that bed is categorical. So to use it properly, you will need to either construct dummy variables or rather an index variable, as explained in Chapter 5.
library(rethinking)
data(tulips)
d<-tulips
d$blooms_std <- d$blooms / max(d$blooms)
d$water_cent <- d$water - mean(d$water)
d$shade_cent <- d$shade - mean(d$shade)
d$bedID<-as.numeric(d$bed)
modelTulipBed<- quap(
alist(
blooms_std ~ dnorm( mu , sigma ) ,
mu <- betaB[bedID] + betaS*shade_cent + betaW*water_cent +
betaI*shade_cent*water_cent,
betaS~dnorm(0,0.5),
betaW~dnorm(0,0.5),
betaI~dnorm(0,0.5),
betaB[bedID]~dnorm(0,0.5),
sigma ~ dunif( 0 , 50 )
) , data=d)
precis(modelTulipBed,depth=2)