This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

Run this code for problems 3E1-3E4.

# Grid approximation of the posterior for 6 water in 9 tosses under a flat
# prior, then draw samples from that posterior for problems 3E1-3E4.
p_grid <- seq(from = 0, to = 1, length.out = 1000)  # candidate values of p
prior <- rep(1, 1000)                               # flat (uniform) prior
likelihood <- dbinom(6, size = 9, prob = p_grid)    # binomial likelihood at each grid point
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)             # normalize so the grid masses sum to 1
set.seed(100)                                       # reproducible sampling
samples <- sample(p_grid, prob = posterior, size = 1e5, replace = TRUE)
hist(samples, probability = TRUE)

# Load McElreath's 'rethinking' package, which supplies dens() below.
# The '##' lines are the captured console output from loading the package
# (its cmdstanr / posterior / parallel dependencies and masking notices).
library(rethinking)
## Loading required package: cmdstanr
## This is cmdstanr version 0.8.0
## - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
## - CmdStan path: /Users/brianbeckage/.cmdstan/cmdstan-2.31.0
## - CmdStan version: 2.31.0
## 
## A newer version of CmdStan is available. See ?install_cmdstan() to install it.
## To disable this check set option or environment variable cmdstanr_no_ver_check=TRUE.
## Loading required package: posterior
## This is posterior version 1.6.0
## 
## Attaching package: 'posterior'
## The following objects are masked from 'package:stats':
## 
##     mad, sd, var
## The following objects are masked from 'package:base':
## 
##     %in%, match
## Loading required package: parallel
## rethinking (Version 2.42)
## 
## Attaching package: 'rethinking'
## The following object is masked from 'package:stats':
## 
##     rstudent
# Kernel density estimate of the posterior samples (smooth analog of the
# histogram drawn above).
dens(samples)

# The grid-approximate posterior itself, for comparison with the sampled density.
plot(p_grid,posterior)

3E1. How much posterior probability lies below p=0.2?

# 3E1: proportion of posterior mass below p = 0.2.
mean(samples < 0.2)            # from sampling from posterior
## [1] 0.00095
sum(posterior[p_grid < 0.2])   # from posterior directly
## [1] 0.0008560951

3E2. How much posterior probability lies above p=0.8?

# 3E2: proportion of posterior mass above p = 0.8.
mean(samples > 0.8)            # from sampling from posterior
## [1] 0.12075
sum(posterior[p_grid > 0.8])   # from posterior directly
## [1] 0.1203449

3E3. How much posterior probability lies between p=0.2 and p=0.8?

# 3E3: proportion of posterior mass between p = 0.2 and p = 0.8 (inclusive).
mean(samples >= 0.2 & samples <= 0.8)            # from sampling from posterior
## [1] 0.8783
sum(posterior[p_grid >= 0.2 & p_grid <= 0.8])    # from posterior directly
## [1] 0.878799

3E4. 20% of the posterior probability lies below which value of p?

# 3E4: the value of p below which 20% of the posterior probability lies.
# Store the sample quantile instead of re-typing it, so the cross-check
# below uses the exact computed cutoff rather than a hard-coded 0.5165.
q20 <- quantile(samples, 0.2)  # from sampling from posterior
q20
##       20% 
## 0.5165165
sum(posterior[p_grid < q20])   # checking against posterior: should be ~0.20
## [1] 0.1994116

3M1. Suppose the globe tossing data had turned out to be 8 water in 15 tosses. Construct the posterior distribution, using grid approximation. Use the same flat prior as before.

# 3M1: rebuild the grid-approximate posterior for 8 water in 15 tosses,
# keeping the same flat prior, and draw samples from it.
p_grid <- seq(from = 0, to = 1, length.out = 1000)  # candidate values of p
prior <- rep(1, 1000)                               # flat (uniform) prior
likelihood <- dbinom(8, size = 15, prob = p_grid)   # likelihood of 8/15 at each grid point
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)             # normalize to total mass 1
set.seed(100)                                       # reproducible sampling
samples <- sample(p_grid, prob = posterior, size = 1e5, replace = TRUE)
hist(samples, probability = TRUE)

# 'rethinking' was already attached earlier in the notebook; library() is a
# harmless no-op on reload. dens() gives a smoothed density of the samples.
library(rethinking)
dens(samples)

# Exact grid posterior for the 8/15 data, for comparison with dens() above.
plot(p_grid,posterior)

3M3. Construct a posterior predictive check for this model and data. This means simulate the distribution of samples, averaging over the posterior uncertainty in p. What is the probability of observing 8 water in 15 tosses?

# 3M3: posterior predictive check for the 8/15 model. Rebuild the posterior,
# sample p from it, then simulate new datasets of 15 tosses.
p_grid <- seq(from = 0, to = 1, length.out = 1000)
prior <- rep(1, 1000)
likelihood <- dbinom(8, size = 15, prob = p_grid)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
set.seed(100)
samples <- sample(p_grid, prob = posterior, size = 1e5, replace = TRUE)
# hist(samples,probability = TRUE)
# One binomial draw of size 15 per posterior sample of p; with n = 10000,
# only the first 10,000 of the 1e5 posterior samples are used.
postPred <- rbinom(n = 10000, size = 15, prob = samples)

hist(postPred)

mean(postPred == 8)   # probability of observing 8 water in 15 tosses
## [1] 0.147

3M4. Using the posterior distribution constructed from the new (8/15) data, now calculate the probability of observing 6 water in 9 tosses.

# 3M4: probability of 6 water in 9 tosses under the 8/15 posterior.
# n = 1e6 exceeds length(samples), so the posterior samples are recycled.
postPred <- rbinom(n = 1000000, size = 9, prob = samples)
mean(postPred == 6)
## [1] 0.176931

3M5. Start over at 3M1, but now use a prior that is zero below p=0.5 and a constant above p=0.5. This corresponds to prior information that a majority of the Earth’s surface is water. Repeat each problem above and compare the inferences. What difference does the better prior make? If it helps, compare inferences (using both priors) to the true value p = 0.7.

# 3M5: repeat 3M1 with a step prior that is zero for p <= 0.5 and constant
# above, encoding the knowledge that a majority of Earth's surface is water.
# (The flat prior that was assigned and then immediately overwritten by the
# ifelse() has been removed as dead code.)
p_grid <- seq(from = 0, to = 1, length.out = 1000)
prior <- ifelse(p_grid <= 0.5, 0, 1)                # truncated prior
likelihood <- dbinom(8, size = 15, prob = p_grid)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)             # renormalize over p > 0.5
set.seed(100)
samples <- sample(p_grid, prob = posterior, size = 1e5, replace = TRUE)
hist(samples, probability = TRUE)

# 'rethinking' was already attached earlier; reloading is a no-op.
# dens() shows the truncated posterior's samples are confined to p > 0.5.
library(rethinking)
dens(samples)

# Exact grid posterior under the step prior (zero mass at or below 0.5).
plot(p_grid,posterior)

3M3 with new prior. Construct a posterior predictive check for this model and data. This means simulate the distribution of samples, averaging over the posterior uncertainty in p. What is the probability of observing 8 water in 15 tosses?

# 3M3 with the truncated prior: posterior predictive probability of 8/15.
postPred <- rbinom(n = 10000, size = 15, prob = samples)
mean(postPred == 8)
## [1] 0.1569

3M4 with new prior. Using the posterior distribution constructed from the new (8/15) data, now calculate the probability of observing 6 water in 9 tosses.

# 3M4 with the truncated prior: probability of 6 water in 9 tosses.
# Posterior samples are recycled since n = 1e6 exceeds length(samples).
postPred <- rbinom(n = 1000000, size = 9, prob = samples)
mean(postPred == 6)
## [1] 0.23291