Includes:
Load libraries
library(knitr)
library(ezknitr)
library(abd)
Settings for Knitr (optional)
opts_chunk$set(fig.width = 8, fig.height = 6)
data(LionNoses)
head(LionNoses)
## age proportion.black
## 1 1.1 0.21
## 2 1.5 0.14
## 3 1.9 0.11
## 4 2.2 0.13
## 5 2.6 0.12
## 6 3.2 0.13
lm.nose<-lm(age~proportion.black, data=LionNoses)
Coefficients and residual variation are stored in lmfit:
coef(lm.nose)
## (Intercept) proportion.black
## 0.8790062 10.6471194
summary(lm.nose)$sigma # residual variation
## [1] 1.668764
What else is stored in lmfit? (residuals, variance covariance matrix, etc)
names(lm.nose)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
names(summary(lm.nose))
## [1] "call" "terms" "residuals" "coefficients"
## [5] "aliased" "sigma" "df" "r.squared"
## [9] "adj.r.squared" "fstatistic" "cov.unscaled"
## Use the same sampmle size Sample size - use length so it matches sample size of original data
n <- length(LionNoses$age)
## Predictor - copy of original proporation black data, now in vector
p.black <- LionNoses$proportion.black
## Parameters
sigma <- summary(lm.nose)$sigma # residual variation
betas <- coef(lm.nose)# regression coefficients
## Errors and response
# Residual errors are modeled as ~ N(0, sigma)
epsilon <- rnorm(n, 0, sigma)
# Response is modeled as linear function plus residual errors
y <- betas[1] + betas[2]*p.black + epsilon
# Fit of model to simulated data:
lmfit.generated <- lm(y ~ p.black)
summary(lmfit.generated)
##
## Call:
## lm(formula = y ~ p.black)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8923 -1.4875 -0.2337 1.0471 3.8692
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6975 0.6295 1.108 0.277
## p.black 11.0586 1.6704 6.620 2.5e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.847 on 30 degrees of freedom
## Multiple R-squared: 0.5936, Adjusted R-squared: 0.5801
## F-statistic: 43.83 on 1 and 30 DF, p-value: 2.499e-07
Hint: if you get stuck, try starting with a small number of simulations (less than 5000) until you get the code right.
# set up a matrix of size 1 by 5000 to store our estimates of beta_1
nsims <- 5000 # number of simulations
beta.hat<- matrix(NA, nrow = nsims, ncol = 5000)
# Simulation
for(i in 1:nsims){
epsilon <- rnorm(n, 0, sigma) # random errors
y <- betas[1] + betas[2]*p.black + epsilon # response
lm.temp <- lm(y ~ p.black)
## extract beta-hat
beta.hat[i] <- coef(lm.temp)[2]
}
Plot results
hist(beta.hat, col="gray",xlab="", main=expression(paste("Sampling Distribution of ", hat(beta)[1])))
abline(v=betas[2]) # add population parameter
spun with ezspin(“in_class/Exercise1Solution.R”, out_dir = “output/in_class”, keep_md=FALSE, fig_dir = “figures”)
Session Information:
sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows >= 8 x64 (build 9200)
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] abd_0.2-8 mosaic_0.14.4 Matrix_1.2-6
## [4] mosaicData_0.14.0 ggplot2_2.1.0 dplyr_0.5.0
## [7] lattice_0.20-33 nlme_3.1-128 ezknitr_0.6
## [10] knitr_1.14
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.7 formatR_1.4 plyr_1.8.4
## [4] R.methodsS3_1.7.1 R.utils_2.4.0 tools_3.3.1
## [7] digest_0.6.10 evaluate_0.9 tibble_1.2
## [10] gtable_0.2.0 DBI_0.5-1 yaml_2.1.14
## [13] ggdendro_0.1-20 gridExtra_2.2.1 stringr_1.1.0
## [16] rprojroot_1.1 R6_2.2.0 rmarkdown_1.3
## [19] tidyr_0.6.0 magrittr_1.5 backports_1.0.4
## [22] scales_0.4.0 htmltools_0.3.5 splines_3.3.1
## [25] MASS_7.3-45 assertthat_0.1 mime_0.5
## [28] colorspace_1.2-6 stringi_1.1.2 lazyeval_0.2.0
## [31] munsell_0.4.3 markdown_0.7.7 R.oo_1.20.0