library(UsingR)
data(father.son)
x <- father.son$fheight
y <- father.son$sheight
n <- length(x)
father.son
in R
?plot(x, y, xlab = "father's height (inches)", ylab = "son's height (inches)")
Some key quantities:
xbar = mean(x); ybar = mean(y)
Sxy = sum((x - xbar) * (y - ybar))
Sxx = sum((x - xbar)^2)
c(n, Sxx, Sxy, xbar, ybar)
## [1] 1078.00000 8114.44391 4171.57912 67.68710 68.68407
Point estimates of parameters:
beta1hat = Sxy / Sxx
beta0hat = ybar - beta1hat * xbar
res = y - (beta0hat + beta1hat * x)
SS_res = sum(res^2) #residual sum of squares
MS_res = SS_res/(n - 2) # residual mean square
sigma2hat = MS_res
c(beta0hat, beta1hat, sigma2hat)
## [1] 33.886604 0.514093 5.936804
alpha = 0.05 # 95% confidence interval
sigmahat = sqrt(sigma2hat)
se_beta0hat = sigmahat*sqrt(1/n + mean(x)^2/Sxx) # standard error of beta0hat
c(beta0hat - qt(1 - alpha/2, df = n - 2) * se_beta0hat, beta0hat + qt(1 - alpha/2, df = n - 2) * se_beta0hat) # 95% confidence interval of beta0
## [1] 30.29121 37.48200
se_beta1hat = sigmahat/sqrt(Sxx)
c(beta1hat - qt(1 - alpha/2, df = n - 2) * se_beta1hat, beta1hat + qt(1 - alpha/2, df = n - 2) * se_beta1hat) # 95% confidence interval of beta1
## [1] 0.4610188 0.5671673
Q2. Is it correct to say that that the probability that \(\beta_1\) is contained in the interval [0.4610188, 0.5671673] is 0.95?
Q3. Which event has probability 0.95?
Confidence intervals for the population regression line:
xast <- seq(min(x), max(x), l=200)
yhatast <- beta0hat + beta1hat*xast
se_yhatast <- sigmahat * sqrt(1/n + (xast-xbar)^2/Sxx)
plot(x, y, col="lightgray")
abline(a=beta0hat, b=beta1hat)
lowerc <- yhatast - se_yhatast * qt(1-alpha/2, df=n-2)
upperc <- yhatast + se_yhatast * qt(1-alpha/2, df=n-2)
lines(xast, lowerc, col="blue")
lines(xast, upperc, col="blue")
xast <- seq(min(x), max(x), l=200)
yhatast <- beta0hat + beta1hat*xast
lower_predict <- yhatast - sigmahat * sqrt(1 + 1/n + (xast-xbar)^2/Sxx) * qt(1-alpha/2, df=n-2)
upper_predict <- yhatast + sigmahat * sqrt(1 + 1/n + (xast-xbar)^2/Sxx) * qt(1-alpha/2, df=n-2)
plot(x, y, col="lightgray")
abline(a=beta0hat, b=beta1hat)
lines(xast, lowerc, col="blue")
lines(xast, upperc, col="blue")
lines(xast, lower_predict, col="red")
lines(xast, upper_predict, col="red")
tvaluebeta0hat <- beta0hat / (sigmahat*sqrt(1/n + xbar^2/Sxx))
pvalue = 2 * (1 - pt(abs(tvaluebeta0hat), df = n - 2))
pvalue
## [1] 0
tvaluebeta1hat <- beta1hat / (sigmahat/sqrt(Sxx))
pvalue = 2 * (1 - pt(abs(tvaluebeta1hat), df = n - 2))
pvalue
## [1] 0
tvaluebeta1hat <- (beta1hat - 0.5) / (sigmahat/sqrt(Sxx))
pvalue = 2 * (1 - pt(abs(tvaluebeta1hat), df = n - 2))
pvalue
## [1] 0.6024573
+Q4. What is the relationship between confidence intervals and hypothesis testing?