Section 1.1
- Load the house prices data.
- Create a stem-and-leaf plot.
- Create a histogram.
- Calculate summary statistics.
# Read the house prices data; header = TRUE takes column names from the
# first line of the file. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
houseprice <- read.table("~/path-to-folder/houseprice.txt", header = TRUE)
# NOTE(review): attach() is kept because the commands below refer to Price
# by its bare name; with(houseprice, ...) or houseprice$Price would avoid
# putting the data frame on the search path.
attach(houseprice)

# Stem-and-leaf plot of Price.
stem(Price, scale = 0.5)
# The decimal point is 2 digit(s) to the right of the |
# 1 | 6
# 2 | 0011344
# 2 | 5666777899
# 3 | 002223444
# 3 | 666

# Histogram of Price with bin edges every 25 units from 150 to 375.
hist(Price, breaks = seq(150, 375, by = 25))

# Summary statistics, first all at once and then one at a time.
summary(Price)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 155.5 244.6 279.0 278.6 323.4 359.9
mean(Price)   # 278.6033
median(Price) # 278.95
sd(Price)     # 53.8656
min(Price)    # 155.5
max(Price)    # 359.9

# Quartiles under two different quantile definitions; the first and third
# quartiles differ between the two, the median does not.
quantile(Price, probs = c(0.25, 0.5, 0.75), type = 7) # R default
# 25% 50% 75%
# 244.625 278.950 323.350
quantile(Price, probs = c(0.25, 0.5, 0.75), type = 6) # Minitab default
# 25% 50% 75%
# 241.375 278.950 325.875
Section 1.2
- Calculate percentiles (critical values) for a standard normal distribution.
- Create a QQ-plot (quantile-quantile or normal probability plot).
# Standard normal critical values for a range of upper-tail areas.
# FALSE is spelled out because F is an ordinary, reassignable variable.
qnorm(c(0.1, 0.05, 0.025, 0.01, 0.005, 0.001), lower.tail = FALSE)
# 1.281552 1.644854 1.959964 2.326348 2.575829 3.090232

# Normal QQ-plot of Price, then a reference line added on top of it.
qqnorm(Price)
qqline(Price)
Section 1.3
- Calculate upper tail areas (p-values) for a standard normal distribution.
# Upper-tail area beyond z = 2 for a standard normal distribution.
# FALSE is spelled out because F is an ordinary, reassignable variable.
pnorm(2, lower.tail = FALSE)
# 0.02275013
# Upper-tail area beyond z = 1.96.
pnorm(1.96, lower.tail = FALSE)
# 0.0249979
Section 1.4
- Calculate percentiles (critical values) for a t distribution.
# t critical values with 29 degrees of freedom for a range of upper-tail
# areas. FALSE is spelled out because F is an ordinary, reassignable variable.
qt(c(0.1, 0.05, 0.025, 0.01, 0.005, 0.001), df = 29, lower.tail = FALSE)
# 1.311434 1.699127 2.045230 2.462021 2.756386 3.396240
Section 1.5
- Calculate a confidence interval for a univariate mean.
# One-sample t procedure on Price; the printed result includes the
# 95% confidence interval for the mean.
t.test(x = Price, conf.level = 0.95)
# 95 percent confidence interval:
# 258.4896 298.7171
Section 1.6
- Do a hypothesis test for a univariate mean.
- Calculate upper tail areas (p-values) for a t distribution.
# Upper-tail test of whether the mean of Price exceeds 255.
t.test(Price, mu = 255, alternative = "greater")
# t = 2.4001, df = 29, p-value = 0.01152
# alternative hypothesis: true mean is greater than 255

# The same p-value obtained directly from the t distribution.
# FALSE is spelled out because F is an ordinary, reassignable variable.
pt(2.4001, df = 29, lower.tail = FALSE) # upper-tail p-value
# 0.01151699

# Lower-tail test: same t-statistic, complementary p-value.
t.test(Price, mu = 255, alternative = "less")
# t = 2.4001, df = 29, p-value = 0.9885
# alternative hypothesis: true mean is less than 255

# Two-tail test: same t-statistic, p-value twice the upper-tail one.
t.test(Price, mu = 255, alternative = "two.sided")
# t = 2.4001, df = 29, p-value = 0.02304
# alternative hypothesis: true mean is not equal to 255
Section 1.7
- Calculate a prediction interval for an individual univariate value (using "ones" trick).
# "Ones" trick: regress Price on a column of ones with the intercept removed,
# so the model has a single coefficient and predict() can supply the interval.
# The column length is taken from Price rather than hard-coding the sample
# size.
ones <- rep(1, length(Price))
model <- lm(Price ~ ones - 1)
# A one-row newdata yields a single prediction; without it, predict() warns
# and prints the same interval once per observation.
predict(model, newdata = data.frame(ones = 1), interval = "prediction")
# fit lwr upr
# 1 278.6033 166.6147 390.5919