################################
#### Example: Vitamin C & French skiers
#### LOGLIN() & GLM() functions
#### Related: See VitaminC.R
#################################
#### Here is one way to read the data vector of values with labels for the table
ski<-matrix(c(31, 17, 109, 122), ncol=2, dimnames=list(Treatment=c("Placebo", "VitaminC"), Cold=c("Cold", "NoCold")))
ski
#### Here is how we did this previously, for more details see VitaminC.R
#### Pearson's Chi-squared test with Yates' continuity correction
result<-chisq.test(ski)
result
######################
#### VIA LOGLIN() ####
#### Fitting 2-way log-linear model of independence
#### using the table structure and loglin() function; this is similar to CATMOD in SAS
#### first check if the data are in the table format
is.table(ski)
#### if not make them to be in table format with
ski<-as.table(ski)
#### fit the model
#### list(1,2) indicates that we are fitting the two margins independently
#### fit=TRUE and param=TRUE, say that we want have the fitted values of the cells
#### and the estimated model parameters returned
#### for other options see R help, e.g., type the ?loglin in the prompt
ski.ind<-loglin(ski, list(1, 2), fit=TRUE, param=TRUE)
ski.ind
#### get the p-value; note that this output is the same as the chi-squared test
#### WITHOUT Yates' continuity correction.
1-pchisq(ski.ind$lrt, ski.ind$df)
#### Fitting 2-way saturated log-linear model
#### using the table structure and loglin() function;
####this is similar to CATMOD in SAS
ski.sat<-loglin(ski, list(c(1, 2)), fit=TRUE, param=TRUE)
#### notice the zero value for LRT, and the PERFECT fit!!!
ski.sat
1-pchisq(ski.sat$lrt, ski.sat$df)
###################
#### VIA GLM() ####
#### Fitting 2-way log-linear model of independence
#### using the database structure and glm() function; this is similar to GENMOD in SAS
#### first we must format the data into a data frame format
ski.data<-as.data.frame(ski)
ski.data
#### using the glm() to fit the loglinear model is similar to fitting a regression model
#### but the trick is in specifying what the correct response, and the family of distributions
#### (i.e., sampling scheme!)
#### we need to specify the response that we are modeling which are the cell counts;
#### ski.data$Freq
#### this line: ski.data$Treatment+ski.data$Cold, specifies that we have two main effects,
#### and no interaction term
#### we need to specify that sampling distribution, that is for log-linear models when modeling
#### the counts we assume the most general random mechanism of Poisson sampling: family=poinsson().
#### This assures that errors follow Poisson model AND that we are modeling "log" of the response
#### that is of our counts.
ski.ind<-glm(ski.data$Freq~ski.data$Treatment+ski.data$Cold, family=poisson())
#### to view the model and the relevant statistics
ski.ind
#### another way of viewing the model
summary(ski.ind)
#### this is ANOVA table similar to what you have seen in linear regression models that looks
#### at the contribution of different model parameters to the overall fit of the model
anova(ski.ind)
#### looking at and saving the fitted values of the cell counts
fits<-fitted(ski.ind)
#### looking at and saving the pearson residuals
resids <- residuals(ski.ind,type="pearson")
#### checking for the influential points
h <- lm.influence(ski.ind)$hat
#### adjusted pearson residuals, e.g., standardized
adjresids <- resids/sqrt(1-h)
#### putting them together into a table
round(cbind(ski.data$Freq,fits,adjresids),2)
#### Fitting 2-way saturated log-linear model
#### using the database structure and glm() function; this is similar to GENMOD in SAS
#### Notice now that we have an interaction term in the model: ski.data$Treatment*ski.data$Cold
ski.sat <- glm(Freq~Treatment*Cold, family=poisson(), data=ski.data)
ski.sat
anova(ski.sat)