#### Test of conditional independence
#### Use of oddsratio(), confit() from {vcd}
#### Cochran-Mantel-Haenszel test
#### Also testing via Log-linear models
############################################

#### Input the table

deliquent=c("no","yes")
scout=c("no", "yes")
SES=c("low", "med","high")
table=expand.grid(deliquent=deliquent,scout=scout,SES=SES)
count=c(169,42,43,11,132,20,104,14,59,2,196,8)
table=cbind(table,count=count)
table
temp=xtabs(count~deliquent+scout+SES,table)
temp

#### Create "flat" contigency tables

ftable(temp)

#### Let's see how we can create various subtables
#### One-way Table SES

Frequency=as.vector(margin.table(temp,3))
CumFrequency=cumsum(Frequency)
cbind(SES,Frequency=Frequency,Percentage=Frequency/sum(Frequency),CumFrequency=CumFrequency,CumPercentage=CumFrequency/sum(Frequency))

#### One-way Table scout

Frequency=as.vector(margin.table(temp,2))
CumFrequency=cumsum(Frequency)
cbind(scout,Frequency=Frequency,Percentage=Frequency/sum(Frequency),CumFrequency=CumFrequency,CumPercentage=CumFrequency/sum(Frequency))

#### One-way Table deliquent

Frequency=as.vector(margin.table(temp,1))
CumFrequency=cumsum(Frequency)
cbind(deliquent,Frequency=Frequency,Percentage=Frequency/sum(Frequency),CumFrequency=CumFrequency,CumPercentage=CumFrequency/sum(Frequency))

#### Test the Mutual Independence, step by step 
#### compute the expected frequencies

E=array(NA,dim(temp))
for (i in 1:dim(temp)[1]) {
for (j in 1:dim(temp)[2]) {
for (k in 1:dim(temp)[3]) {
E[i,j,k]=(margin.table(temp,3)[k]*margin.table(temp,2)[j]*margin.table(temp,1))[i]/(sum(temp))^2
}}}
E

#### compute the X^2, and G^2

df=(length(temp)-1)-(sum(dim(temp))-3)
X2=sum((temp-E)^2/E)
X2
1-pchisq(X2,df)
G2=2*sum(temp*log(temp/E))
G2
1-pchisq(G2,df)


#### Test for Mutual indpendence (and other models) by considering analysis of all two-way tables
#### Two-way Table SES*scout
#### This is a test of Marginal Independence for SES and Scout

SES_Scout=margin.table(temp,c(3,2))
result=chisq.test(SES_Scout)
result
result$expected
result=chisq.test(SES_Scout,correct = FALSE)
result
SES_Scout=list(Frequency=SES_Scout,RowPercentage=prop.table(SES_Scout,2))


#### Two-way Table SES*deliquent
#### temp=xtabs(count~scout+SES+deliquent,table)
#### SES_deliquent=addmargins(temp)[1:2,1:3,3]
#### This is a test of Marginal Independence for SES and deliquent status

SES_deliquent=margin.table(temp,c(3,1))
result=chisq.test(SES_deliquent)
result
result$expected
result=chisq.test(SES_deliquent,correct = FALSE)
result
SES_deliquent=list(Frequency=SES_deliquent,RowPercentage=prop.table(SES_deliquent,2))

#### Two-way Table deliquent*scout
#### This is a test of Marginal Independence for Deliquent status and the Scout status

deliquent_scout=margin.table(temp,c(1,2))
result=chisq.test(deliquent_scout)
result$expected
result=chisq.test(deliquent_scout,correct = FALSE)
result
deliquent_scout1=list(Frequency=deliquent_scout,RowPercentage=prop.table(deliquent_scout,1))

#### compute the log(oddsraio), oddsratio and its 95% CI using {vcd} package

lor=oddsratio(deliquent_scout)
lor
OR=exp(lor)
OR
OR=oddsratio(deliquent_scout, log=FALSE)
OR
CI=exp(confint(lor))
CI
CI=confint(OR)
CI


#### Table of deliquent*scout at different level of SES

temp

#### Test for Joint Independence of (D,BS)
#### creating 6x2 table, BS x D

SESscout_deliquent=ftable(temp, row.vars=c(3,2))
result=chisq.test(SESscout_deliquent)
result

#### Test for Marginal Independence (see above analysis of two-way tables)

#### Test for conditional independence
#### To get partial tables of DB for each level of S

temp[,,1]
chisq.test(temp[,,1], correct=FALSE)
temp[,,2]
chisq.test(temp[,,2], correct=FALSE)
temp[,,3]
chisq.test(temp[,,3], correct=FALSE)
X2=sum(chisq.test(temp[,,1], correct=FALSE)$statistic+chisq.test(temp[,,2], correct=FALSE)$statistic+chisq.test(temp[,,3], correct=FALSE)$statistic)
1-pchisq(X2,df=3)

#### Cochran-Mantel-Haenszel test

mantelhaen.test(temp)
mantelhaen.test(temp,correct=FALSE) 

#### Breslow-Day test
#### make sure to first source/run breslowday.test_.R

breslowday.test(temp)

################################################################
#### Log-linear Models
#### Let's see how we would do this via Log_linear models 
#### We will look at the details later in the course
#### test of conditional independence
#### via loglinear model, but the object has to be a table!

is.table(temp)  #### to check if table
temp<-as.table(temp) #### to save as table
temp.condind<-loglin(temp, list(c(1,3), c(2,3)), fit=TRUE, param=TRUE) #### fit the cond.indep. model
temp.condind
1-pchisq(temp.condind$lrt, temp.condind$df)

#### There is no way to do Breslow-Day stats in R; 
#### you need to write your own function or fit the homogeneous association model
#### to test for identity of odds-ratios

#### test of homogenous association model

temp.hom<-loglin(temp, list(c(1,3), c(2,3), c(1,2)), fit=TRUE, param=TRUE)
temp.hom
1-pchisq(temp.hom$lrt, temp.hom$df)


#### Here is how to do the same but with the glm() function in R
#### test of conditional independence
#### via loglinear mode, but using glm() funcion
#### the object now needs to be dataframe

temp.data<-as.data.frame(temp)
temp.data
temp.condind<-glm(temp.data$Freq~temp.data$scout*temp.data$SES+temp.data$deliquent*temp.data$SES, family=poisson())
summary(temp.condind)
fitted(temp.condind)

#### test of homogenous association model

temp.hom<-glm(temp.data$Freq~temp.data$scout*temp.data$SES+temp.data$deliquent*temp.data$SES+temp.data$scout*temp.data$deliquent, family=poisson())
summary(temp.hom)
fitted(temp.hom)