#### Test of conditional independence #### Use of oddsratio(), confit() from {vcd} #### Cochran-Mantel-Haenszel test #### Also testing via Log-linear models ############################################ #### Input the table deliquent=c("no","yes") scout=c("no", "yes") SES=c("low", "med","high") table=expand.grid(deliquent=deliquent,scout=scout,SES=SES) count=c(169,42,43,11,132,20,104,14,59,2,196,8) table=cbind(table,count=count) table temp=xtabs(count~deliquent+scout+SES,table) temp #### Create "flat" contigency tables ftable(temp) #### Let's see how we can create various subtables #### One-way Table SES Frequency=as.vector(margin.table(temp,3)) CumFrequency=cumsum(Frequency) cbind(SES,Frequency=Frequency,Percentage=Frequency/sum(Frequency),CumFrequency=CumFrequency,CumPercentage=CumFrequency/sum(Frequency)) #### One-way Table scout Frequency=as.vector(margin.table(temp,2)) CumFrequency=cumsum(Frequency) cbind(scout,Frequency=Frequency,Percentage=Frequency/sum(Frequency),CumFrequency=CumFrequency,CumPercentage=CumFrequency/sum(Frequency)) #### One-way Table deliquent Frequency=as.vector(margin.table(temp,1)) CumFrequency=cumsum(Frequency) cbind(deliquent,Frequency=Frequency,Percentage=Frequency/sum(Frequency),CumFrequency=CumFrequency,CumPercentage=CumFrequency/sum(Frequency)) #### Test the Mutual Independence, step by step #### compute the expected frequencies E=array(NA,dim(temp)) for (i in 1:dim(temp)[1]) { for (j in 1:dim(temp)[2]) { for (k in 1:dim(temp)[3]) { E[i,j,k]=(margin.table(temp,3)[k]*margin.table(temp,2)[j]*margin.table(temp,1))[i]/(sum(temp))^2 }}} E #### compute the X^2, and G^2 df=(length(temp)-1)-(sum(dim(temp))-3) X2=sum((temp-E)^2/E) X2 1-pchisq(X2,df) G2=2*sum(temp*log(temp/E)) G2 1-pchisq(G2,df) #### Test for Mutual indpendence (and other models) by considering analysis of all two-way tables #### Two-way Table SES*scout #### This is a test of Marginal Independence for SES and Scout SES_Scout=margin.table(temp,c(3,2)) result=chisq.test(SES_Scout) result result$expected result=chisq.test(SES_Scout,correct = FALSE) result SES_Scout=list(Frequency=SES_Scout,RowPercentage=prop.table(SES_Scout,2)) #### Two-way Table SES*deliquent #### temp=xtabs(count~scout+SES+deliquent,table) #### SES_deliquent=addmargins(temp)[1:2,1:3,3] #### This is a test of Marginal Independence for SES and deliquent status SES_deliquent=margin.table(temp,c(3,1)) result=chisq.test(SES_deliquent) result result$expected result=chisq.test(SES_deliquent,correct = FALSE) result SES_deliquent=list(Frequency=SES_deliquent,RowPercentage=prop.table(SES_deliquent,2)) #### Two-way Table deliquent*scout #### This is a test of Marginal Independence for Deliquent status and the Scout status deliquent_scout=margin.table(temp,c(1,2)) result=chisq.test(deliquent_scout) result$expected result=chisq.test(deliquent_scout,correct = FALSE) result deliquent_scout1=list(Frequency=deliquent_scout,RowPercentage=prop.table(deliquent_scout,1)) #### compute the log(oddsraio), oddsratio and its 95% CI using {vcd} package lor=oddsratio(deliquent_scout) lor OR=exp(lor) OR OR=oddsratio(deliquent_scout, log=FALSE) OR CI=exp(confint(lor)) CI CI=confint(OR) CI #### Table of deliquent*scout at different level of SES temp #### Test for Joint Independence of (D,BS) #### creating 6x2 table, BS x D SESscout_deliquent=ftable(temp, row.vars=c(3,2)) result=chisq.test(SESscout_deliquent) result #### Test for Marginal Independence (see above analysis of two-way tables) #### Test for conditional independence #### To get partial tables of DB for each level of S temp[,,1] chisq.test(temp[,,1], correct=FALSE) temp[,,2] chisq.test(temp[,,2], correct=FALSE) temp[,,3] chisq.test(temp[,,3], correct=FALSE) X2=sum(chisq.test(temp[,,1], correct=FALSE)$statistic+chisq.test(temp[,,2], correct=FALSE)$statistic+chisq.test(temp[,,3], correct=FALSE)$statistic) 1-pchisq(X2,df=3) #### Cochran-Mantel-Haenszel test mantelhaen.test(temp) mantelhaen.test(temp,correct=FALSE) #### Breslow-Day test #### make sure to first source/run breslowday.test_.R breslowday.test(temp) ################################################################ #### Log-linear Models #### Let's see how we would do this via Log_linear models #### We will look at the details later in the course #### test of conditional independence #### via loglinear model, but the object has to be a table! is.table(temp) #### to check if table temp<-as.table(temp) #### to save as table temp.condind<-loglin(temp, list(c(1,3), c(2,3)), fit=TRUE, param=TRUE) #### fit the cond.indep. model temp.condind 1-pchisq(temp.condind$lrt, temp.condind$df) #### There is no way to do Breslow-Day stats in R; #### you need to write your own function or fit the homogeneous association model #### to test for identity of odds-ratios #### test of homogenous association model temp.hom<-loglin(temp, list(c(1,3), c(2,3), c(1,2)), fit=TRUE, param=TRUE) temp.hom 1-pchisq(temp.hom$lrt, temp.hom$df) #### Here is how to do the same but with the glm() function in R #### test of conditional independence #### via loglinear mode, but using glm() funcion #### the object now needs to be dataframe temp.data<-as.data.frame(temp) temp.data temp.condind<-glm(temp.data$Freq~temp.data$scout*temp.data$SES+temp.data$deliquent*temp.data$SES, family=poisson()) summary(temp.condind) fitted(temp.condind) #### test of homogenous association model temp.hom<-glm(temp.data$Freq~temp.data$scout*temp.data$SES+temp.data$deliquent*temp.data$SES+temp.data$scout*temp.data$deliquent, family=poisson()) summary(temp.hom) fitted(temp.hom)