> ### Test of conditional independence Lesson 5:
> ### Use of oddsratio(), confit() from {vcd}
> ### Cochran-Mantel-Haenszel test
> ### Also testing via Log-linear models: related Lesson 10
> #########################################################
> 
> ### Input the table
> deliquent=c("no","yes")
> scout=c("no", "yes")
> SES=c("low", "med","high")
> table=expand.grid(deliquent=deliquent,scout=scout,SES=SES)
> count=c(169,42,43,11,132,20,104,14,59,2,196,8)
> table=cbind(table,count=count)
> table
   deliquent scout  SES count
1         no    no  low   169
2        yes    no  low    42
3         no   yes  low    43
4        yes   yes  low    11
5         no    no  med   132
6        yes    no  med    20
7         no   yes  med   104
8        yes   yes  med    14
9         no    no high    59
10       yes    no high     2
11        no   yes high   196
12       yes   yes high     8
> temp=xtabs(count~deliquent+scout+SES,table)
> temp
, , SES = low

         scout
deliquent  no yes
      no  169  43
      yes  42  11

, , SES = med

         scout
deliquent  no yes
      no  132 104
      yes  20  14

, , SES = high

         scout
deliquent  no yes
      no   59 196
      yes   2   8

> 
> ### Create "flat" contigency tables
> ftable(temp)
                SES low med high
deliquent scout                 
no        no        169 132   59
          yes        43 104  196
yes       no         42  20    2
          yes        11  14    8
> 
> ##Let's see how we can create various subtables
> ### One-way Table SES
> Frequency=as.vector(margin.table(temp,3))
> CumFrequency=cumsum(Frequency)
> cbind(SES,Frequency=Frequency,Percentage=Frequency/sum(Frequency),CumFrequency=CumFrequency,CumPercentage=CumFrequency/sum(Frequency))
     SES    Frequency Percentage CumFrequency CumPercentage
[1,] "low"  "265"     "0.33125"  "265"        "0.33125"    
[2,] "med"  "270"     "0.3375"   "535"        "0.66875"    
[3,] "high" "265"     "0.33125"  "800"        "1"          
> 
> ### One-way Table scout
> Frequency=as.vector(margin.table(temp,2))
> CumFrequency=cumsum(Frequency)
> cbind(scout,Frequency=Frequency,Percentage=Frequency/sum(Frequency),CumFrequency=CumFrequency,CumPercentage=CumFrequency/sum(Frequency))
     scout Frequency Percentage CumFrequency CumPercentage
[1,] "no"  "424"     "0.53"     "424"        "0.53"       
[2,] "yes" "376"     "0.47"     "800"        "1"          
> 
> ### One-way Table deliquent
> Frequency=as.vector(margin.table(temp,1))
> CumFrequency=cumsum(Frequency)
> cbind(deliquent,Frequency=Frequency,Percentage=Frequency/sum(Frequency),CumFrequency=CumFrequency,CumPercentage=CumFrequency/sum(Frequency))
     deliquent Frequency Percentage CumFrequency CumPercentage
[1,] "no"      "703"     "0.87875"  "703"        "0.87875"    
[2,] "yes"     "97"      "0.12125"  "800"        "1"          
> 
> ### Test the Mutual Independence, step by step 
> ### compute the expected frequences
> E=array(NA,dim(temp))
> for (i in 1:dim(temp)[1]) {
+ for (j in 1:dim(temp)[2]) {
+ for (k in 1:dim(temp)[3]) {
+ E[i,j,k]=(margin.table(temp,3)[k]*margin.table(temp,2)[j]*margin.table(temp,1))[i]/(sum(temp))^2
+ }}}
> E
, , 1

          [,1]      [,2]
[1,] 123.42044 109.44831
[2,]  17.02956  15.10169

, , 2

          [,1]      [,2]
[1,] 125.74913 111.51337
[2,]  17.35087  15.38663

, , 3

          [,1]      [,2]
[1,] 123.42044 109.44831
[2,]  17.02956  15.10169

> 
> ### compute the X^2, and G^2
> df=(length(temp)-1)-(sum(dim(temp))-3)
> X2=sum((temp-E)^2/E)
> X2
[1] 214.9233
> 1-pchisq(X2,df)
[1] 0
> G2=2*sum(temp*log(temp/E))
> G2
[1] 218.6622
> 1-pchisq(G2,df)
[1] 0
> 
> 
> ### Test for Mutual indpendence (and other models) by considering anlaysis of all two-way tables
> ### Two-way Table SES*scout
> ### This is a test of Marginal Independence for SES and Scout
> SES_Scout=margin.table(temp,c(3,2))
> result=chisq.test(SES_Scout)
> result

	Pearson's Chi-squared test

data:  SES_Scout 
X-squared = 172.2025, df = 2, p-value < 2.2e-16

> result$expected
      scout
SES        no    yes
  low  140.45 124.55
  med  143.10 126.90
  high 140.45 124.55
> result=chisq.test(SES_Scout,correct = FALSE)
> result

	Pearson's Chi-squared test

data:  SES_Scout 
X-squared = 172.2025, df = 2, p-value < 2.2e-16

> SES_Scout=list(Frequency=SES_Scout,RowPercentage=prop.table(SES_Scout,2))
> 
> 
> ### Two-way Table SES*deliquent
> ### temp=xtabs(count~scout+SES+deliquent,table)
> ### SES_deliquent=addmargins(temp)[1:2,1:3,3]
> ### This is a test of Marginal Independence for SES and deliquent status
> SES_deliquent=margin.table(temp,c(3,1))
> result=chisq.test(SES_deliquent)
> result

	Pearson's Chi-squared test

data:  SES_deliquent 
X-squared = 32.8263, df = 2, p-value = 7.445e-08

> result$expected
      deliquent
SES          no      yes
  low  232.8688 32.13125
  med  237.2625 32.73750
  high 232.8688 32.13125
> result=chisq.test(SES_deliquent,correct = FALSE)
> result

	Pearson's Chi-squared test

data:  SES_deliquent 
X-squared = 32.8263, df = 2, p-value = 7.445e-08

> SES_deliquent=list(Frequency=SES_deliquent,RowPercentage=prop.table(SES_deliquent,2))
> 
> ### Two-way Table deliquent*scout
> ### This is a test of Marginal Independence for Deliquent status and the Scout status
> deliquent_scout=margin.table(temp,c(1,2))
> result=chisq.test(deliquent_scout)
> result$expected
         scout
deliquent     no    yes
      no  372.59 330.41
      yes  51.41  45.59
> result=chisq.test(deliquent_scout,correct = FALSE)
> result

	Pearson's Chi-squared test

data:  deliquent_scout 
X-squared = 7.4652, df = 1, p-value = 0.00629

> deliquent_scout1=list(Frequency=deliquent_scout,RowPercentage=prop.table(deliquent_scout,1))
> 
> ## compute the log(oddsraio), oddsratio and its 95% CI using {vcd} package
> lor=oddsratio(deliquent_scout)
> lor
[1] -0.6140019
> OR=exp(lor)
> OR
[1] 0.5411808
> OR=oddsratio(deliquent_scout, log=FALSE)
> OR
[1] 0.5411808
> CI=exp(confint(lor))
> CI
           lwr       upr
[1,] 0.3475674 0.8426469
> CI=confint(OR)
> CI
           lwr       upr
[1,] 0.3475674 0.8426469
> 
> 
> ### Table of deliquent*scout at different level of SES
> temp
, , SES = low

         scout
deliquent  no yes
      no  169  43
      yes  42  11

, , SES = med

         scout
deliquent  no yes
      no  132 104
      yes  20  14

, , SES = high

         scout
deliquent  no yes
      no   59 196
      yes   2   8

> 
> ### Test for Joint Independence of (D,BS)
> ## creating 6x2 table, BS x D
> SESscout_deliquent=ftable(temp, row.vars=c(3,2))
> result=chisq.test(SESscout_deliquent)
> result

	Pearson's Chi-squared test

data:  SESscout_deliquent 
X-squared = 32.9576, df = 5, p-value = 3.837e-06

> 
> ### Test for Marginal Independence (see above analysis of two-way tables)
> 
> #### Test for conditional independence
> ### To get partial tables of DB for each level of S
> temp[,,1]
         scout
deliquent  no yes
      no  169  43
      yes  42  11
> chisq.test(temp[,,1], correct=FALSE)

	Pearson's Chi-squared test

data:  temp[, , 1] 
X-squared = 0.0058, df = 1, p-value = 0.9392

> temp[,,2]
         scout
deliquent  no yes
      no  132 104
      yes  20  14
> chisq.test(temp[,,2], correct=FALSE)

	Pearson's Chi-squared test

data:  temp[, , 2] 
X-squared = 0.101, df = 1, p-value = 0.7507

> temp[,,3]
         scout
deliquent no yes
      no  59 196
      yes  2   8
> chisq.test(temp[,,3], correct=FALSE)

	Pearson's Chi-squared test

data:  temp[, , 3] 
X-squared = 0.0534, df = 1, p-value = 0.8172

Warning message:
In chisq.test(temp[, , 3], correct = FALSE) :
  Chi-squared approximation may be incorrect
> X2=sum(chisq.test(temp[,,1], correct=FALSE)$statistic+chisq.test(temp[,,2], correct=FALSE)$statistic+chisq.test(temp[,,3], correct=FALSE)$statistic)
Warning message:
In chisq.test(temp[, , 3], correct = FALSE) :
  Chi-squared approximation may be incorrect
> 1-pchisq(X2,df=3)
[1] 0.9837374
> 
> ### Cochran-Mantel-Haenszel test
> mantelhaen.test(temp)

	Mantel-Haenszel chi-squared test without continuity correction

data:  temp 
Mantel-Haenszel X-squared = 0.008, df = 1, p-value = 0.9287
alternative hypothesis: true common odds ratio is not equal to 1 
95 percent confidence interval:
 0.5970214 1.6009845 
sample estimates:
common odds ratio 
        0.9776615 

> mantelhaen.test(temp,correct=FALSE) 

	Mantel-Haenszel chi-squared test without continuity correction

data:  temp 
Mantel-Haenszel X-squared = 0.008, df = 1, p-value = 0.9287
alternative hypothesis: true common odds ratio is not equal to 1 
95 percent confidence interval:
 0.5970214 1.6009845 
sample estimates:
common odds ratio 
        0.9776615 

> 
> ### Breslow-Day test
> ### make sure to first source/run breslowday.test.R
> breslowday.test(temp)
Breslow and Day test (with Tarone correction):
Breslow-Day X-squared         = 0.1517992 
Breslow-Day-Tarone X-squared  = 0.1517984 

Test for test of a common OR: p-value =  0.9269096 

> 
> ################################################################
> #################### Log-linear Models: Lesson 10 ##############
> #### Let's see how we would this via Log_linear models 
> #### We will look at the details later in the course
> ### test of conditional independence
> ### via loglinear model, but the object has to be a table!
> 
> is.table(temp)  ### to check if table
[1] TRUE
> temp<-as.table(temp) ### to save as table
> temp.condind<-loglin(temp, list(c(1,3), c(2,3)), fit=TRUE, param=TRUE) ### fit the cond.indep. model
2 iterations: deviation 5.684342e-14 
> temp.condind
$lrt
[1] 0.1623331

$pearson
[1] 0.1602389

$df
[1] 3

$margin
$margin[[1]]
[1] "deliquent" "SES"      

$margin[[2]]
[1] "scout" "SES"  


$fit
, , SES = low

         scout
deliquent         no        yes
      no  168.800000  43.200000
      yes  42.200000  10.800000

, , SES = med

         scout
deliquent         no        yes
      no  132.859259 103.140741
      yes  19.140741  14.859259

, , SES = high

         scout
deliquent         no        yes
      no   58.698113 196.301887
      yes   2.301887   7.698113


$param
$param$`(Intercept)`
[1] 3.534926

$param$deliquent
       no       yes 
 1.093741 -1.093741 

$param$scout
         no         yes 
 0.06813731 -0.06813731 

$param$SES
       low        med       high 
 0.2192042  0.2590306 -0.4782348 

$param$deliquent.SES
         SES
deliquent        low       med       high
      no  -0.4005935 -0.125005  0.5255985
      yes  0.4005935  0.125005 -0.5255985

$param$scout.SES
     SES
scout        low         med       high
  no   0.6132997  0.05846064 -0.6717604
  yes -0.6132997 -0.05846064  0.6717604


> 1-pchisq(temp.condind$lrt, temp.condind$df)
[1] 0.9834279
> 
> #### There is no way to do Breslow-Day stats in R; you need to write your own function or fit the homogeneous association model to test for identity of odds-ratios
> 
> ### test of homogenous association model
> temp.hom<-loglin(temp, list(c(1,3), c(2,3), c(1,2)), fit=TRUE, param=TRUE)
3 iterations: deviation 0.0266118 
> temp.hom
$lrt
[1] 0.1542917

$pearson
[1] 0.1518237

$df
[1] 2

$margin
$margin[[1]]
[1] "deliquent" "SES"      

$margin[[2]]
[1] "scout" "SES"  

$margin[[3]]
[1] "deliquent" "scout"    


$fit
, , SES = low

         scout
deliquent         no        yes
      no  168.645281  43.350696
      yes  42.357015  10.646982

, , SES = med

         scout
deliquent         no        yes
      no  132.695867 103.303327
      yes  19.303337  14.694967

, , SES = high

         scout
deliquent         no        yes
      no   58.658852 196.345977
      yes   2.339648   7.658050


$param
$param$`(Intercept)`
[1] 3.534951

$param$deliquent
       no       yes 
 1.094126 -1.094126 

$param$scout
         no         yes 
 0.07238322 -0.07238322 

$param$SES
       low        med       high 
 0.2171812  0.2584266 -0.4756079 

$param$deliquent.scout
         scout
deliquent           no          yes
      no  -0.005595678  0.005595678
      yes  0.005595678 -0.005595678

$param$deliquent.SES
         SES
deliquent        low        med       high
      no  -0.3976988 -0.1246397  0.5223385
      yes  0.3976988  0.1246397 -0.5223385

$param$scout.SES
     SES
scout        low         med       high
  no   0.6124499  0.05840757 -0.6708574
  yes -0.6124499 -0.05840757  0.6708574


> 1-pchisq(temp.hom$lrt, temp.hom$df)
[1] 0.9257548
> 
> 
> ### Here is how to do the same but with the glm() function in R
> ### test of conditional independence
> ### via loglinear mode, but using glm() funcion
> ### the object now needs to be dataframe
> temp.data<-as.data.frame(temp)
> temp.data
   deliquent scout  SES Freq
1         no    no  low  169
2        yes    no  low   42
3         no   yes  low   43
4        yes   yes  low   11
5         no    no  med  132
6        yes    no  med   20
7         no   yes  med  104
8        yes   yes  med   14
9         no    no high   59
10       yes    no high    2
11        no   yes high  196
12       yes   yes high    8
> temp.condind<-glm(temp.data$Freq~temp.data$scout*temp.data$SES+temp.data$deliquent*temp.data$SES, family=poisson())
> summary(temp.condind)

Call:
glm(formula = temp.data$Freq ~ temp.data$scout * temp.data$SES + 
    temp.data$deliquent * temp.data$SES, family = poisson())

Deviance Residuals: 
       1         2         3         4         5         6         7         8         9        10        11  
 0.01539  -0.03081  -0.03045   0.06067  -0.07463   0.19496   0.08449  -0.22511   0.03937  -0.20358  -0.02155  
      12  
 0.10811  

Coefficients:
                                         Estimate Std. Error z value Pr(>|z|)    
(Intercept)                               5.12871    0.07538  68.035  < 2e-16 ***
temp.data$scoutyes                       -1.36287    0.15251  -8.937  < 2e-16 ***
temp.data$SESmed                         -0.23942    0.11312  -2.117   0.0343 *  
temp.data$SEShigh                        -1.05631    0.14908  -7.086 1.38e-12 ***
temp.data$deliquentyes                   -1.38629    0.15357  -9.027  < 2e-16 ***
temp.data$scoutyes:temp.data$SESmed       1.10968    0.19573   5.669 1.43e-08 ***
temp.data$scoutyes:temp.data$SEShigh      2.57012    0.21108  12.176  < 2e-16 ***
temp.data$SESmed:temp.data$deliquentyes  -0.55118    0.23924  -2.304   0.0212 *  
temp.data$SEShigh:temp.data$deliquentyes -1.85238    0.35708  -5.188 2.13e-07 ***
---
Signif. codes:  0 Ô***Ő 0.001 Ô**Ő 0.01 Ô*Ő 0.05 Ô.Ő 0.1 Ô Ő 1 

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 739.58872  on 11  degrees of freedom
Residual deviance:   0.16233  on  3  degrees of freedom
AIC: 82.688

Number of Fisher Scoring iterations: 3

> fitted(temp.condind)
         1          2          3          4          5          6          7          8          9         10 
168.800000  42.200000  43.200000  10.800000 132.859259  19.140741 103.140741  14.859259  58.698113   2.301887 
        11         12 
196.301887   7.698113 
> 
> ### test of homogenous association model
> temp.hom<-glm(temp.data$Freq~temp.data$scout*temp.data$SES+temp.data$deliquent*temp.data$SES+temp.data$scout*temp.data$deliquent, family=poisson())
> summary(temp.hom)

Call:
glm(formula = temp.data$Freq ~ temp.data$scout * temp.data$SES + 
    temp.data$deliquent * temp.data$SES + temp.data$scout * temp.data$deliquent, 
    family = poisson())

Deviance Residuals: 
       1         2         3         4         5         6         7         8         9        10        11  
 0.02727  -0.05452  -0.05388   0.10799  -0.06037   0.15720   0.06828  -0.18270   0.04442  -0.22832  -0.02431  
      12  
 0.12214  

Coefficients:
                                          Estimate Std. Error z value Pr(>|z|)    
(Intercept)                                5.12780    0.07611  67.372  < 2e-16 ***
temp.data$scoutyes                        -1.35839    0.16040  -8.469  < 2e-16 ***
temp.data$SESmed                          -0.23975    0.11325  -2.117   0.0343 *  
temp.data$SEShigh                         -1.05605    0.14914  -7.081 1.43e-12 ***
temp.data$deliquentyes                    -1.38173    0.16171  -8.545  < 2e-16 ***
temp.data$scoutyes:temp.data$SESmed        1.10803    0.19657   5.637 1.73e-08 ***
temp.data$scoutyes:temp.data$SEShigh       2.56650    0.21486  11.945  < 2e-16 ***
temp.data$SESmed:temp.data$deliquentyes   -0.54595    0.24619  -2.218   0.0266 *  
temp.data$SEShigh:temp.data$deliquentyes  -1.83965    0.38421  -4.788 1.68e-06 ***
temp.data$scoutyes:temp.data$deliquentyes -0.02252    0.25123  -0.090   0.9286    
---
Signif. codes:  0 Ô***Ő 0.001 Ô**Ő 0.01 Ô*Ő 0.05 Ô.Ő 0.1 Ô Ő 1 

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 739.58872  on 11  degrees of freedom
Residual deviance:   0.15429  on  2  degrees of freedom
AIC: 84.68

Number of Fisher Scoring iterations: 3

> fitted(temp.hom)
         1          2          3          4          5          6          7          8          9         10 
168.645710  42.354290  43.354290  10.645710 132.694789  19.305211 103.305211  14.694789  58.659501   2.340499 
        11         12 
196.340499   7.659501 
> 
> ### Here is a way to fit a logistic regression
> ### for a 2x2 table scout vs. delinquent
> ### with the glm() function in R
> ### the object now needs to be dataframe
> is.table(temp) ##check that the object is a table
[1] TRUE
> tempBD<-margin.table(temp, c(2,1))## create a two way table of interest
> counts<-cbind(tempBD[,2],tempBD[,1])
> counts
    [,1] [,2]
no    64  360
yes   33  343
> scout
[1] "no"  "yes"
> scout<-as.factor(scout)
> temp.logit<-glm(counts~scout,family=binomial("logit"))
> temp.logit

Call:  glm(formula = counts ~ scout, family = binomial("logit"))

Coefficients:
(Intercept)     scoutyes  
     -1.727       -0.614  

Degrees of Freedom: 1 Total (i.e. Null);  0 Residual
Null Deviance:	    7.613 
Residual Deviance: 2.842e-14 	AIC: 15.08