library(tidyr) library(VGAM) library(MASS) data(housing) # see also https://rdrr.io/cran/MASS/man/housing.html # baseline category logit model for nominal response # unstacking the satisfaction response housing = spread(housing, Sat, Freq) # default baseline in R is last category # all two-way interactions fit.full = vglm(cbind(Low,High,Medium) ~ (Infl+Type+Cont)^2, family=multinomial, data=housing) summary(fit.full) # deviance test for lack of fit g2 = deviance(fit.full) df = df.residual(fit.full) 1 - pchisq(g2, df) # pearson test for lack of fit e = residuals(fit.full, type='pearson') x2 = sum(e^2) 1 - pchisq(x2, df) # significance tests based on LR statistics anova(fit.full, type=3, test="LR") # additive model fit.add = vglm(cbind(Low,High,Medium) ~ Infl+Type+Cont, family=multinomial, data=housing) summary(fit.add) deviance(fit.add) exp(coef(fit.add)) exp(confint(fit.add)) anova(fit.add, type=3, test="LR") e.add = residuals(fit.add, type='pearson') x2.add = sum(e.add^2) df.add = df.residual(fit.add) 1 - pchisq(x2.add, df.add) # type and cont only fit.tc = vglm(cbind(Low,High,Medium) ~ Type+Cont, family=multinomial, data=housing) deviance(fit.tc) # infl and cont only fit.ic = vglm(cbind(Low,High,Medium) ~ Infl+Cont, family=multinomial, data=housing) deviance(fit.ic) # infl and type only fit.it = vglm(cbind(Low,High,Medium) ~ Infl+Type, family=multinomial, data=housing) deviance(fit.it) # intercept only fit0 = vglm(cbind(Low,High,Medium) ~ 1, family=multinomial, data=housing) deviance(fit0) # stepwise algorithm using AIC step4vglm(fit.full, direction='backward') ######################### ordinal treatment # ordinal, not proportional odds # additive fit.add.o = vglm(cbind(Low,Medium,High) ~ Infl+Type+Cont, family=cumulative(parallel=FALSE), data=housing) # deviance test for lack of fit g2.add.o = deviance(fit.add.o) df.add.o = df.residual(fit.add.o) 1 - pchisq(g2.add.o, df.add.o) # ordinal, proportional odds fit.add.op = vglm(cbind(Low,Medium,High) ~ Infl+Type+Cont, family=cumulative(parallel=TRUE), data=housing) # testing proportional odds assumption g2.prop = 2*(logLik(fit.add.o) - logLik(fit.add.op)) df.prop = df.residual(fit.add.op) - df.residual(fit.add.o) 1 - pchisq(g2.prop, df.prop) # significance of predictors anova(fit.add.op, type=3, test="LR") summary(fit.add.op) exp(coef(fit.add.op)) exp(confint(fit.add.op))