#######################################################################
##### Penn State student drinking example for Binomial likelihood and loglikelihood functions
##### Estimate of the MLE
##### Test for 1-sample proportion based on the normal approximation
##### Test for 1-sample proportion based on Likelihood Ratio Test
##### R code that corresponds to the SAS code
######################################################################


########## Likelihood & Loglikelihood ######################################
##### Define the likelihood function for binomial sample with N=1315 and X=630:

likelhd = function(p) dbinom(630,1315,p)
loglik = function(p) dbinom(630,1315,p, log=TRUE)

##### Plot the likelihood function and save it into a file:
##### If you do not want it to be saved into a file directly, comment out the
##### postscript() and dev.off() lines

postscript("drinkingLik.ps")
plot(likelhd,0,1,xlab="pi",ylab="l(p)",main="Binomial likelihood, N=1315, X=630")
dev.off()

postscript("drinkingLogLik.ps")
plot(loglik,0,1,xlab="pi",ylab="l(p)",main="Binomial loglikelihood, N=1315, X=630")
dev.off()

############# MLE ######################################
##### Find the MLE (for multiparameter problems, we would use the nlm() function)
##### $maximum is the value of the MLE

mle=optimize(likelhd,c(0,1),maximum=TRUE)
mle
mle=mle$maximum

######### 1-proportion Hypothesis test, Approximate 95% confidence interval, and MLE #####
##### H0: p=0.5 vs. Ha: p is not equal 0.5 

prop.test(630, 1315, 0.5)

### when the sample size is small, we can do an exact test via:

binom.test(630,1315,0.5)

##### Now the 1-proportion Hypothesis test and the 95% CI based on the LR test (statistic, and 95% CI)
##### calculating Likelihood Ratio Statistics and testing for it's significance 

LRstats=2*(loglik(0.48)-loglik(0.5))
LRstats

##### p-value from chisq distribution with degrees of freedom=1 

pvalue<-1-pchisq(LRstats, 1)
pvalue

###### Likelihood ratio based 95% CI #####
### First compute the horizontal cutoff, and plot it on the loglikelhood

cutoff=loglik(0.48)-1.92
cutoff

postscript("drinkingCI.ps")
plot(loglik,0,1,xlab="pi",ylab="l(p)",main="Binomial loglikelihood, N=1315, X=630", ylim=c(-60,-3), xlim=c(0.3,0.65))
abline(h=cutoff)

### compute where the cutoff line intersects the loglikelihood 
### and plot the vertical lines at those points

loglik.optim=function(p){abs(cutoff-loglik(p))}

min1=optimize(loglik.optim, c(0,mle))
min2=optimize(loglik.optim, c(mle,1))

min1
min2

abline(v=min1$minimum)
abline(v=min2$minimum)

#abline(v=0.453)
#abline(v=0.506)
dev.off()

##################################################
##### Here is R code version of this problem 
####  that corresponds to the SAS code and its output 
#################################################

drinking=c(rep("high risk",630),rep("low risk",685))

##### Freq Procedure

Percentage=100*as.vector(table(drinking))/sum(table(drinking))
CumulativeFrequency=cumsum(c(630,685))
CumulativePercentage=cumsum(Percentage)
Freq=data.frame(table(drinking),Percentage,CumulativeFrequency,CumulativePercentage,row.names=NULL)
row.names(Freq)=NULL
Freq

##### asymptotic standard error

phat=630/(630+685)
p0=0.5
n=630+685
ASE=sqrt(phat*(1-phat)/n)
ASE

##### The asymptotic test statistic Z

Z=(phat-0.5)/sqrt(p0*(1-p0)/n)
Z

##### One-sided Pr <  Z

pnorm(Z)

##### Two-sided Pr > |Z|

2*pnorm(Z) 

##### Using the normal approximation, calculate the asymptotic confidence interval

CI_upper=phat+qnorm(0.025,lower.tail=F)*ASE
CI_lower=phat-qnorm(0.025,lower.tail=F)*ASE
CI=cbind(CI_lower,CI_upper)
CI 

##### The approximate confidence interval can also be calculated by prop.test()

prop.test(630,630+685,p=630/(630+685))

##### Apply the exact binomial test to test H0:p=0.5 for high-risk drinkers

binom.test(c(630,685),p=0.5)