/* Session settings: limit listing output to 78 columns and set the
 * first title line printed on each page of output.
 */
options linesize=78;
title1 "Confidence Intervals - Women's Nutrition Data";
/* The %let statement defines the macro variable p (the number of
* response variables) so that it can be referenced throughout the code below.
* After the nutrient data are read in, with each variable
* originally in its own column, the remaining statements stack the data
* so that all variable names are in one column called 'variable',
* and all response values are in another column called 'x'.
* This long format is used for the calculations that follow.
*/
/* Number of response variables (nutrients). Referenced later in the
 * program when computing the Bonferroni and F critical values.
 */
%let p=5;

/* Read one record per subject from the CSV file (skipping the first
 * line) and stack the five nutrient columns into long format: each input
 * record produces five output rows, with the nutrient label in
 * 'variable' and its value in 'x'.
 */
data nutrient;
  /* dsd:       two consecutive commas denote a missing value instead of
   *            being collapsed into a single delimiter, so a missing
   *            field no longer shifts the remaining columns.
   * truncover: a record with too few fields yields missing values
   *            instead of continuing the read on the next line.
   */
  infile "D:\Statistics\STAT 505\data\nutrient.csv" firstobs=2 delimiter=',' dsd truncover;
  input id calcium iron protein a c;
  /* Declare the length explicitly; otherwise it is taken from the first
   * assigned literal and any longer label added later would be truncated.
   */
  length variable $ 8;
  variable="calcium"; x=calcium; output;
  variable="iron";    x=iron;    output;
  variable="protein"; x=protein; output;
  variable="vit a";   x=a;       output;
  variable="vit c";   x=c;       output;
  keep variable x;
run;

/* Sort by nutrient label so that later steps can use BY-group processing. */
proc sort data=nutrient;
  by variable;
run;
/* The means procedure calculates the sample size, mean, and variance
 * of x for each level of 'variable'. It saves these results
 * in a new data set 'a' for use in the data step below.
 * The input data set must already be sorted by 'variable'.
 */
proc means data=nutrient noprint;
  by variable;
  var x;
  output out=a n=n mean=xbar var=s2;
run;

/* The data step here is used to calculate the confidence interval
 * limits from the statistics calculated in the data set 'a'.
 * The values 't1', 'tb', and 'f' are the critical values used in the
 * one-at-a-time, Bonferroni, and F intervals, respectively.
 * The macro variable p must be defined before this step runs.
 */
data b;
  set a;
  /* Critical values at the 95% level. */
  t1=tinv(1-0.025,n-1);        /* one-at-a-time t, n-1 df             */
  tb=tinv(1-0.025/&p,n-1);     /* Bonferroni t: tail area split by p  */
  f=finv(0.95,&p,n-&p);        /* F with p and n-p df                 */
  /* One-at-a-time limits: xbar +/- t1 * sqrt(s2/n). */
  loone=xbar-t1*sqrt(s2/n);
  upone=xbar+t1*sqrt(s2/n);
  /* Simultaneous (F-based) limits:
   * xbar +/- sqrt( p*(n-1)/(n-p) * f * s2/n ).
   */
  losim=xbar-sqrt(&p*(n-1)*f*s2/(n-&p)/n);
  upsim=xbar+sqrt(&p*(n-1)*f*s2/(n-&p)/n);
  /* Bonferroni limits: xbar +/- tb * sqrt(s2/n). */
  lobon=xbar-tb*sqrt(s2/n);
  upbon=xbar+tb*sqrt(s2/n);
run;

/* Print the statistics, critical values, and all three sets of limits. */
proc print data=b;
run;