/* Confidence intervals for the difference in mean measurements
 * between real and fake Swiss bank notes.
 *
 * Steps:
 *   1. Read the raw measurements into 'swiss'.
 *   2. For each note type, stack the &p measurements into long form
 *      and save n, mean and variance per measurement
 *      (data sets real/pop1 and fake/pop2).
 *   3. Merge the two summaries and compute 95% simultaneous (F-based)
 *      and Bonferroni (t-based) limits for xbar1 - xbar2 using the
 *      pooled variance, then print the result.
 */
options ls=78;
title "Confidence Intervals - Swiss Bank Notes";

/* %let allows the p variable to be used throughout the code below.
 * p is the number of measured variables per note.
 */
%let p=6;

/* Read the comma-delimited file, skipping the header row
 * (firstobs=2). 'type' is character; the rest are numeric.
 */
data swiss;
  infile "D:\Statistics\STAT 505\data\swiss3.csv" firstobs=2 delimiter=',';
  input type $ length left right bottom top diag;
run;

/* stack_and_summarize: build the long-form data set and the
 * per-variable summary statistics for one note type.
 *
 *   grp = value of 'type' to keep ("real" or "fake"); also used as
 *         the name of the stacked data set that is created.
 *   idx = suffix for the summary data set and its statistics:
 *         output is pop&idx with n&idx, xbar&idx and s2&idx.
 *
 * Where each variable is originally in its own column, the data
 * step stacks the data so that all variable names are in one column
 * called 'variable', and all response values are in another column
 * called 'x'.
 */
%macro stack_and_summarize(grp=, idx=);

  data &grp;
    set swiss;
    if type="&grp";

    /* Declare the length explicitly. Without this, 'variable' takes
     * its length (6) from the first value assigned, "length", and
     * "diagonal" is silently truncated to "diagon".
     */
    length variable $ 8;

    variable="length";   x=length; output;
    variable="left";     x=left;   output;
    variable="right";    x=right;  output;
    variable="bottom";   x=bottom; output;
    variable="top";      x=top;    output;
    variable="diagonal"; x=diag;   output;

    keep type variable x;
  run;

  /* BY-group processing below requires the data sorted by 'variable'. */
  proc sort data=&grp;
    by variable;
  run;

  /* The means procedure calculates and saves the sample size,
   * mean, and variance for each variable. It then saves these
   * results in a new data set pop&idx for this note type.
   */
  proc means data=&grp noprint;
    by variable;
    id type;
    var x;
    output out=pop&idx n=n&idx mean=xbar&idx var=s2&idx;
  run;

%mend stack_and_summarize;

/* Real notes -> data sets 'real' and 'pop1' (n1, xbar1, s21). */
%stack_and_summarize(grp=real, idx=1)

/* Fake notes -> data sets 'fake' and 'pop2' (n2, xbar2, s22). */
%stack_and_summarize(grp=fake, idx=2)

/* This last step combines the two separate data sets to one
 * and computes the 95% simultaneous confidence interval limits
 * from the statistics calculated previously.
 * The variances are pooled from both the real and the fake samples.
 *
 * Note: type, _TYPE_ and _FREQ_ exist in both pop1 and pop2, so
 * after the merge they hold the values from pop2 (type is "fake"
 * on every row). They are kept only for backward compatibility.
 */
data combine;
  merge pop1 pop2;
  by variable;

  /* Critical values: F quantile for the simultaneous intervals and
   * t quantile at 1 - 0.05/(2*p) for the Bonferroni intervals.
   */
  f=finv(0.95,&p,n1+n2-&p-1);
  t=tinv(1-0.025/&p,n1+n2-2);

  /* Pooled variance of the two samples. */
  sp=((n1-1)*s21+(n2-1)*s22)/(n1+n2-2);

  /* Interval half-widths, computed once and reused for both limits. */
  simhw=sqrt(&p*(n1+n2-2)*f*(1/n1+1/n2)*sp/(n1+n2-&p-1));
  bonhw=t*sqrt((1/n1+1/n2)*sp);

  losim=xbar1-xbar2-simhw;
  upsim=xbar1-xbar2+simhw;
  lobon=xbar1-xbar2-bonhw;
  upbon=xbar1-xbar2+bonhw;

  /* Temporaries only; keep the columns of 'combine' unchanged. */
  drop simhw bonhw;
run;

proc print data=combine;
run;