/* Session settings: 78-column line size and the report title. */
options linesize=78;
title "Confidence Intervals - Swiss Bank Notes";

/* Macro variable p is referenced as &p in the confidence-interval
 * formulas of the final data step. It matches the six measurements
 * read from the data file.
 */
%let p = 6;
/* Read the bank-note data from the comma-delimited file, starting
 * at the second line of the file (firstobs=2). 'type' is read as a
 * character variable; the six measurements are numeric.
 */
data swiss;
  infile "D:\Statistics\STAT 505\data\swiss3.csv"
         dlm=',' firstobs=2;
  input type $
        length left right bottom top diag;
run;
/* Build 'real': the real notes only, reshaped from wide to long.
 * Every input row produces six output rows, one per measurement,
 * with the measurement name in 'variable' and its value in 'x'.
 * Only type, variable, and x are kept.
 * NOTE(review): 'variable' takes its length from the first value
 * assigned ("length", 6 characters), so "diagonal" is stored
 * truncated. The 'fake' step behaves the same way, so the later
 * merge by 'variable' still lines up; change both steps together
 * if the full name is wanted.
 */
data real(keep=type variable x);
  set swiss;
  where type = "real";
  variable = "length";   x = length; output;
  variable = "left";     x = left;   output;
  variable = "right";    x = right;  output;
  variable = "bottom";   x = bottom; output;
  variable = "top";      x = top;    output;
  variable = "diagonal"; x = diag;   output;
run;

/* Sort by 'variable' so the BY-group processing that follows works. */
proc sort data=real;
  by variable;
run;
/* The means procedure calculates the sample size, mean, and
 * variance of x for each variable among the real notes. The
 * results are saved in a new data set 'pop1' as n1, xbar1, and
 * s21. 'noprint' suppresses the printed report, and 'id type'
 * carries the note type into the output data set.
 */
proc means data=real noprint;
  by variable;
  id type;
  var x;
  output out=pop1 n=n1 mean=xbar1 var=s21;
run;
/* Build 'fake': the fake notes only, reshaped from wide to long.
 * Every input row produces six output rows, one per measurement,
 * with the measurement name in 'variable' and its value in 'x'.
 * Only type, variable, and x are kept.
 * NOTE(review): 'variable' takes its length from the first value
 * assigned ("length", 6 characters), so "diagonal" is stored
 * truncated. The 'real' step behaves the same way, so the later
 * merge by 'variable' still lines up; change both steps together
 * if the full name is wanted.
 */
data fake(keep=type variable x);
  set swiss;
  where type = "fake";
  variable = "length";   x = length; output;
  variable = "left";     x = left;   output;
  variable = "right";    x = right;  output;
  variable = "bottom";   x = bottom; output;
  variable = "top";      x = top;    output;
  variable = "diagonal"; x = diag;   output;
run;

/* Sort by 'variable' so the BY-group processing that follows works. */
proc sort data=fake;
  by variable;
run;
/* The means procedure calculates the sample size, mean, and
 * variance of x for each variable among the fake notes. The
 * results are saved in a new data set 'pop2' as n2, xbar2, and
 * s22. 'noprint' suppresses the printed report, and 'id type'
 * carries the note type into the output data set.
 */
proc means data=fake noprint;
  by variable;
  id type;
  var x;
  output out=pop2 n=n2 mean=xbar2 var=s22;
run;
/* Merge the per-variable statistics in pop1 and pop2 and compute
 * 95% confidence limits for each difference in means
 * (xbar1 - xbar2):
 *   losim/upsim - simultaneous limits based on the F quantile f
 *   lobon/upbon - Bonferroni limits based on the t quantile t
 * 'sp' is the variance pooled from the two samples.
 */
data combine;
  merge pop1 pop2;
  by variable;
  f  = finv(0.95, &p, n1+n2-&p-1);
  t  = tinv(1-0.025/&p, n1+n2-2);
  sp = ((n1-1)*s21+(n2-1)*s22)/(n1+n2-2);
  /* Working values shared by the limits; dropped from the output. */
  diff    = xbar1-xbar2;
  halfsim = sqrt(&p*(n1+n2-2)*f*(1/n1+1/n2)*sp/(n1+n2-&p-1));
  halfbon = t*sqrt((1/n1+1/n2)*sp);
  losim = diff-halfsim;
  upsim = diff+halfsim;
  lobon = diff-halfbon;
  upbon = diff+halfbon;
  drop diff halfsim halfbon;
run;

/* List the merged statistics and the interval limits. */
proc print data=combine;
run;