/* Session setup: 78-column listing output and a title for every page. */
options ls=78;
title "Hotellings T2 - Women's Nutrition Data";

/* Read the comma-separated nutrient file into work.nutrient.
 *   firstobs=2 : start reading at the second record (the first is skipped,
 *                presumably a header row -- confirm against the file)
 *   dsd        : treat two adjacent commas as a missing value instead of
 *                collapsing them, so later fields cannot shift into the
 *                wrong variable; also strips quotes around values
 *   truncover  : if a record has fewer than six fields, set the remaining
 *                variables to missing rather than continuing onto the
 *                next record
 * All six variables are read as numeric with list input.
 */
data nutrient;
  infile "D:\Statistics\STAT 505\data\nutrient.csv"
         firstobs=2 delimiter=',' dsd truncover;
  input id calcium iron protein a c;
run;
/* IML code to compute the one-sample Hotelling T2 statistic.
 *
 * 'hotel' is the name of the module defined here. It takes no arguments,
 * so it shares the global symbol table: it reads the matrix x (one row
 * per observation, one column per variable) and leaves its results in
 * the variables listed below.
 *
 *   mu0  - the null-hypothesis mean vector (one entry per column of x)
 *   n    - the number of observations (rows of x)
 *   one  - an n x 1 vector of 1s
 *   ybar - the vector of sample means
 *   xc   - x with the sample mean subtracted from every row
 *   s    - the sample covariance matrix, xc`*xc/(n-1)
 *   d    - ybar - mu0
 *   t2   - n times the squared statistical distance between ybar and mu0
 *   f    - t2 rescaled so that it has an F distribution with df1 and df2
 *          degrees of freedom under the null hypothesis
 *   p    - the upper-tail p-value of f
 *
 * The module definition is ended with the 'finish' statement.
 * 'use nutrient' makes the data set 'nutrient' available; the five
 * nutrient variables are then read into x and the hotel module is called.
 */
proc iml;
start hotel;
  mu0 = {1000, 15, 60, 800, 75};

  n    = nrow(x);
  one  = j(n, 1, 1);
  ybar = x` * one / n;

  /* Centre the data and form the cross-product. This equals
   * x`*(I - one*one`/n)*x/(n-1) but never builds an n x n matrix. */
  xc = x - one * ybar`;
  s  = xc` * xc / (n - 1);

  print mu0 ybar;
  print s;

  /* solve(s, d) gives inv(s)*d without forming the explicit inverse. */
  d  = ybar - mu0;
  t2 = n * d` * solve(s, d);

  df1 = ncol(x);
  df2 = n - df1;
  f   = df2 * t2 / (df1 * (n - 1));

  /* sdf computes the upper tail directly; 1-probf(...) cancels to
   * exactly 0 when f is large. */
  p = sdf('F', f, df1, df2);

  print t2 f df1 df2 p;
finish;

use nutrient;
read all var{calcium iron protein a c} into x;
run hotel;