options ls=78;
title "2-Sample Hotellings T2 - Swiss Bank Notes (unequal variances)";

data swiss;
  infile "D:\Statistics\STAT 505\data\swiss3.csv" firstobs=2 delimiter=',';
  input type $ length left right bottom top diag;
  run;

 /* The iml code below defines and executes the 'hotel2m' module 
  * for calculating the two-sample Hotelling T2 test statistic,
  * where here the sample covariances are not pooled.
  * The commands between 'start' and 'finish' define the 
  * calculations of the module for two input vectors 'x1' and 'x2',
  * which have the same variables but correspond to two separate groups.
  * Note that s1 and s2 are not pooled in these calculations, and the
  * resulting degrees of freedom are considerably more involved.
  * The 'use' statement makes the 'swiss' data set available, from 
  * which all the variables are taken. The variables are then read 
  * separately into the vectors 'x1' and 'x2' for each group, and
  * finally the 'hotel2' module is called.
  */

proc iml;
  start hotel2m;
    n1=nrow(x1);
    n2=nrow(x2);
    k=ncol(x1);
    one1=j(n1,1,1);
    one2=j(n2,1,1);
    ident1=i(n1);
    ident2=i(n2);
    ybar1=x1`*one1/n1;
    s1=x1`*(ident1-one1*one1`/n1)*x1/(n1-1.0);
    print n1 ybar1;
    print s1;
    ybar2=x2`*one2/n2;
    s2=x2`*(ident2-one2*one2`/n2)*x2/(n2-1.0);
    st=s1/n1+s2/n2;
    print n2 ybar2;
    print s2;
    t2=(ybar1-ybar2)`*inv(st)*(ybar1-ybar2);
    df1=k;
    p=1-probchi(t2,df1);
    print t2 df1 p;
    f=(n1+n2-k-1)*t2/k/(n1+n2-2);
    temp=((ybar1-ybar2)`*inv(st)*(s1/n1)*inv(st)*(ybar1-ybar2)/t2)**2/(n1-1);
    temp=temp+((ybar1-ybar2)`*inv(st)*(s2/n2)*inv(st)*(ybar1-ybar2)/t2)**2/(n2-1);
    df2=1/temp;
    p=1-probf(f,df1,df2);
    print f df1 df2 p;
  finish;
  use swiss;
    read all var{length left right bottom top diag} where (type="real") into x1;
    read all var{length left right bottom top diag} where (type="fake") into x2;
  run hotel2m;