/* Session setup: limit listing output to 78 columns and set the page title. */
options ls=78;
title "Discriminant - Swiss Bank Notes";

/* Training data set 'swiss', read from a comma-separated file whose first
 * line is skipped (firstobs=2).
 *   type                               - character group label
 *   length left right bottom top diag  - numeric variables
 * dsd       : consecutive commas are read as a missing value instead of
 *             being collapsed into one delimiter, so an empty field cannot
 *             shift the remaining values into the wrong variables.
 * truncover : a record with too few fields leaves the remaining variables
 *             missing instead of continuing the read on the next record.
 */
data swiss;
  infile "D:\Statistics\STAT 505\data\swiss3.csv"
         firstobs=2 dsd delimiter=',' truncover;
  input @;                          /* load the record and hold it         */
  if _infile_ = ' ' then delete;    /* blank lines produce no observation  */
  input type $ length left right bottom top diag;
run;
/* Create the data set 'test', which holds the new observation(s) to be
 * classified with the discriminant rule.
 * Its variables must have the same names as the quantitative variables of
 * the training data set (no group label is supplied here).
 * Add further observations as extra lines before the terminating ';'.
 */
data test;
  input length left right bottom top diag;
  datalines;
214.9 130.1 129.9 9 10.6 140.5
;
run;
/* Discriminant analysis of the 'swiss' training data.
 *
 * Procedure options:
 *   pool=test     - tests whether the group covariance matrices are equal.
 *                   If the test is not significant (at the 0.10 level), the
 *                   sample covariance matrices are pooled, giving a linear
 *                   discriminant function; otherwise they are kept separate,
 *                   giving a quadratic discriminant function.
 *   crossvalidate - computes the confusion matrix by the holdout method:
 *                   each obs is classified using only the other obs.
 *   testdata=     - data set containing the obs to be classified.
 *   testout=      - data set in which the classification results are stored.
 *
 * Statements:
 *   class  - the variable that defines the groups for classification.
 *   priors - the prior probability of each group: 0.99 for "real" and
 *            0.01 for "fake".
 *   var    - the quantitative variables used to estimate the mean and
 *            covariance matrix of each group.
 */
proc discrim data=swiss
             testdata=test testout=a
             pool=test
             crossvalidate;
  class type;
  priors "real"=0.99 "fake"=0.01;
  var length left right bottom top diag;
run;
/* List data set 'a', i.e. the classification results that proc discrim
 * stored for the obs of the 'test' data set.
 */
proc print data=a;
run;