/* Global output settings: 78-column line size and the report title. */
options ls=78;
title "Discriminant Analysis - Insect Data";

/* Read the training data from a comma-separated file into 'insect'.
 * firstobs=2 : start reading at line 2 of the file (line 1 is skipped,
 *              presumably a header row - confirm against the file).
 * dsd        : CSV-aware list input - two consecutive commas produce a
 *              missing value instead of collapsing into one delimiter,
 *              and quotes around a value are removed.
 * truncover  : a line with fewer than four fields leaves the remaining
 *              variables missing instead of pulling values from the
 *              next line and silently misaligning every later record.
 * NOTE(review): species is read with the default list-input character
 * length (8 bytes); confirm that no species label is longer than that.
 */
data insect;
  infile "D:\Statistics\STAT 505\data\insect.csv"
         firstobs=2 dsd delimiter=',' truncover;
  input species $ joint1 joint2 aedeagus;
  /* Completely blank lines carry no observation; skip them. */
  if _infile_ = ' ' then delete;
run;
/* Data set 'test' holds the new observation(s) that the discriminant
 * rule will classify. It carries the same three quantitative variables
 * as the training data; one record is entered in-stream below.
 */
data test;
  input joint1 joint2 aedeagus;
  datalines;
194 124 49
;
run;
/* The pool=test option conducts a test of equal covariance matrices.
* If the result of the test is not significant (at the 0.10 level, the
* default of the slpool= option), the sample covariance matrices are
* pooled, resulting in a linear discriminant function; otherwise, the
* sample covariance matrices are not pooled, resulting in a quadratic
* discriminant function.
* The crossvalidate option calculates the confusion matrix based on
* the holdout method, where each obs is classified from the other obs only.
* The testdata= option specifies the data set with obs to be classified.
* The testout= option specifies the name of the data set where classification
* results are stored.
* The class statement specifies the variable with groups for classification.
* The var statement specifies the quantitative variables used to estimate
* the mean and covariance matrices of the groups.
*/
/* Build the discriminant rule from 'insect' (groups given by species),
 * cross-validate it, and classify the obs in 'test' into data set 'a'.
 */
proc discrim
    data=insect
    testdata=test
    testout=a
    pool=test
    crossvalidate;
  class species;
  var joint1 joint2 aedeagus;
run;
/* Print data set 'a', the testout= data set written by proc discrim,
 * which holds the classification results for the obs in the 'test'
 * data set.
 */
proc print data=a;
run;