options ls=78;
title 'Cluster Analysis - Woodyard Hammock - Complete Linkage';

/* Read the comma-delimited data file, skipping the header row.
 * ident is set to the row number (_n_) of each observation so that
 * the cluster assignments produced later can be merged back onto
 * the original records.
 * The drop statement removes the variables not used in this analysis.
 */
data wood;
  infile 'D:\Statistics\STAT 505\data\wood.csv' firstobs=2 delimiter=',';
  input x y acerub carcar carcor cargla cercan corflo faggra frapen
        ileopa liqsty lirtul maggra magvir morrub nyssyl osmame ostvir
        oxyarb pingla pintae pruser quealb quehem quenig quemic queshu
        quevir symtin ulmala araspi cyrrac;
  ident = _n_;
  drop acerub carcor cargla cercan frapen lirtul magvir morrub osmame
       pintae pruser quealb quehem queshu quevir ulmala araspi cyrrac;
run;

/* Sort by ident so the data set can be merged by ident below. */
proc sort data=wood;
  by ident;
run;

/* Hierarchical clustering of the retained variables.
 * method= selects the between-cluster distance definition; complete
 * linkage matches the analysis named in the title ("simple" is not a
 * PROC CLUSTER method keyword and stops the step with an error).
 * outtree= saves the cluster history for PROC TREE.
 */
proc cluster data=wood method=complete outtree=clust1;
  var carcar corflo faggra ileopa liqsty maggra nyssyl
      ostvir oxyarb pingla quenig quemic symtin;
  id ident;
run;

/* Draw a horizontal dendrogram of the hierarchical clustering and,
 * because nclusters= is given, write the cluster label assigned to
 * each observation (at the 6-cluster level) to clust2.
 */
proc tree data=clust1 horizontal nclusters=6 out=clust2;
  id ident;
run;

/* PROC TREE output is not in ident order; sort it for the merge. */
proc sort data=clust2;
  by ident;
run;

/* Print the cluster assignments. */
proc print data=clust2;
run;

/* Attach the cluster labels to the original observations so the
 * clusters can be used as ANOVA groups in the glm procedure.
 */
data combine;
  merge wood clust2;
  by ident;
run;

/* Treat the cluster labels as ANOVA groups: for each variable, compare
 * variation between clusters with variation within clusters.
 * The means statement reports the mean of each variable per cluster.
 */
proc glm data=combine;
  class cluster;
  model carcar corflo faggra ileopa liqsty maggra nyssyl
        ostvir oxyarb pingla quenig quemic symtin = cluster;
  means cluster;
run;
quit;  /* glm is interactive; end it explicitly */

/* Re-run the same clustering to get a vertical ODS dendrogram.
 * The var and id statements are repeated so that this plot describes
 * the same clustering as above; without a var statement every numeric
 * variable in wood (including x, y and ident) would be clustered.
 * nonorm leaves the plotted distances unnormalized.
 * Note: this overwrites clust1; it is no longer needed at this point.
 * NOTE(review): plots= needs ODS Graphics to be enabled - confirm for
 * the environment this runs in.
 */
proc cluster data=wood method=complete outtree=clust1
             plots=dendrogram(vertical) nonorm;
  var carcar corflo faggra ileopa liqsty maggra nyssyl
      ostvir oxyarb pingla quenig quemic symtin;
  id ident;
run;