/* Principal component analysis of the "Places Rated" data, computed from
 * the covariance matrix of the log10-transformed ratings.
 *
 * Steps:
 *   1. Read the nine rating variables and an id from places.csv.
 *   2. Replace each rating with its base 10 logarithm.
 *   3. Run PROC PRINCOMP on the covariance matrix and save the scores.
 *   4. Correlate the first three components with the original variables.
 *   5. Plot the second component against the first.
 */
options ls=78;
title "PCA - Covariance Matrix - Places Rated";

/* After reading in the places data, the (base 10) log transformations are
 * taken. This is an optional step and not required for the pca analysis.
 *
 * infile options:
 *   firstobs=2  starts reading at the second line of the file
 *               (presumably skipping a header row).
 *   dsd         treats two consecutive commas as an empty (missing) field
 *               instead of collapsing them into a single delimiter, which
 *               would shift every later value into the wrong variable.
 *   truncover   sets the remaining variables to missing when a record is
 *               short, rather than continuing the read on the next line.
 */
data places;
  infile "D:\Statistics\STAT 505\data\places.csv"
         firstobs=2 delimiter=',' dsd truncover;
  input climate housing health crime trans educate arts recreate econ id;

  /* Transform the nine ratings in place; id is left untouched. */
  array ratings{*} climate housing health crime trans educate arts
                   recreate econ;
  do _i = 1 to dim(ratings);
    ratings{_i} = log10(ratings{_i});
  end;

  /* Keep the loop index out of the output data set. */
  drop _i;
run;

/* The princomp procedure performs pca on the places data.
 * The cov option specifies results are calculated from the covariance
 * matrix, instead of the default correlation matrix.
 * The out=a option saves results to a data set named 'a'.
 */
proc princomp data=places cov out=a;
  var climate housing health crime trans educate arts recreate econ;
run;

/* The corr procedure is used to calculate pairwise correlations among the
 * first 3 principal components and the original variables. Listing all of
 * them on the var statement produces the full correlation matrix, so the
 * component-by-variable correlations are the prin1-prin3 rows.
 */
proc corr data=a;
  var prin1 prin2 prin3
      climate housing health crime trans educate arts recreate econ;
run;

/* The gplot procedure is used to plot the first 2 principal components.
 * axis1 and axis2 set the lengths of the axes (5 inches each); they are
 * assigned to the vertical and horizontal axes, respectively.
 * axis statements are global, so they are defined ahead of the procedure.
 */
axis1 length=5 in;
axis2 length=5 in;

proc gplot data=a;
  plot prin2*prin1 / vaxis=axis1 haxis=axis2;
run;
quit;  /* gplot uses run-group processing; quit ends the procedure */