/* Global settings for the session: restrict the output line size to 78
 * columns and set the title printed on procedure output.
 */
options ls=78;
title "PCA - Covariance Matrix - Places Rated";
/* Read the places data and replace each of the nine rating variables with
 * its base-10 logarithm. The log transformation is an optional step and is
 * not required for the pca analysis.
 *
 * infile options:
 *   firstobs=2  - start reading at the second record (presumably the first
 *                 record is a header row).
 *   dsd         - treat two consecutive commas as a missing value instead of
 *                 collapsing them into a single delimiter, so an empty field
 *                 cannot shift the remaining columns.
 *   truncover   - if a record has fewer fields than the input statement
 *                 expects, set the remaining variables to missing rather
 *                 than continuing onto the next record.
 *
 * The id column is read but left untransformed.
 */
data places;
  infile "D:\Statistics\STAT 505\data\places.csv"
         firstobs=2 dsd delimiter=',' truncover;
  input climate housing health crime trans educate arts recreate econ id;

  /* Apply the same transformation to every rating variable in one loop. */
  array ratings{*} climate housing health crime trans
                   educate arts recreate econ;
  do _i = 1 to dim(ratings);
    ratings{_i} = log10(ratings{_i});
  end;

  /* The loop index is a work variable only; keep it out of the data set. */
  drop _i;
run;
/* Principal component analysis of the nine rating variables in 'places'.
 *   cov    - compute the components from the covariance matrix rather than
 *            the default correlation matrix.
 *   out=a  - write the output data set 'a', which the later steps read
 *            for both the original variables and the component scores
 *            (prin1, prin2, ...).
 */
proc princomp data=places out=a cov;
  var climate housing health crime
      trans educate arts recreate econ;
run;
/* Correlation analysis on the princomp output data set.
 * Listing the first 3 principal components together with the original
 * variables in a single var statement produces all pairwise correlations
 * among them, including those between each component and each variable.
 */
proc corr data=a;
  var prin1-prin3
      climate housing health crime trans educate arts recreate econ;
run;
/* Scatter plot of the first 2 principal component scores
 * (prin2 on the vertical axis against prin1 on the horizontal axis).
 * axis1 and axis2 each define an axis 5 inches long; they are assigned to
 * the vertical and horizontal axes, respectively, so both axes have the
 * same length.
 * gplot supports run-group processing, so 'run;' alone leaves the procedure
 * active; 'quit;' terminates it explicitly.
 */
proc gplot data=a;
  axis1 length=5 in;
  axis2 length=5 in;
  plot prin2*prin1 / vaxis=axis1 haxis=axis2;
run;
quit;