/* Limit the width of printed output to 78 columns. */
options linesize=78;
/* First title line shown on every page of output and on the plot. */
title1 "Profile Plot - Women's Nutrition Data";
/* The %let statement below defines the macro variable p (the number
* of nutrient variables) so that it can be reused throughout the code.
* After reading in the nutrient data, where each variable is
* originally in its own column, the next statements stack the data
* so that all variable names are in one column called 'variable',
* and all response values, each divided by its null (hypothesized)
* value, are in another column called 'ratio'.
* This stacked format is used for the calculations that follow, as
* well as for the profile plot.
*/
/* Number of nutrient variables; referenced later as a macro variable. */
%let p = 5;

/* Read one row per subject, then emit one row per (subject, nutrient)
 * holding the nutrient name and the intake divided by its null value.
 */
data nutrient;
  infile "D:\Statistics\STAT 505\data\nutrient.csv" dlm=',' firstobs=2;
  input id calcium iron protein a c;

  /* Parallel arrays: measured intake, its null value, and its label. */
  array intake{5} calcium iron protein a c;
  array nullval{5} _temporary_ (1000 15 60 800 75);
  array nutname{5} $ 7 _temporary_ ("calcium" "iron" "protein" "vit a" "vit c");

  do k = 1 to 5;
    variable = nutname{k};
    ratio = intake{k} / nullval{k};
    output;
  end;

  /* Only the stacked columns are kept; the loop index and raw columns are dropped. */
  keep variable ratio;
run;

/* Order the stacked rows by nutrient name so later steps can use BY processing. */
proc sort data=nutrient;
  by variable;
run;
/* The means procedure calculates the sample size, mean, and variance
 * of 'ratio' for each level of 'variable'. It saves these results
 * in a new data set 'a' (columns n, xbar, s2) for use in the steps below.
 * The input data set must already be sorted by 'variable'.
 */
proc means data=nutrient;
  by variable;
  var ratio;
  output out=a n=n mean=xbar var=s2;
run;

/* The data step here is used to calculate the simultaneous
 * confidence intervals based on the F-multiplier:
 *   xbar +/- sqrt( p(n-1)/(n-p) * F * s2/n )
 * where F is the 0.95 quantile of the F distribution with p and n-p
 * degrees of freedom, and p is the macro variable set earlier.
 * Three rows are written per variable for the plot: the mean ratio
 * itself and both endpoints, lower and upper, of the interval.
 */
data b;
  set a;
  f = finv(0.95, &p, n-&p);
  /* Half-width of the interval, computed once and used for both endpoints. */
  halfwidth = sqrt(&p*(n-1)*f*s2/(n-&p)/n);
  ratio = xbar;             output;
  ratio = xbar - halfwidth; output;
  ratio = xbar + halfwidth; output;
run;

/* The axis statements define the size of the plotting window.
 * The horizontal axis (axis2) shows the variables, and the vertical
 * axis (axis1) is used for the confidence limits.
 * The reference line at 1 corresponds to the null value of the
 * ratio for each variable; lvref selects its line type.
 * The symbol statement uses the hilot interpolation to join the
 * lowest and highest of the three rows per variable with a vertical
 * bar, with no marker drawn at the individual points.
 */
proc gplot data=b;
  axis1 length=4 in;
  axis2 length=6 in;
  plot ratio*variable / vaxis=axis1 haxis=axis2 vref=1 lvref=21;
  symbol v=none i=hilot color=black;
run;
quit;