/*************************************************************
 Code typed in by: Laura J. Simon
 Date: 02 March 2008

 Code originally created by: Ron Cody
 Date: For the SAS Global Forum 2007 Conference in Orlando, FL

 The examples contained in this SAS program are from Ron Cody's
 "An Introduction to SAS Character Functions" (Paper 217-2007).
 The page numbers in the program refer to the page numbers from
 this version of the paper:

 http://www2.sas.com/proceedings/forum2007/217-2007.pdf
 **************************************************************/

/* Listing layout: no date or page number in the page header,
   58 lines per page, 80 columns per line. */
options nodate nonumber pagesize=58 linesize=80;

/* Page 1 */
/* How Lengths of Character Variables are Set in a SAS Data Step */
/* #1 */
/* LENGTH appears AFTER the first assignment to string. The first
   reference ('abc') is what sets the variable's length, so compare
   storage_length here with the value reported by chars2. */
data chars1;
    file print;                      /* route PUT output to the listing */
    string = 'abc';
    length string $ 7;               /* too late to change the length */
    storage_length = lengthc(string);
    display = ":" || string || ":";  /* colons make any padding visible */
    put storage_length= / display=;
run;

/* Page 2 */
/* #2 */
/* LENGTH appears BEFORE the first assignment to string, so string is
   stored with 7 bytes and 'abc' is padded with trailing blanks. */
data chars2;
    file print;                      /* route PUT output to the listing */
    length string $ 7;
    string = 'abc';
    storage_length = lengthc(string);
    display = ":" || string || ":";  /* colons make the padding visible */
    put storage_length= / display=;
run;

/* Converting Multiple Blanks to a Single Blank */
/* #3 */
/* Reads three-line address records (name / street address /
   city-state-zip) and collapses every run of blanks in the name,
   address and city to a single blank with COMPBL. */
DATA multiple;
    /* The ZIP code in the data lines begins to the right of column 25,
       so a fixed-width $5. read at @25 would pick up a leading blank
       and drop the last digit.  The colon modifier scans forward from
       @25 to the first non-blank character and reads the whole token,
       while the $5. informat still sets the variable length to 5. */
    INPUT #1 @1  name    $20.
          #2 @1  address $30.
          #3 @1  city    $15.
             @20 state    $2.
             @25 zip     :$5.;
    name    = compbl(name);
    address = compbl(address);
    city    = compbl(city);
DATALINES;
Ron Cody
89 Lazy Brook Road
Flemington         NJ    08822
Bill     Brown
28   Cathy   Street
North   City       NY    11518
;
RUN;

/* List the cleaned records, using name as the row identifier. */
PROC PRINT data = multiple NOOBS;
    TITLE 'The multiple data set';
    ID name;
    VAR address city state zip;
RUN;

/* Page 3 */
/* How to Remove Characters from a String */
/* #4 */
/* COMPRESS with one argument strips blanks only; with a second
   argument it strips every character listed in that argument. */
data phone;
    input phone $ 1-15;
    phone1 = compress(phone);            /* blanks removed */
    phone2 = compress(phone, '(-) ');    /* parentheses, dashes and blanks removed */
datalines;
(908)235-4490
(201) 555-77 99
;
run;

title 'The phone data set';
proc print data=phone noobs;
run;

/* Page 4 */
/* Character Data Verification */
/* #5 */
/* VERIFY returns the position of the first character in answer that
   is NOT one of 'abcde', or 0 when every character is valid. */
DATA verify;
    INPUT @1  id $3.
          @5  answer $5.;
    position = verify(answer,'abcde');
DATALINES;
001 acbed
002 abxde
003 12cce
004 abc e
;
RUN;   /* explicit step boundary, matching every other DATA step in this program */

PROC PRINT data = verify NOOBS;
    TITLE 'The verify data set';
RUN;

/* #6 */
/* Shows why trailing blanks matter to VERIFY: string is stored in
   10 bytes, so the padding blanks are checked against 'abcde' unless
   they are removed with TRIM first. */
data trailing;
    length string $ 10;
    string = 'abc';
    pos1 = verify(string, 'abcde');          /* padded value is checked */
    pos2 = verify(trim(string), 'abcde');    /* trailing blanks removed first */
run;

title 'The trailing data set';
proc print data=trailing noobs;
run;

/* Page 5 */
/* Substring Example */
/* #7 */
/* Splits a 9-character id into a 2-character state code (columns 1-2)
   and a numeric value converted from columns 7-9. */
data pieces_parts;
    input id $ 1-9;
    length state $ 2;    /* otherwise SUBSTR would give state the length of id */
    state = substr(id, 1, 2);
    num   = input(substr(id, 7, 3), 3.);
datalines;
NYXXXX123
NJ1234567
;
run;

title 'The pieces_parts data set';
proc print data=pieces_parts noobs;
run;

/* Page 6 */
/* Using the SUBSTR Function on the Left-Hand Side of the Equal Sign */
/* #8 */
/* Builds character copies of systolic/diastolic readings and uses
   SUBSTR on the left of the equal sign to place an asterisk in the
   4th position of any reading above its cut-off (160 / 90). */
data pressure;
    input sbp dbp @@;    /* several sbp/dbp pairs per data line */
    length sbp_chk dbp_chk $ 4;
    sbp_chk = put(sbp, 3.);
    dbp_chk = put(dbp, 3.);
    if sbp > 160 then substr(sbp_chk, 4, 1) = '*';
    if dbp > 90 then substr(dbp_chk, 4, 1) = '*';
datalines;
120 80 180 92 200 110
;
run;

title 'The pressure data set';
proc print data=pressure noobs;
run;

/* Page 7 */
/* Unpacking a String */
/* #9 */
/* pack: five single-digit values stored together in one 5-byte string. */
data pack;
    input string $ 1-5;
datalines;
12345
8 642
;
run;

/* unpack: convert each character position of string into its own
   numeric variable x1-x5. */
data unpack;
    set pack;
    array x[5] x1-x5;
    do pos = 1 to dim(x);
        x[pos] = input(substr(string, pos, 1), 1.);
    end;
    drop pos;
run;

title 'The unpack data set';
proc print data=unpack noobs;
run;

/* Parsing a String */
/* #10 */
/* Breaks each input line into its first five words with SCAN, treating
   comma, period, exclamation mark and blank as delimiters.  Each word
   is stored in a 10-byte variable (piece1-piece5). */
data parse;
    input long_str $ 1-80;
    array word[5] $ 10 piece1-piece5;
    do n = 1 to dim(word);
        word[n] = scan(long_str, n, ',.! ');
    end;
    keep piece1-piece5;
datalines;
this line,contains!five.words
abcdefghijkl xxx yyy
;
run;

title 'The parse data set';
proc print data=parse noobs;
run;

/* Page 8 */
/* Using the SCAN function to Extract a Last Name */
/* #11 */
/* Reads a full name and a phone number, then extracts the last name
   so the report can be ordered by it. */
DATA first_last;
     /* The phone numbers in the data lines begin to the right of
        column 21, so a fixed-width $13. read at @21 would take in the
        leading blanks and cut off the final digits.  The colon
        modifier scans forward from @21 to the first non-blank
        character and reads the whole token; the $13. informat still
        sets the variable length to 13. */
     INPUT @1  name  $20.
           @21 phone :$13.;
     /* A negative word number makes SCAN count words from the right,
        so -1 returns the last blank-delimited word of name. */
     last_name = scan(name,-1,' ');
DATALINES;
Jeff W. Snoker        (908)782-4382
Raymond Albert        (732)235-4444
Alred Edward Newman   (800)123-4321
Steven J. Foster      (201)567-9876
Jose Romerez          (516)593-2377
;
RUN;

/* last_name only drives the row order (ORDER + NOPRINT); the visible
   columns are the full name and the phone number. */
PROC REPORT data = first_last NOWINDOWS;
    TITLE 'Names and Phone Numbers in Alphabetical Order (by Last Name)';
    COLUMNS name phone last_name;
    DEFINE last_name / order noprint width = 20;
    DEFINE name      / display 'Name' left width = 20;
    DEFINE phone     / display 'Phone Number' width = 13 format=$13.;
RUN;

/* Page 9 */
/* Locating the Position of One String within Another String */
/* #12 */
/* INDEX looks for the whole substring 'xyz'; INDEXC looks for the
   first occurrence of ANY of the characters x, y or z. */
data locate;
    input string $ 1-10;
    first   = index(string, 'xyz');
    first_c = indexc(string, 'xyz');
datalines;
abcxyz1234
1234567890
abcx1y2z39
abczzzxyz3
;
run;

title 'The locate data set';
proc print data=locate noobs;
run;

/* Changing Lower Case to Upper Case and Vice Versa */
/* Page 10 */
/* #13 */
/* up_down: five 1-byte character variables in mixed case plus two
   numeric variables. */
data up_down;
    length a b c d e $ 1;
    input a b c d e x y;
datalines;
M f P p D 1 2
m f m F M 3 4
;
run;

/* upper: upper-case every character variable in up_down by looping
   over the _character_ variable list. */
data upper;
    set up_down;
    array chr_vars[*] _character_;
    do idx = 1 to dim(chr_vars);
        chr_vars[idx] = upcase(chr_vars[idx]);
    end;
    drop idx;
run;

title 'The upper data set';
proc print data=upper noobs;
run;

/* Converting String to Proper Case */
/* #14 */
/* PROPCASE capitalises the first letter of each word and lower-cases
   the remaining letters. */
data proper;
    input Name $40.;
    propname = propcase(Name);
datalines;
rOn coDY
the tall and the short
the "%$#@!" escape
;
run;

title 'The proper data set';
proc print data=proper noobs;
run;

/* Page 11 */
/* Substituting One Word for Another in a String */
/* #15 */
/* Abbreviates street-type words in an address with TRANWRD:
   Street -> St., Avenue -> Ave., Road -> Rd. */
data convert;
    input @1 address $20.;
    address = tranwrd(address, 'Street', 'St.');
    address = tranwrd(address, 'Avenue', 'Ave.');
    address = tranwrd(address, 'Road', 'Rd.');
datalines;
89 Lazy Brook Road
123 River Rd.
12 Main Street
;
run;

title 'The convert data set';
proc print data=convert;
run;

/* Fuzzy Merging: The SPEDIS Function */
/* Page 12 */
/* #16 */
/* SPEDIS scores the spelling distance between two strings; each data
   line holds one pair of words to compare. */
data compare;
    length string1 string2 $ 15;
    input string1 string2;
    points = spedis(string1, string2);
datalines;
same same
same sam
firstletter xirstletter
lastletter lastlettex
receipt reciept
;
run;

title 'The compare data set';
proc print data=compare noobs;
run;

/* Demonstrating the "ANY" Functions */
/* #17 */
/* ANYALPHA / ANYDIGIT return the position of the first letter / first
   digit in string, or 0 when there is none. */
data find_alpha_digit;
    input string $20.;
    first_alpha = anyalpha(string);
    first_digit = anydigit(string);
datalines;
no digits here
the 3 and 4
123 456 789
;
run;

title 'The find_alpha_digit data set';
proc print data=find_alpha_digit noobs;
run;

/* Page 13 */
/* Demonstrating the "NOT" Functions */
/* #18 */
/* NOTALPHA / NOTDIGIT return the position of the first character that
   is not a letter / not a digit, or 0 when every character qualifies.
   TRIM keeps the trailing padding blanks from being reported. */
data data_cleaning;
    input string $20.;
    only_alpha = notalpha(trim(string));
    only_digit = notdigit(trim(string));
datalines;
abcdefg
1234567
abc123
1234abcd
;
run;

title 'The data_cleaning data set';
proc print data=data_cleaning noobs;
run;

/* Page 14 */
/* The New Concatenation Functions */
/* #19 */
/* CATS strips leading and trailing blanks from each argument before
   joining; CATX does the same and inserts the separator given as its
   first argument between the pieces.  Result lengths are declared up
   front so the variables are sized to fit the joined values. */
data join_up;
    length cats $ 6;
    length catx $ 17;
    string1 = 'ABC   ';
    string2 = '   XYZ   ';
    string3 = '12345';
    cats = cats(string1, string2);
    catx = catx('***', string1, string2, string3);
run;

title 'The join_up data set';
proc print data=join_up noobs;
    var string1 string2 string3 cats catx;
run;

/* The Length, Lengthn, and LengthC Functions */
/* Page 15 */
/* #20 */
/* Applies LENGTH, LENGTHN and LENGTHC to three strings: one with
   trailing blanks, one that is a character missing value, and one
   with embedded blanks, so the three functions can be compared. */
data how_long;
    one   = 'ABC   ';
    two   = ' ';            /* character missing value */
    three = 'ABC   XYZ';

    /* trailing blanks */
    length_one  = length(one);
    lengthn_one = lengthn(one);
    lengthc_one = lengthc(one);

    /* missing value */
    length_two  = length(two);
    lengthn_two = lengthn(two);
    lengthc_two = lengthc(two);

    /* embedded blanks */
    length_three  = length(three);
    lengthn_three = lengthn(three);
    lengthc_three = lengthc(three);
run;

title 'The how_long data set';
proc print data=how_long noobs;
run;

/* Page 16 */
/* Counting Occurrences of Characters or Substrings Using the
   COUNT and COUNTC Functions */
/* #21 */
/* COUNT counts occurrences of a whole substring; COUNTC counts
   occurrences of any of the listed characters.  The 'i' modifier on
   the last call makes the character match ignore case. */
data Dracula;
    input string $20.;
    count_a_or_b  = count(string, 'ab');     /* substring 'ab' */
    countc_a_or_b = countc(string, 'ab');    /* character a or b */
    count_abc     = count(string, 'abc');    /* substring 'abc' */
    countc_abc    = countc(string, 'abc');   /* character a, b or c */
    case_a        = countc(string, 'a', 'i');
datalines;
xxabcxabcxxbbbb
cbacba
aaAA
;
run;

title 'The Dracula data set';
proc print data=Dracula noobs;
run;