/*************************************************************
Code typed in by: Laura J. Simon
Date: 02 March 2008

Code originally created by: Ron Cody
Date: For the SAS Global Forum 2007 Conference in Orlando, FL

The examples contained in this SAS program are from Ron Cody's
"An Introduction to SAS Character Functions" (Paper 217-2007).
The page numbers in the program refer to the page numbers from
this version of the paper:

http://www2.sas.com/proceedings/forum2007/217-2007.pdf
**************************************************************/

/* Layout note: every in-stream data record sits on its own line and
   starts in column 1.  Several INPUT statements below read fixed
   columns (column input and @n pointers), so do not re-indent,
   re-wrap, or trim the records that follow a DATALINES statement. */

OPTIONS NODATE NONUMBER PS=58 LS=80;

/* Page 1 */
/* How Lengths of Character Variables are Set in a SAS Data Step */

/* #1 */
/* string is first seen in the assignment, so its length is taken from
   the 3-character literal; the LENGTH statement that follows comes too
   late to change it.  The PUT statements write to the print file. */
DATA chars1;
   FILE print;
   string = 'abc';
   length string $ 7;   /* Does this do anything */
   storage_length = lengthc(string);
   display = ":" || string || ":";
   put storage_length= ;
   put display= ;
RUN;

/* Page 2 */

/* #2 */
/* Same step with the LENGTH statement first, so string is stored
   with length 7 and padded with trailing blanks. */
DATA chars2;
   FILE print;
   length string $ 7;   /* Does this do anything */
   string = 'abc';
   storage_length = lengthc(string);
   display = ":" || string || ":";
   put storage_length= ;
   put display= ;
RUN;

/* Converting Multiple Blanks to a Single Blank */

/* #3 */
/* Each observation is built from three records (#1, #2, #3).
   Record 3 is column-sensitive: city in 1-15, state in 20-21,
   zip in 25-29.  The second observation carries runs of blanks so
   that COMPBL has something to compress.
   NOTE(review): the exact blank runs of the original records were
   lost when the file was collapsed - compare with the paper. */
DATA multiple;
   INPUT #1 @1  name    $20.
         #2 @1  address $30.
         #3 @1  city    $15.
            @20 state   $2.
            @25 zip     $5.;
   name    = compbl(name);
   address = compbl(address);
   city    = compbl(city);
   DATALINES;
Ron Cody
89 Lazy Brook Road
Flemington         NJ   08822
Bill     Brown
28   Cathy   Street
North   City       NY   11518
;
RUN;

PROC PRINT data = multiple NOOBS;
   TITLE 'The multiple data set';
   ID name;
   VAR address city state zip;
RUN;

/* Page 3 */
/* How to Remove Characters from a String */

/* #4 */
/* phone1: COMPRESS with one argument removes blanks only.
   phone2: the second argument lists the characters to remove
   (parentheses, dash, and blank). */
DATA phone;
   INPUT phone $ 1-15;
   phone1 = compress(phone);
   phone2 = compress(phone,'(-) ');
   DATALINES;
(908)235-4490
(201) 555-77 99
;
RUN;

PROC PRINT data = phone NOOBS;
   TITLE 'The phone data set';
RUN;

/* Page 4 */
/* Character Data Verification */

/* #5 */
/* position is the location of the first character of answer that is
   not one of a, b, c, d, e (0 when every character is in the list).
   answer is read from columns 5-9, so an embedded blank is kept. */
DATA verify;
   INPUT @1 id     $3.
         @5 answer $5.;
   position = verify(answer,'abcde');
   DATALINES;
001 acbed
002 abxde
003 12cce
004 abc e
;
RUN;

PROC PRINT data = verify NOOBS;
   TITLE 'The verify data set';
RUN;

/* #6 */
/* string is stored with length 10, so it carries trailing blanks.
   pos1 checks the padded value; pos2 checks the value after TRIM
   has removed the trailing blanks. */
DATA trailing;
   length string $ 10;
   string = 'abc';
   pos1 = verify(string,'abcde');
   pos2 = verify(trim(string),'abcde');
RUN;

PROC PRINT data = trailing NOOBS;
   TITLE 'The trailing data set';
RUN;

/* Page 5 */
/* Substring Example */

/* #7 */
/* state = first two characters of id; num = characters 7-9 of id
   converted to a number with the 3. informat. */
DATA pieces_parts;
   INPUT id $ 1-9;
   length state $ 2;
   state = substr(id,1,2);
   num = input(substr(id,7,3),3.);
   DATALINES;
NYXXXX123
NJ1234567
;
RUN;

PROC PRINT data = pieces_parts NOOBS;
   TITLE 'The pieces_parts data set';
RUN;

/* Page 6 */
/* Using the SUBSTR Function on the Left-Hand Side of the Equal Sign */

/* #8 */
/* The trailing @@ holds the record so several sbp/dbp pairs can be
   read from one line.  The *_chk variables are 4 characters wide:
   three for the PUT result and a fourth that SUBSTR on the left-hand
   side overwrites with '*' when the reading exceeds the cut-off. */
DATA pressure;
   INPUT sbp dbp @@;
   length sbp_chk dbp_chk $ 4;
   sbp_chk = put(sbp,3.);
   dbp_chk = put(dbp,3.);
   if sbp gt 160 then substr(sbp_chk,4,1) = '*';
   if dbp gt 90 then substr(dbp_chk,4,1) = '*';
   DATALINES;
120 80 180 92 200 110
;
RUN;

PROC PRINT data = pressure NOOBS;
   TITLE 'The pressure data set';
RUN;

/* Page 7 */
/* Unpacking a String */

/* #9 */
/* string is read from columns 1-5; the second record has a blank in
   column 2, which must stay where it is. */
DATA pack;
   INPUT string $ 1-5;
   DATALINES;
12345
8 642
;
RUN;

/* Split string into five numeric variables x1-x5, one per character. */
DATA unpack;
   SET pack;
   array x[5];
   DO j = 1 to 5;
      x[j] = input(substr(string,j,1),1.);
   END;
   DROP j;
RUN;

PROC PRINT data = unpack NOOBS;
   TITLE 'The unpack data set';
RUN;

/* Parsing a String */

/* #10 */
/* SCAN pulls the i-th word out of long_str, treating comma, period,
   exclamation mark, and blank as delimiters; up to five words are
   stored in piece1-piece5 (10 characters each). */
DATA parse;
   INPUT long_str $ 1-80;
   array pieces[5] $ 10 piece1-piece5;
   do i = 1 to 5;
      pieces[i] = scan(long_str,i,',.! ');
   end;
   drop long_str i;
   DATALINES;
this line,contains!five.words
abcdefghijkl xxx yyy
;
RUN;

PROC PRINT data = parse NOOBS;
   TITLE 'The parse data set';
RUN;

/* Page 8 */
/* Using the SCAN function to Extract a Last Name */

/* #11 */
/* Column-sensitive records: name occupies columns 1-20 and phone
   starts in column 21. */
DATA first_last;
   INPUT @1  name  $20.
         @21 phone $13.;
   *** The next statement extracts the last name from name;
   last_name = scan(name,-1,' ');   /* scans from the right */
   DATALINES;
Jeff W. Snoker      (908)782-4382
Raymond Albert      (732)235-4444
Alred Edward Newman (800)123-4321
Steven J. Foster    (201)567-9876
Jose Romerez        (516)593-2377
;
RUN;

/* last_name is used only to order the rows (ORDER NOPRINT);
   name and phone are the displayed columns. */
PROC REPORT data = first_last NOWINDOWS;
   TITLE 'Names and Phone Numbers in Alphabetical Order (by Last Name)';
   COLUMNS name phone last_name;
   DEFINE last_name / order noprint width = 20;
   DEFINE name      / display 'Name' left width = 20;
   DEFINE phone     / display 'Phone Number' width = 13 format=$13.;
RUN;

/* Page 9 */
/* Locating the Position of One String within Another String */

/* #12 */
/* first   = position of the substring 'xyz' in string.
   first_c = position of the first character that is x, y, or z. */
DATA locate;
   INPUT string $ 1-10;
   first = index(string,'xyz');
   first_c = indexc(string,'x','y','z');
   DATALINES;
abcxyz1234
1234567890
abcx1y2z39
abczzzxyz3
;
RUN;

PROC PRINT data = locate NOOBS;
   TITLE 'The locate data set';
RUN;

/* Changing Lower Case to Upper Case and Vice Versa */
/* Page 10 */

/* #13 */
/* Five one-character variables and two numeric variables, read with
   list input. */
DATA up_down;
   length a b c d e $ 1;
   INPUT a b c d e x y;
   DATALINES;
M f P p D 1 2
m f m F M 3 4
;
RUN;

/* Upper-case every character variable in up_down; the _character_
   list picks them up without naming them. */
DATA upper;
   SET up_down;
   array all_c[*] _character_;
   DO i = 1 to dim(all_c);
      all_c[i] = upcase(all_c[i]);
   END;
   DROP i;
RUN;

PROC PRINT data = upper NOOBS;
   TITLE 'The upper data set';
RUN;

/* Converting String to Proper Case */

/* #14 */
DATA proper;
   INPUT Name $40.;
   propname = propcase(Name);
   DATALINES;
rOn coDY
the tall and the short
the "%$#@!" escape
;
RUN;

PROC PRINT data = proper NOOBS;
   TITLE 'The proper data set';
RUN;

/* Page 11 */
/* Substituting One Word for Another in a String */

/* #15 */
DATA convert;
   INPUT @1 address $20.;
   *** Convert Street, Avenue, and Road to their abbreviations;
   address = tranwrd(address,'Street','St.');
   address = tranwrd(address,'Avenue','Ave.');
   address = tranwrd(address,'Road','Rd.');
   DATALINES;
89 Lazy Brook Road
123 River Rd.
12 Main Street
;
RUN;

/* NOTE(review): unlike the other PROC PRINT steps in this program,
   this one has no NOOBS option - confirm whether that is intended. */
PROC PRINT data = convert;
   TITLE 'The convert data set';
RUN;

/* Fuzzy Merging: The SPEDIS Function */
/* Page 12 */

/* #16 */
/* points is the spelling distance SPEDIS reports between the two
   words on each record. */
DATA compare;
   length string1 string2 $ 15;
   INPUT string1 string2;
   points = spedis(string1, string2);
   DATALINES;
same same
same sam
firstletter xirstletter
lastletter lastlettex
receipt reciept
;
RUN;

PROC PRINT data = compare NOOBS;
   TITLE 'The compare data set';
RUN;

/* Demonstrating the "ANY" Functions */

/* #17 */
/* first_alpha / first_digit = position of the first letter / digit
   in string (0 when there is none). */
DATA find_alpha_digit;
   input string $20.;
   first_alpha = anyalpha(string);
   first_digit = anydigit(string);
   DATALINES;
no digits here
the 3 and 4
123 456 789
;
RUN;

PROC PRINT data = find_alpha_digit NOOBS;
   TITLE 'The find_alpha_digit data set';
RUN;

/* Page 13 */
/* Demonstrating the "NOT" Functions */

/* #18 */
/* only_alpha / only_digit = position of the first character that is
   not a letter / not a digit (0 when every character qualifies).
   TRIM keeps the trailing blanks of the $20 value out of the check. */
DATA data_cleaning;
   input string $20.;
   only_alpha = notalpha(trim(string));
   only_digit = notdigit(trim(string));
   DATALINES;
abcdefg
1234567
abc123
1234abcd
;
RUN;

PROC PRINT data = data_cleaning NOOBS;
   TITLE 'The data_cleaning data set';
RUN;

/* Page 14 */
/* The New Concatenation Functions */

/* #19 */
/* CATS strips leading and trailing blanks before joining; CATX does
   the same and puts the separator '***' between the pieces.  The
   result variables share their names with the functions.
   NOTE(review): runs of blanks inside these literals may have been
   collapsed along with the rest of the file - compare with the paper. */
DATA join_up;
   length cats $ 6 catx $ 17;
   string1 = 'ABC ';
   string2 = ' XYZ ';
   string3 = '12345';
   cats = cats(string1, string2);
   catx = catx('***', string1, string2, string3);
RUN;

PROC PRINT data = join_up NOOBS;
   TITLE 'The join_up data set';
   VAR string1 string2 string3 cats catx;
RUN;

/* The Length, Lengthn, and LengthC Functions */
/* Page 15 */

/* #20 */
/* LENGTH and LENGTHN ignore trailing blanks and differ only for a
   blank value (1 versus 0); LENGTHC returns the storage length.
   NOTE(review): runs of blanks inside these literals may have been
   collapsed along with the rest of the file - compare with the paper. */
DATA how_long;
   one = 'ABC ';
   two = ' ';   /* character missing value */
   three = 'ABC XYZ';
   length_one = length(one);
   lengthn_one = lengthn(one);
   lengthc_one = lengthc(one);
   length_two = length(two);
   lengthn_two = lengthn(two);
   lengthc_two = lengthc(two);
   length_three = length(three);
   lengthn_three = lengthn(three);
   lengthc_three = lengthc(three);
RUN;

PROC PRINT data = how_long NOOBS;
   TITLE 'The how_long data set';
RUN;

/* Page 16 */
/* Counting Occurrences of Characters or Substrings Using the COUNT
   and COUNTC Functions */

/* #21 */
/* COUNT counts occurrences of the whole substring; COUNTC counts
   characters that appear anywhere in the list.  The 'i' modifier in
   case_a makes the count of 'a' ignore case. */
DATA Dracula;
   INPUT string $20.;
   count_a_or_b = count(string,'ab');
   countc_a_or_b = countc(string,'ab');
   count_abc = count(string,'abc');
   countc_abc = countc(string,'abc');
   case_a = countc(string,'a','i');
   DATALINES;
xxabcxabcxxbbbb
cbacba
aaAA
;
RUN;

PROC PRINT data = Dracula NOOBS;
   TITLE 'The Dracula data set';
RUN;