/* dtchkf.sas Daniel Brockman 070728 Check dates */

%macro dtchkf (iset,oset,exset,Dv,yr,mo,dy) ;
/*
 * dtchkf
 *   1. prints an error message in the log for each invalid
 *      date detected in iset.
 *   2. replaces the value of Dv with . (missing) in oset.
 *   3. writes copy of bad obs from iset to data set exset.
 *
 * iset  = name of data set to consider.
 * oset  = name of modified data set.
 * exset = name of exception data set containing bad observations.
 * Dv    = The date variable in iset.  dtchkf doesn't attempt to
 *         interpret the format of Dv or read its value.  However,
 *         dtchkf will mark it missing if vars yr, mo, dy imply
 *         an invalid date.
 * yr    = name of year variable stored in iset.
 *         The variable contains an integer between 1 and 9999 inclusive.
 *         1 or 2 digits represents a year not more than 50 years from
 *         this year.
 *         3 or 4 digits represents the year number unambiguously.
 * mo    = name of month variable stored in iset.
 *         The variable contains an integer between 1 and 12 inclusive.
 * dy    = name of day variable stored in iset.
 *         The variable contains an integer between 1 and 31.
 *
 * Notes:
 *   1. The above limits on the numbers in yr, mo and dy apply for valid
 *      dates.  The numbers for invalid dates may not comply with these
 *      limits.
 *   2. You may have to create the 3 variables yr, mo and dy in the data
 *      set you want to check.  To maintain generality, dtchkf makes no
 *      assumptions about the data set except that it contains variables
 *      representing year, month and day as described above.
 *   3. dtchkf executes a data step (the step is closed with RUN inside
 *      the macro, so it does not wait for the caller's next step
 *      boundary).
 *   4. dtchkf writes nothing in the log when it encounters a valid date.
 *   5. a date 50 years from this year is an ambiguous case for a 2-digit
 *      year.
 *   6. if Dv or yr or mo or dy is . (missing), then the date is invalid.
 *   7. One of my classmates (I regret I can't remember whether it was
 *      Tom Lambert or Mark Gschwind) produced a list of observations that
 *      contained bad dates, and I thought that was a good idea.
 *   8. Working variables created by the data step are all prefixed with
 *      _dtchkf_ so that an input variable named e.g. "good" or "leap" is
 *      no longer overwritten.  An input variable whose name starts with
 *      _dtchkf_ could still collide.
 *   9. Side effect: when yr holds a 1- or 2-digit value (0..99) it is
 *      replaced in place by the inferred 4-digit year, so oset and exset
 *      carry the expanded year if yr is among the kept variables.
 *  10. A negative yr is reported as an invalid date; it is not treated
 *      as a 2-digit year.
 *  11. The observation written to exset still holds the original value
 *      of Dv; only the copy written to oset has Dv set to missing.
 */

  %global greturn ;
  %global greturn1 ;
  %local invars ;   /* keep the keep-list private to this macro */

  /* %mkvarlist2 is defined outside this file.  Presumably it leaves the
     list of variable names of &iset in the global macro variable
     greturn - TODO confirm against its definition. */
  %mkvarlist2(&iset);
  %let invars=&greturn ;   /* the variables we want to keep */

  /* Original author's question: is there a syntax error near this line? */
  data &oset (keep=&invars) &exset (keep=&invars) ;
    set &iset ;

    /* I opted not to use the array method after testing it against the
       if-statement method.  I found no significant (or detectable)
       performance difference while checking as many as 1 million
       dates.  This result corresponds with a similar test I conducted
       with FORTRAN about 25 years ago.  The array method is a trick
       and not obvious to the naive, whereas the if-statement method
       seems straightforward and intuitive to most people.  These are
       the reasons I chose the if-statement method.

       The array method, for reference (days in each month of a
       non-leap year):
         a1=31; a2=28; a3=31; a4=30;  a5=31;  a6=30;
         a7=31; a8=31; a9=30; a10=31; a11=30; a12=31;
         array dpm(12) a1-a12;
         if (&dy>dpm(&mo)) then good=0;                                  */

    _dtchkf_good = 1 ;   /* starting assumption: date is good */

    /* Missing values are tested first so that int() and mod() below are
       never applied to a missing value. */
    if (missing(&dy) or missing(&mo) or missing(&yr) or missing(&Dv))
      then _dtchkf_good = 0 ;                              /* missing value?     */
    else if (&dy ne int(&dy)) then _dtchkf_good = 0 ;      /* dy not an integer? */
    else if (&mo ne int(&mo)) then _dtchkf_good = 0 ;      /* mo not an integer? */
    else if (&yr ne int(&yr)) then _dtchkf_good = 0 ;      /* yr not an integer? */
    else do ;                                              /* all integers       */

      /* 1- or 2-digit year: choose the century that puts the year within
         50 years of the current year.  Negative values are excluded here
         so that the range check below rejects them instead of having
         them silently turned into a plausible year. */
      if (&yr >= 0 and &yr < 100) then do ;
        _dtchkf_tyyyy = year(today()) ;                    /* this year          */
        _dtchkf_tcc   = int(_dtchkf_tyyyy/100) ;           /* this century       */
        _dtchkf_yyyy  = _dtchkf_tcc*100 + &yr ;            /* yr if this century */
        if (_dtchkf_yyyy - _dtchkf_tyyyy > 50)
          then _dtchkf_tcc = _dtchkf_tcc - 1 ;             /* must be prev century */
        else if (_dtchkf_tyyyy - _dtchkf_yyyy >= 50)
          then _dtchkf_tcc = _dtchkf_tcc + 1 ;             /* must be next century */
        &yr = _dtchkf_tcc*100 + &yr ;                      /* work with 4-digit year */
      end ;

      if (&yr < 1 or &yr > 9999) then _dtchkf_good = 0 ;   /* yr out of range?   */
      else if (&mo < 1 or &mo > 12) then _dtchkf_good = 0 ; /* mo out of range?  */
      else if (&dy < 1 or &dy > 31) then _dtchkf_good = 0 ; /* dy out of range?  */
      else if (&mo = 2) then do ;                          /* February           */
        /* Gregorian rule: divisible by 400, or by 4 but not by 100. */
        _dtchkf_leap = (mod(&yr,400) = 0)
                       or (mod(&yr,4) = 0 and mod(&yr,100) ne 0) ;
        /* 29 days allowed in a leap year, 28 otherwise. */
        if (&dy > 28 + _dtchkf_leap) then _dtchkf_good = 0 ;
      end ;
      else do ;                                            /* not February       */
        /* Only the four 30-day months need a check; dy > 31 was rejected
           above. */
        if (&dy > 30 and (&mo = 9 or &mo = 4 or &mo = 6 or &mo = 11))
          then _dtchkf_good = 0 ;
      end ;
    end ;                                                  /* all integers       */

    if (not _dtchkf_good) then do ;
      /* Original author's note: put or %put results in fatal error for
         this macro. */
      put "ERROR BAD DATE " _N_= &Dv.= &yr.= &mo= &dy= ;   /* err msg to log file */
      output &exset ;                                      /* write exception set */
      &Dv = . ;                                            /* replace Dv with missing */
    end ;

    output &oset ;                                         /* write output set   */
  run ;
%mend dtchkf ;
* --------------------------------------------------------- ;