/* dtchkf.sas Daniel Brockman 070728 Check dates */
%macro dtchkf (iset,oset,exset,Dv,yr,mo,dy) ;
/*
 * dtchkf - check the calendar date implied by three numeric variables.
 *
 * What it does, for every observation of iset:
 *   1. prints an error message in the log for each invalid date detected.
 *   2. replaces the value of Dv with . (missing) in oset when the date
 *      is invalid; every observation of iset is written to oset.
 *   3. writes a copy of each bad observation to data set exset. The copy
 *      is written before Dv is set to missing, so exset keeps the
 *      original Dv value.
 *
 * Parameters:
 *   iset  = name of data set to consider.
 *   oset  = name of modified data set.
 *   exset = name of exception data set containing bad observations.
 *   Dv    = name of the date variable in iset. dtchkf does not attempt to
 *           interpret the format of Dv or read its value; it only tests
 *           whether Dv is missing, and marks it missing when yr, mo, dy
 *           imply an invalid date.
 *   yr    = name of the year variable stored in iset.
 *           A valid value is an integer between 1 and 9999 inclusive.
 *           A value from 0 to 99 is treated as a 1- or 2-digit year and
 *           is mapped to the year with those last two digits that lies
 *           within about 50 years of the current year.
 *           3 or 4 digits represent the year number unambiguously.
 *   mo    = name of the month variable stored in iset.
 *           A valid value is an integer between 1 and 12 inclusive.
 *   dy    = name of the day variable stored in iset.
 *           A valid value is an integer between 1 and 31, further limited
 *           by the length of the month (leap years are honoured).
 *
 * Notes:
 *   1. The above limits on yr, mo and dy apply to valid dates. The
 *      numbers for invalid dates may not comply with these limits.
 *   2. You may have to create the 3 variables yr, mo and dy in the data
 *      set you want to check. To maintain generality, dtchkf makes no
 *      assumptions about the data set except that it contains variables
 *      representing year, month and day as described above.
 *   3. dtchkf executes one complete data step (it ends with RUN).
 *   4. dtchkf writes nothing in the log when it encounters a valid date.
 *   5. A date exactly 50 years from this year is an ambiguous case for a
 *      2-digit year: 50 years ahead stays in the future, while 50 years
 *      back is moved one century forward.
 *   6. If Dv or yr or mo or dy is . (missing), then the date is invalid.
 *   7. One of my classmates (I regret I can't remember whether it was
 *      Tom Lambert or Mark Gschwind) produced a list of observations that
 *      contained bad dates, and I thought that was a good idea.
 *   8. Working variables carry the prefix _dtchkf_ so that they do not
 *      overwrite variables of iset such as "good" or "leap". An input
 *      variable whose name starts with _dtchkf_ would still collide.
 *   9. Depends on the macro mkvarlist2, which is defined elsewhere.
 *      Presumably it stores the list of variables of iset in the global
 *      macro variable greturn; confirm against its definition. That list
 *      is used as the KEEP= list of oset and exset.
 *  10. NOTE(review): a 1- or 2-digit year is expanded in place, so the yr
 *      variable in oset and exset holds the expanded 4-digit year rather
 *      than the value read from iset. Kept as is for compatibility.
 */
%global greturn ;
%global greturn1 ;   /* not used here; presumably set by mkvarlist2 - confirm */
%local invars ;      /* keep the work variable out of the caller's scope */
%mkvarlist2(&iset);
%let invars=&greturn ;   /* the variables we want to keep */

data &oset (keep=&invars) &exset (keep=&invars) ;
    set &iset ;

    /* Design note: days-per-month could be looked up in a 12-element
     * array (31,28,31,30,...) instead of being tested with IF statements.
     * I opted not to use the array method after testing it against the
     * if-statement method. I found no significant (or detectable)
     * performance difference while checking as many as 1 million dates.
     * This result corresponds with a similar test I conducted with
     * FORTRAN about 25 years ago. The array method is a trick and not
     * obvious to the naive, whereas the if-statement method seems
     * straightforward and intuitive to most people. These are the
     * reasons I chose the if-statement method.
     */

    _dtchkf_good = 1 ;   /* starting assumption: date is good */

    /* Test for missing values first, so that int() is never applied to a
     * missing value and no missing-value notes are written to the log. */
    if (missing(&dy) or
        missing(&mo) or
        missing(&yr) or
        missing(&Dv)) then _dtchkf_good = 0 ;               /* missing value? */
    else if (&dy ne int(&dy)) then _dtchkf_good = 0 ;       /* dy not an integer? */
    else if (&mo ne int(&mo)) then _dtchkf_good = 0 ;       /* mo not an integer? */
    else if (&yr ne int(&yr)) then _dtchkf_good = 0 ;       /* yr not an integer? */
    else do ;                                               /* all integers */

        /* 1- or 2-digit year. Negative values are excluded here so that
         * they fail the range test below instead of being shifted into
         * a valid-looking year. */
        if (&yr >= 0 and &yr < 100) then do ;
            _dtchkf_thisyr = year(today()) ;                /* this year */
            _dtchkf_cc     = int(_dtchkf_thisyr/100) ;      /* this century */
            _dtchkf_cand   = _dtchkf_cc*100 + &yr ;         /* yr if in this century */
            /* the two tests cannot both be true: they compare the same
             * candidate year against this year in opposite directions */
            if (_dtchkf_cand - _dtchkf_thisyr > 50)
                then _dtchkf_cc = _dtchkf_cc - 1 ;          /* yr must be prev century */
            else if (_dtchkf_thisyr - _dtchkf_cand >= 50)
                then _dtchkf_cc = _dtchkf_cc + 1 ;          /* yr must be next century */
            &yr = _dtchkf_cc*100 + &yr ;                    /* work with this year number */
        end ;                                               /* 2 digit year */

        if (&yr<1 or &yr>9999) then _dtchkf_good = 0 ;      /* yr out of range? */
        else if (&mo<1 or &mo>12) then _dtchkf_good = 0 ;   /* mo out of range? */
        else if (&dy<1 or &dy>31) then _dtchkf_good = 0 ;   /* dy out of range? */
        else if (&mo = 2) then do ;                         /* February */
            /* Gregorian rule: divisible by 400, or by 4 but not by 100 */
            _dtchkf_leap = (mod(&yr,400)=0) or
                           (mod(&yr,4)=0 and mod(&yr,100) ne 0) ;
            /* 29 days allowed in a leap year, 28 otherwise */
            if (&dy > 28 + _dtchkf_leap) then _dtchkf_good = 0 ;
        end ;                                               /* February */
        else do ;                                           /* not February */
            /* too many days for a 30-day month? */
            if (&dy>30 and (&mo=4 or &mo=6 or &mo=9 or &mo=11))
                then _dtchkf_good = 0 ;
        end ;                                               /* not February */
    end ;                                                   /* all integers */

    if (not _dtchkf_good) then do ;
        /* err msg to log file; yr is shown after any 2-digit expansion */
        put "ERROR BAD DATE " _N_= &Dv.= &yr.= &mo.= &dy.= ;
        output &exset ;                                     /* write exception set */
        &Dv = . ;                                           /* replace Dv with missing */
    end ;                                                   /* end if not good */

    output &oset ;                                          /* write output set */
run ;   /* close the step here so that it executes inside the macro */
%mend dtchkf ;
* --------------------------------------------------------- ;