/* rhisto Daniel Brockman 111116 Histogram */ %macro rhisto(key=0,type=Resi,subtype=rFICO,firstbin=350,lastbin=850,increment=50,value=., code=0) ; /* key = action to perform /* = 0 = accumulate count of values into the histogram /* = 1 = list of histo variables separated by spaces /* = 2 = list of histo variables separated by commas /* = 3 = initialize histo variables /* Note: if key=1 or key=2, then the invocation of %rhisto() creates /* a list of datastep variables without a closing semicolon. /* /* type = code allowing exceptions /* = HNC = HELOC and Other Consumer /* = Resi = Residential /* /* subtype = common element of histogram variable names and allowing exceptions /* = rFICO = updated fico score /* = rLTV = updated Loan-to-Value /* /* firstbin = minimum value in lowest-valued bin. The minimum value defines the bin. /* /* lastbin = minimum value in highest-valued bin /* /* increment = range of value between bins. Required: &increment > 0. /* /* value = value to count in the bin (independent var to count) /* /* code = allows exceptions and variable processing (not in use at 111116). /* /* rhisto counts the values falling in each of the bins. /* rhisto is intended for insertion in a data step. /* /* Motivation: After reviewing and experimenting with proc univariate and proc rank, /* we implemented this as a simple solution targeting our specific goals. /* /* rhisto creates, initializes and counts into a series of numeric bin vars /* named BIN_&subtype._nnn where nnn is a value determined by &firstbin, /* &increment and &lastbin, plus BIN_&subtype._less for values less than &firstbin., /* BIN_&subtype._more for values greater than or equal to %eval(&increment.+&lastbin.) /* and BIN_&subtype._other (for missing values and other irregularities). /* &firstbin., &lastbin., and &increment. must be integers. /* rhisto also creates a char var named &subtype._histogram and assigns to it the value /* "&subtype._histogram". /* /* rhisto creates an intermediate result numeric variable named rhisto_value. ; /* /* This file includes %rhistotest() for testing and for a usage example. /* /* Bug at 111121: firstbin, increment, lastbin must be nonnegative integers. /* /* */ %local iii ; /* key = 0 = accumulate count of values into the histogram */ %if (&key = 0 ) %then %do ; rhisto_value=&value ; /* limits implied by subtype */ %if (&subtype eq rLTV ) %then %do ; if ( &value ne . and ( &value le 0 or &value gt 750 ) ) then rhisto_value=. ; %end; %if (&subtype eq rFICO ) %then %do ; if ( &value ne . and ( &value lt 300 or &value gt 850 ) ) then rhisto_value=. ; %end; %let iii = &firstbin. ; /* <-- loop iterator */ if (missing(&value.) or missing(rhisto_value)) then BIN_&subtype._other = BIN_&subtype._other + 1 ; else if (&value.<&firstbin.) then BIN_&subtype._less = BIN_&subtype._less + 1 ; else if (&value.>=%eval(&lastbin.+&increment.)) then BIN_&subtype._more = BIN_&subtype._more + 1 ; %do %while (&iii. <= &lastbin.) ; else if ( &iii. <= &value. < (&iii.+&increment.) ) then BIN_&subtype._&iii. = BIN_&subtype._&iii. + 1 ; %let iii=%eval(&iii.+&increment) ; %end ; /* while iii */ %end ; /* key = 1 = list of histo variables separated by spaces */ %if (&key = 1 ) % then %do ; &subtype._histogram %if ( (&subtype ne rLTV) ) %then %do ; BIN_&subtype._other %end; BIN_&subtype._less %let iii=&firstbin. ; %do %while ( &iii. le &lastbin. ) ; BIN_&subtype._&iii. %let iii=%eval(&iii.+&increment.) ; %end ; BIN_&subtype._more %if ( (&subtype eq rLTV) ) %then %do ; BIN_&subtype._other %end; %end; /* key = 2 = list of histo variables separated by commas */ %if (&key = 2 ) % then %do ; &subtype._histogram %if ( (&subtype ne rLTV) ) %then %do ; , BIN_&subtype._other %end; , BIN_&subtype._less %let iii=&firstbin. ; %do %while (&iii. le &lastbin.) ; , BIN_&subtype._&iii. %let iii=%eval(&iii.+&increment.) ; %end ; , BIN_&subtype._more %if ( (&subtype eq rLTV) ) %then %do ; , BIN_&subtype._other %end; %end; /* key = 3 = initialize histo variables */ %if (&key = 3 ) % then %do ; format &subtype._histogram $16. ; &subtype._histogram = "&subtype._histogram" ; BIN_&subtype._other = 0; BIN_&subtype._less = 0 ; %let iii=&firstbin. ; %do %while ( &iii.le &lastbin. ) ; BIN_&subtype._&iii. = 0 ; %let iii=%eval(&iii+&increment.) ; %end ; BIN_&subtype._more = 0 ; %end; %mend rhisto ; /* ------------------------------- testing ------------------------- */ %macro rhistotest(arg=unassigned) ; %if (&arg.=test) %then %do ; /* SQL8_SILHISTO.sas Daniel Brockman 20111116 Testing %rhisto() */ options ls=72 ps=50 /* mlogic symbolgen */ mprint ; %let devroot=U:\Wk ; /* development application root directory */ %let approot=G:\CAM_ADM_75100\Risk_Analysis; /* application root directory */ options mautosource sasautos=( "&devroot.\autosas" "&approot.\autosas" sasautos ) ; /* macro autocall libraries list */ %let myprogram = %pgmnam() ; /* get name of this program */ %let tsdir=G:\CAM_ADM_75100\Risk_Analysis\Analysis_Dataset\RLLF_data ; %odbclib(libref=s8rllf) /* Libname input data for this pgm */ %let yyyy=2011 ; %let mm=08; %let dsn=s8rllf.RLLF_SIL_Loss_Data ; title "dsn:&dsn pgm:%pgmnam()" ; title2 "for yyyy:&yyyy mm:&mm " ; /* --------------------------------- */ /* ---------------------------------------------- */ %s8lossconsole(&yyyy.&mm.,&dsn.,aleppo) ; %let fb=1 ; %let Lb=%eval(90000+&fb.) ; %let ib=10000 ; data buda (keep= %rhisto(key=1,type=HNC,subtype=wo,firstbin=&fb. ,lastbin=&Lb.,increment=&ib.) %rhisto(key=1,type=HNC,subtype=recov,firstbin=&fb. ,lastbin=&Lb.,increment=&ib.) ) ; retain %rhisto(key=1,type=HNC,subtype=wo,firstbin=&fb. ,lastbin=&Lb.,increment=&ib.) %rhisto(key=1,type=HNC,subtype=recov,firstbin=&fb. ,lastbin=&Lb.,increment=&ib.) ; set aleppo end=lastobs ; if (_N_=1) then do ; %rhisto(key=3,type=HNC,subtype=wo,firstbin=&fb. ,lastbin=&Lb.,increment=&ib.) ; /* init */ %rhisto(key=3,type=HNC,subtype=recov,firstbin=&fb. ,lastbin=&Lb.,increment=&ib.) ; /* init */ end; /* accumulate */ %rhisto(key=0,type=HNC,subtype=wo,firstbin=&fb. ,lastbin=&Lb.,increment=&ib.,value=write_off) ; %rhisto(key=0,type=HNC,subtype=recov,firstbin=&fb. ,lastbin=&Lb.,increment=&ib.,value=recovery_amt) ; if lastobs then output buda ; run ; /* data buda */ title3 "buda" ; proc sql ; select %rhisto(key=2,type=HNC,subtype=wo,firstbin=&fb. ,lastbin=&Lb.,increment=&ib.) , %rhisto(key=2,type=HNC,subtype=recov,firstbin=&fb. ,lastbin=&Lb.,increment=&ib.) from buda ; run; proc print data=buda ; run ; goptions nodisplay ; title3 "proc univariate histo dsn:aleppo"; proc univariate data=aleppo noprint; histogram write_off / nendpoints=11 outhist=calais ; histogram recovery_amt / nendpoints=11 outhist=dresden ; run; goptions display ; title3 "proc univariate histo dsn:aleppo var:write_off"; proc print data=calais ; run ; title3 "proc univariate histo dsn:aleppo var:recovery_amt"; proc print data=dresden ; run ; title3 "proc rank histo dsn:aleppo var:write_off"; proc sort data=aleppo out=aleppos ; by write_off ; run ; proc rank data=aleppos out=alpha_rk groups=10 ties=low ; var write_off ; ranks wo_rk ; run; proc sql ; select count(*) as count , wo_rk as decile , min(write_off) as decile_min from alpha_rk group by wo_rk order by wo_rk ; run; title3 "proc rank histo dsn:aleppo var:recovery_amt"; proc sort data=aleppo out=aleppos ; by recovery_amt ; run ; proc rank data=aleppos out=alpha_rk groups=10 ties=low ; var recovery_amt ; ranks recov_rk ; run; proc sql ; select count(*) as count , recov_rk as decile , min(recovery_amt) as decile_min from alpha_rk group by recov_rk order by recov_rk ; run; title3 "aleppo selected observations " ; proc sql ; select month_id, loan_no_char, write_off, recovery_amt from aleppo where ( ( write_off ne 0 ) or ( recovery_amt ne 0 ) ) and ( rand('uniform') < 0.05 ) ; run; %end ; /* (arg=test) to conduct test */ %mend rhistotest ; /* 00 01 */ /* test %rhistotest() ; /* (arg=test) to conduct test */ /* */