/* corrwndo.sas   Daniel Brockman 070730
 * Calculate corr matrix for time windows.
 * tested 070805
 */
%macro corrwndo(iset,Dv,vlist,DorN,start,length,interval,filespec);
/*
 * Parameters
 *
 *  iset      Input data set.  corrwndo presumes iset is the output
 *            of macro annvol().  If you use another source, then
 *            you proceed at your own risk.
 *  Dv        The date variable that indexes the time series.
 *            Contains a SAS Datevalue.
 *  vlist     List of variables of interest.  corrwndo will compute
 *            correlation matrices on these variables.
 *  DorN      For calculation of time windows over which to compute
 *            correlations, you may choose to start the first time
 *            window on a particular date, or with a particular
 *            observation number.
 *            =D  means that start is a SAS Datevalue which is the
 *                date of the first observation in the first time window.
 *            =N  means that start is the number of the observation in
 *                iset which becomes the first observation in the first
 *                time window.
 *            =other means you have erred; corrwndo writes an ERROR to
 *                the log and returns without producing output.
 *            The value is compared case-insensitively.
 *  start     See DorN.
 *  length    Integer length of time window.  If DorN=D, then length is
 *            the number of calendar days.  If DorN=N, then length is
 *            the number of consecutive observations.
 *  interval  The integer number of observations from the last obs of
 *            the previous time window to the 1st obs of the next time
 *            window.  If DorN=D, then the 1st obs of the next time
 *            window is the (interval)th calendar day after the last
 *            obs of the previous time window.  If DorN=N, then the
 *            first obs of the next time window is the (interval)th obs
 *            following the last obs of the previous time window.
 *  filespec  A file descriptor for the output text file.
 *            For output to the screen, filespec="screen".  Examples:
 *     %corrwndo(dsin,date,list,D,st,ln,iv,"screen") ;
 *     %corrwndo(dsin,date,list,D,st,ln,iv,"C:\rpt\out-BP-1.txt") ;
 *
 *  NOTE(review): when DorN=N the code below only uses the observation
 *  number to look up the starting DATE; from then on the window bounds
 *  (FDV, LDV) are advanced with date arithmetic on length and interval.
 *  Whether wndo2() reinterprets length as an observation count cannot be
 *  seen from this file -- confirm against wndo2().
 *
 *  WARNING: if DorN=D and start is not a datevalue, then corrwndo can
 *  enter an infinite or extremely long loop.
 *
 * Operation
 *
 *  corrwndo loops through the observations in iset according to
 *  parameters DorN, start, length, and interval, selecting a dataset of
 *  observations.  With the selected dataset, corrwndo computes the
 *  correlation matrix for the variables in vlist.  corrwndo then prints
 *  the lower triangle (including the main diagonal) of the correlation
 *  matrix in the text file.
 *
 * Dependencies (defined elsewhere): %getdsname, %wndo2, %mkvarlist2,
 *  %findavar, %delds.  Results of those helpers are read from the
 *  global macro variables greturn, greturn2 and greturn3.
 */
  %global greturn ;
  %global greturn1 ;

  /* Keep working variables out of the caller's / global symbol tables. */
  %local Cal start2 LastDate dsid nobs rc t1 t2 t3 t4 FDV LDV
         vvlist vvn counter vctr
         FDVy FDVm FDVd LDVy LDVm LDVd ;

  /* ---- validate arguments before touching any output destination ---- */
  %let DorN=%upcase(&DorN) ;
  %if (&DorN ne D) and (&DorN ne N) %then %do ;
    %put ERROR: corrwndo: DorN must be D or N but is "&DorN". Nothing done. ;
    %return ;
  %end ;

  /* Number of observations in iset; also proves that iset can be opened. */
  %let dsid=%sysfunc(open(&iset)) ;
  %if (&dsid eq 0) %then %do ;
    %put ERROR: corrwndo: cannot open input data set &iset.. Nothing done. ;
    %return ;
  %end ;
  %let nobs=%sysfunc(attrn(&dsid,nobs)) ;
  %let rc=%sysfunc(close(&dsid)) ;
  %if (&nobs lt 1) %then %do ;
    %put ERROR: corrwndo: input data set &iset has no observations (nobs=&nobs). ;
    %return ;
  %end ;

  /* wndo2() parm.  The windows are always driven by dates (see below),
   * so the same value is used for both DorN=D and DorN=N. */
  %let Cal=Y ;

  %if (&DorN eq N) %then %do ;
    /* We must give wndo2() the Date of the 1st obs for the new time
     * window.  wndo2() seems clunky as it was designed with other
     * assumptions in mind.  So we work around its deficiencies.
     * Otherwise we would have to rewrite it.
     */
    data _null_ ;
      set &iset (firstobs=&start obs=&start) ;  /* we only want 1 obs */
      call symputx('start2',&Dv) ;              /* pick up the date   */
    run ;
    %if (%length(&start2) eq 0) %then %do ;
      %put ERROR: corrwndo: could not read observation &start of &iset (nobs=&nobs). ;
      %return ;
    %end ;
    %let DorN=D ;            /* switch to date method */
    %let start=&start2 ;
  %end ;

  /* We also need to know what the last date is, so that we stop when
   * we run out of data.
   */
  data _null_ ;
    set &iset (firstobs=&nobs) ;        /* we only want 1 obs */
    call symputx('LastDate',&Dv) ;      /* pick up the date   */
  run ;

  /* ---- route the printed output ---- */
  %if ((&filespec ne "screen") and (&filespec ne screen) ) %then %do ;
    proc printto print=&filespec new ;  /* direct output to overwrite file */
    run ;
  %end ;
  %else %do ;
    proc printto ;                      /* direct output to screen */
    run ;
  %end ;
  options missing=' ' ;   /* print a blank for missing values in proc print */

  /* this is some kind of relic of a brilliant idea I think
  %let Nv=0 ;   * count the vars in vlist ;
  %do %while (%scan(&vlist,%eval(&Nv+1)) ne ) ;
    %let Nv=%eval(&Nv+1);   * incr count ;
  %end ;
  */

  /* Names for the four temporary data sets. */
  %getdsname(corrwndo1) ; %let t1=&greturn ;   /* one time window of iset   */
  %getdsname(corrwndo2) ; %let t2=&greturn ;   /* proc corr output          */
  %getdsname(corrwndo3) ; %let t3=&greturn ;   /* trimmed correlation rows  */
  %getdsname(corrwndo4) ; %let t4=&greturn ;   /* lower triangle for print  */

  /* ---- Loop the time windows ---- */
  %let FDV=&start ;                     /* 1st date of 1st time window */
  %put corrwndo L109 FDV:&FDV start:&start length:&length LastDate:&LastDate nobs:&nobs;

  /* NOTE(review): the bound FDV+length <= LastDate skips a window whose
   * last day (FDV+length-1) falls exactly on LastDate.  Left unchanged;
   * confirm whether that is intended. */
  %do %while(%eval(&FDV+&length) le &LastDate ) ;
    %let LDV=%eval(&FDV+&length-1) ;    /* last date of time window */

    %put prewndo2 test ;
    %wndo2(&iset,&t1,&Dv,&Cal,&FDV,&length) ;  /* put time window of data into t1 */
    %put postwndo2 iset:&iset t1:&t1 test ;

    proc corr data=&t1 noprint out=&t2 ;       /* calculate correlations */
      var &vlist ;
    run ;

    /*
     * 070803: instead of all this effort to construct time windows
     *   and calculate each one of them, it might work well
     *   to create a BY-variable in the data set which would contain
     *   a number for each time window and then
     *   use the BY statement in proc corr to perform the calculation.
     *   However, I'm out of time in this episode.
     * 070804: I came back to the idea and gave the BY-var approach
     *   a closer look (see corrwndo2()).  The BY-variable approach
     *   gave a profoundly simpler construction of the windows,
     *   keeping all in one dataset and enabling one pass through
     *   that one dataset to compute and print the correlations.
     *   However, we lose the metainfo identifying and distinguishing the
     *   earlier time windows from the later windows, unless we
     *   create additional dataset variables to associate with the
     *   contents of the proc corr output data set.  Maintaining the
     *   metainfo introduced complexities I chose not to pursue at
     *   this time.  So I return to corrwndo().
     */

    /*
     * we trim the vars and set the formats for the correlation triangle
     */
    data &t3 (keep=_NAME_ &vlist) ;     /* keep the ones we care about */
      set &t2 (firstobs=4) ;            /* obs1=mean, obs2=std, obs3=N */
      format _NUMERIC_ 5.2 ;            /* show zeroes after the decimal point */
    run ;

    /* ==========================
     * Turned out this stuff wasn't necessary and resulted from ignorance (mine).
     * These next few lines serve to identify the numeric variables in
     *   the order of appearance in the dataset.  I'm not sure whether
     *   var _NUMERIC_ already does this.
     * I plagiarized from http://support.sas.com/91doc/docMainpage.jsp
     * We reduce vvlist to the numeric variables it contains.
    %put corrwndo L148 vlist:&vlist ;  * log info only;
    %let dsid=%sysfunc(open(&t3));
    %let vvlist=;
    %let vvn=0;
    %do nvi=1 %to %sysfunc(attrn(&dsid,nvars));
      %if (%sysfunc(vartype(&dsid,&nvi)) = N) %then %do ;
        %let vvlist=&vvlist %sysfunc(varname(&dsid,&nvi));
        %let vvn=%eval(&vvn+1);
      %end;
    %end;
    %let rc=%sysfunc(close(&dsid));
    %put corrwndo L152 vvlist:&vvlist ;  * log info only;
    ========================== */
    %mkvarlist2(&t3) ;
    %let vvlist=&greturn2 ;             /* names of variables          */
    %let vvn=&greturn3 ;                /* number of numeric variables */

    /*
     * We want to get a proc to do the work of printing the
     *   lower triangle and of lining up the numbers and column titles.
     * Looks like proc print will print missing values as blanks
     *   if we specify the missing system option.
     * So, we shall set the variables to missing values for the
     *   upper triangle and use proc report.
     * We leave the main diagonal to prove to ourselves that we
     *   got it right.
     */
    %findavar(&t3,ctr) ;                /* name counter variable */
    %let counter = &greturn ;
    %findavar(&t3,vctr) ;               /* name counter variable */
    %let vctr = &greturn ;

    data &t4 (drop=&counter &vctr) ;
      set &t3 ;
      retain &counter 1 ;               /* initialize a counter        */
      array vv[&vvn] &vvlist ;          /* array of numeric variables  */
      /* On row i the counter becomes i+1, so columns i+1..vvn are blanked. */
      &counter=&counter+1 ;
      do &vctr=&counter to &vvn ;
        vv[&vctr]=. ;                   /* set upper triangle to missing */
      end ;
    run ;

    /* format the title dates (title statements are global, so no
     * enclosing step is needed) */
    %let FDVy=%sysfunc(year(&FDV)) ;
    %let FDVm=%sysfunc(month(&FDV)) ;
    %let FDVd=%sysfunc(day(&FDV)) ;
    %let LDVy=%sysfunc(year(&LDV)) ;
    %let LDVm=%sysfunc(month(&LDV)) ;
    %let LDVd=%sysfunc(day(&LDV)) ;
    title3 "Correlation triangle for time window" ;
    title4 "&FDVy.-&FDVm.-&FDVd. to &LDVy.-&LDVm.-&LDVd." ;

    proc print data=&t4 noobs heading=v ;
    run ;
    title3 ;
    title4 ;

    %let FDV=%eval(&LDV+&interval) ;    /* the next FDV */
  %end ;  /* end do while FDV+length */

  proc printto ;          /* reset to default output destinations */
  run ;
  options missing='.' ;   /* reset to default */

  %delds(&t1) ;           /* remove tempo files */
  %delds(&t2) ;
  %delds(&t3) ;
  %delds(&t4) ;
%mend corrwndo;