/* corrwndo.sas Daniel Brockman 070730 Calculate corr matrix for time windows.*/
* tested 070805;
%macro corrwndo(iset,Dv,vlist,DorN,start,length,interval,filespec);
/*
 * corrwndo: print a correlation triangle for each of a series of time
 *           windows taken from one input data set.
 *
 * Parameters
 *  iset     Input data set. corrwndo presumes iset is the output
 *           of macro annvol(). If you use another source, then
 *           you proceed at your own risk.
 *  Dv       The date variable that indexes the time series.
 *           Contains a SAS Datevalue.
 *  vlist    List of variables of interest. corrwndo computes
 *           correlation matrices on these variables.
 *  DorN     Chooses how the first time window is located: by a
 *           particular date, or by a particular observation number.
 *             =D     start is a SAS Datevalue, the date of the first
 *                    observation in the first time window.
 *             =N     start is the number of the observation in iset
 *                    which becomes the first observation in the first
 *                    time window. The macro reads the date of that
 *                    observation and from then on works by date.
 *             =other means you have erred.
 *  start    See DorN.
 *  length   Integer length of a time window. The last date of a window
 *           is computed as (first date + length - 1).
 *  interval Integer offset from the last date of one window to the
 *           first date of the next window; the next first date is
 *           computed as (last date + interval).
 *  filespec A file descriptor for the output text file.
 *           For output to the screen, filespec="screen". Examples:
 *             %corrwndo(dsin,date,list,D,st,ln,iv,"screen") ;
 *             %corrwndo(dsin,date,list,D,st,ln,iv,"C:\rpt\out-BP-1.txt") ;
 *
 * WARNING: if DorN=D and start is not a datevalue, then corrwndo can
 *          enter an infinite or extremely long loop.
 *
 * Processing
 *  1. Route procedure output to filespec (or to the default destination).
 *  2. Read the date of the last observation of iset (LastDate).
 *  3. For each window: call wndo2() to extract the window into a scratch
 *     data set, run proc corr on vlist, keep only the correlation rows,
 *     blank out the upper triangle (the main diagonal is kept), and
 *     proc print the result under a title showing the window dates.
 *  4. Restore the output destination and the MISSING option, and delete
 *     the scratch data sets.
 *
 * Helper macros called (defined elsewhere): getdsname, wndo2,
 * mkvarlist2, findavar, delds. Results come back through the global
 * macro variables greturn, greturn2 and greturn3.
 *
 * NOTE(review): Cal is assigned only inside the DorN=N branch. When the
 *   caller passes DorN=D, this macro never sets Cal, so the wndo2() call
 *   relies on Cal existing in an enclosing scope - confirm.
 * NOTE(review): the loop test is (FDV+length le LastDate) while the last
 *   date of a window is FDV+length-1, so a window that ends exactly on
 *   LastDate is not processed - confirm that this is intended.
 */
%global greturn ;
%global greturn1 ;

/* ---- route the printed output ---- */
%if ((&filespec ne "screen") and (&filespec ne screen) ) %then %do ;
  proc printto print=&filespec new ; run; * direct output to overwrite file ;
%end ;
%else %do ;
  proc printto ; run ; * direct output to screen ;
%end ;
options missing=' '; * print a blank for missing values in proc print;

/* this is some kind of relic of a brilliant idea I think
%let Nv=0 ; * count the vars in vlist ;
%do %while (%scan(&vlist,%eval(&Nv+1)) ne ) ;
%let Nv=%eval(&Nv+1); * incr count ;
%end ;
*/

/* ---- convert an observation-number start into a date start ---- */
%if (&DorN eq N) %then %do ;
  %let Cal=N; * wndo2() parm ;
  /* We must give wndo2() the Date of the 1st obs for the new time window.
   * wndo2() seems clunky as it was designed with other assumptions in
   * mind. So we work around its deficiencies. Otherwise we would have
   * to rewrite it.
   */
  data _null_ ;
    set &iset (firstobs=&start obs=&start) ; * we only want 1 obs ;
    call symput('start2',&Dv) ; * pick up the date ;
  run;
  %let DorN=D ; * switch to date method ;
  %let Cal=Y ; * wndo2() parm ;
  %let start=&start2;
%end ; * end if DorN ;

/* We also need to know what the last date is, so that we stop when
 * we run out of data.
 */
%let dsid=%sysfunc(open(&iset));
%let nobs=%sysfunc(attrn(&dsid,nobs)) ; * get number of obs in iset ;
%let rc=%sysfunc(close(&dsid));
data _null_ ;
  set &iset (firstobs=&nobs) ; * we only want 1 obs ;
  call symput('LastDate',&Dv) ; * pick up the date ;
run;

/* ---- obtain names for the four scratch data sets ----
 *  t1 = observations of the current time window (filled by wndo2)
 *  t2 = proc corr output
 *  t3 = correlation rows only, formatted
 *  t4 = t3 with the upper triangle set to missing
 */
%getdsname(corrwndo1);
%let t1=&greturn; * tempo dataset ;
%getdsname(corrwndo2);
%let t2=&greturn; * tempo dataset ;
%getdsname(corrwndo3);
%let t3=&greturn; * tempo dataset ;
%getdsname(corrwndo4);
/* The fourth name must go into t4. Storing it in t3 would leave t4
 * undefined for the steps below and lose the name stored for corrwndo3.
 */
%let t4=&greturn; * tempo dataset ;

/* Loop the time windows */
%let FDV=&start ; * 1st date of 1st time window ;
%put corrwndo L109 FDV:&FDV start:&start length:&length LastDate:&LastDate nobs:&nobs;
%do %while(%eval(&FDV+&length) le &LastDate ) ;
  %let LDV=%eval(&FDV+&length-1) ; * last date of time window ;
  %put prewndo2 test ;
  %wndo2(&iset,&t1,&Dv,&Cal,&FDV,&length) ; * put time window of data into t1;
  %put postwndo2 iset:&iset t1:&t1 test ;
  proc corr data=&t1 noprint out=&t2 ; * calculate correlations ;
    var &vlist ;
  run;
  /*
   * 070803: instead of all this effort to construct time windows
   * and calculate each one of them, it might work well
   * to create a BY-variable in the data set which would contain
   * a number for each time window and then
   * use the BY statement in proc corr to perform the calculation.
   * However, I'm out of time in this episode.
   * 070804: I came back to the idea and gave the BY-var approach
   * a closer look (see corrwndo2()). The BY-variable approach
   * gave a profoundly simpler construction of the windows,
   * keeping all in one dataset and enabling one pass through
   * that one dataset to compute and print the correlations.
   * However, we lose the metainfo identifying and distinguishing the
   * earlier time windows from the later windows, unless we
   * create additional dataset variables to associate with the
   * contents of the proc corr output data set. Maintaining the
   * metainfo introduced complexities I chose not to pursue at
   * this time. So I return to corrwndo().
   */
  /*
   * we trim the vars and set the formats for the correlation triangle
   */
  data &t3 (keep=_NAME_ &vlist); * keep the ones we care about ;
    set &t2 (firstobs=4); * obs1=mean, obs2=std, obs3=N ;
    format _NUMERIC_ 5.2 ; * show zeroes after the decimal point ;
  run;
  /* ==========================
   * Turned out this stuff wasn't necessary and resulted from ignorance (mine).
   * These next few lines serve to identify the numeric variables in
   * the order of appearance in the dataset. I'm not sure whether
   * var _NUMERIC_ already does this.
   * I plagiarized from http://support.sas.com/91doc/docMainpage.jsp
   * We reduce vvlist to the numeric variables it contains.
  %put corrwndo L148 vlist:&vlist ; * log info only;
  %let dsid=%sysfunc(open(&t3));
  %let vvlist=;
  %let vvn=0;
  %do nvi=1 %to %sysfunc(attrn(&dsid,nvars));
  %if (%sysfunc(vartype(&dsid,&nvi)) = N) %then %do ;
  %let vvlist=&vvlist %sysfunc(varname(&dsid,&nvi));
  %let vvn=%eval(&vvn+1);
  %end;
  %end;
  %let rc=%sysfunc(close(&dsid));
  %put corrwndo L152 vvlist:&vvlist ; * log info only;
  ========================== */
  %mkvarlist2(&t3) ;
  %let vvlist=&greturn2 ; /* names of variables ;*/
  %let vvn=&greturn3; /* number of numeric variables ;*/
  /*
   * We want to get a proc to do the work of printing the
   * lower triangle and of lining up the numbers and column titles.
   * Looks like proc print will print missing values as blanks
   * if we specify the missing system option.
   * So, we shall set the variables to missing values for the
   * upper triangle and use proc report.
   * We leave the main diagonal to prove to ourselves that we
   * got it right.
   */
  %findavar(&t3,ctr) ; * name counter variable ;
  %let counter = &greturn;
  %findavar(&t3,vctr) ; * name counter variable ;
  %let vctr = &greturn;
  /* On row k the counter equals k+1 after the increment, so columns
   * k+1 .. vvn are blanked and columns 1 .. k (through the diagonal)
   * are kept.
   */
  data &t4 (drop=&counter &vctr);
    set &t3 ;
    retain &counter 1 ; * initialize a counter ;
    array vv[&vvn] &vvlist ; * array of numeric variables ;
    &counter=&counter+1;
    do &vctr=&counter to &vvn ;
      vv[&vctr]=. ; * set upper triangle to missing ;
    end;
  run;
  /* The %let statements below are resolved by the macro processor; the
   * data _null_ step itself contains no executable statements and is
   * ended by the proc print that follows.
   */
  data _null_;
  * format the title dates ;
  %let FDVy=%sysfunc(year(&FDV));
  %let FDVm=%sysfunc(month(&FDV));
  %let FDVd=%sysfunc(day(&FDV));
  %let LDVy=%sysfunc(year(&LDV));
  %let LDVm=%sysfunc(month(&LDV));
  %let LDVd=%sysfunc(day(&LDV));
  title3 "Correlation triangle for time window" ;
  title4 "&FDVy.-&FDVm.-&FDVd. to &LDVy.-&LDVm.-&LDVd." ;
  proc print data=&t4 noobs heading=v ;
  run;
  title3 ;
  title4;
  %let FDV=%eval(&LDV+&interval) ; * the next FDV ;
%end ; * end do while FDV+length ;

/* ---- restore the session state and clean up ---- */
proc printto; run; * reset to default output destinations ;
options missing='.'; * reset to default ;
%delds(&t1); * remove tempo files ;
%delds(&t2); * remove tempo files ;
%delds(&t3); * remove tempo files ;
%delds(&t4); * remove tempo files ;
%mend corrwndo;