* ea-07  Daniel Brockman 061211  Regression Models -- Air;
* Course project team: Christine Iodice, Prasad Satich, Saranne Warner, Daniel Brockman ;
* Course X446 SAS Data Analysis.  Jian-min Liu, Instructor. ;

/* ---------------------------------------------------------------------
 * Purpose
 *   Fit a collection of candidate regression models (almost all with
 *   CPM as the response) to the air data set, collect the fit
 *   statistics of every model in one OUTEST= data set, and print the
 *   models ranked from best to worst adjusted R-squared.
 *
 * Flow
 *   1. Parameters : library location and the input data set to use.
 *   2. DATA air2  : derived, bucketed and transformed predictors.
 *   3. PROC REG   : one labelled MODEL statement per candidate model.
 *   4. Ranking    : sort the OUTEST= rows and print the comparison.
 * --------------------------------------------------------------------- */

Title 'Comparative collection of models';

/* Instructions: First, run d-04.sas, at least once,
 * Then run ea-07.sas .
 * Go immediately to the last page of ea-07.sas output
 * to see the ranking of the models.
 */

Title2 'Program ea-07.sas';
options ls=80;

* parameters ----------------------------------- ;
* libname source 'C:\Documents and Settings\Christine\Desktop\SAS_Project';
libname source 'c:\b\SAS-X446\prj' ;   * location of data;
%let orig=air_jianmin_23Nov06 ;        * orig data file ;
%let cleanpct=air_clean_pct ;          * orig data excluding extreme values ;
%let cleanstr=air_clean_str ;
* %let extremes=air_ex_pct ;           * excluded values data file ;
%let extremes=air_ex_str ;
********************************************;
* select input data set to use for models  ;
*                                          ;
%let inuse=&orig;
*                                          ;
********************************************;
* end parameters ------------------------------- ;

/* A bare TITLE statement cancels every title line currently in effect,
   so the two titles set above are not carried onto the reports below. */
title;

/* Disabled diagnostic listings.
   NOTE: the macro variable clean referenced below is not defined in the
   parameter section (only cleanpct and cleanstr are); point it at one of
   those before re-enabling this block.

Title1 "Original Data" ;
proc print data=source.&orig ;
run;

Title1 "Excluding extreme observations";
proc print data=source.&clean ;
run;
*/

Title1 "Input Data Used" ;
proc print data=source.&inuse ;
run;

* cook up some variable transformations;
/* Builds work.air2 from the selected input data set.
   Reciprocals of zero and logs of non-positive values are left to SAS,
   which sets the result to missing and writes a note to the log. */
data air2;
  set source.&inuse ;

  Seats     = SPA*1000;
  AvgFilled = ALF*seats;        *create variable for filled seats;
  AvgEmpty  = (1-ALF)*seats;    *create variable for empty seats;

  /* reciprocal transformations */
  IAVGF = 1/AvgFilled ;
  ISPA  = 1/SPA ;
  IALF  = 1/ALF ;
  IASL  = 1/ASL ;

  * BUCKETIZATION ;
  * now creating bucket variable for plane size;
  /* The ranges are mutually exclusive, so an else-if chain assigns the
     same value as independent IF statements would.  SPA outside
     [0, 0.4] (or missing) leaves psize missing. */
  if      0.0 <= SPA <= 0.1 then psize=1;
  else if 0.1 <  SPA <= 0.2 then psize=2;
  else if 0.2 <  SPA <= 0.3 then psize=3;
  else if 0.3 <  SPA <= 0.4 then psize=4;

  *now creating bucket variables for ASL;
  /* 500-wide buckets; ASL outside [0, 4000] (or missing) leaves
     pdistance missing. */
  if      0    <= ASL <= 500  then pdistance = 1;
  else if 500  <  ASL <= 1000 then pdistance = 2;
  else if 1000 <  ASL <= 1500 then pdistance = 3;
  else if 1500 <  ASL <= 2000 then pdistance = 4;
  else if 2000 <  ASL <= 2500 then pdistance = 5;
  else if 2500 <  ASL <= 3000 then pdistance = 6;
  else if 3000 <  ASL <= 3500 then pdistance = 7;
  else if 3500 <  ASL <= 4000 then pdistance = 8;

  Fill_to_Empty = Avgfilled / AvgEmpty;

  /* log transformations */
  LCPM       = Log(CPM);
  LUTL       = Log(UTL);
  LASL       = Log(ASL);
  LSPA       = Log(SPA);
  LALF       = log(ALF);
  LFilled    = log(avgfilled);
  Lempty     = log (avgempty);
  Lpsize     = log(psize);
  Lpdistance = log(pdistance);

  /* squared terms (used by the Bigstep candidate list) */
  SUTL    = UTL**2;
  SASL    = ASL**2;
  SSPA    = SPA**2;
  SALF    = ALF**2;
  Sfilled = Avgfilled**2;
  Sempty  = Avgempty**2;

  PM  = SPA * ALF * ASL ;   /* Passenger-miles */
  IPM = (1/PM) ;

  /* polynomial fitting */
  UTL2 = UTL**2;  UTL3 = UTL**3;  UTL4 = UTL**4;  UTL5 = UTL**5;
  ASL2 = ASL**2;  ASL3 = ASL**3;  ASL4 = ASL**4;  ASL5 = ASL**5;
  SPA2 = SPA**2;  SPA3 = SPA**3;  SPA4 = SPA**4;  SPA5 = SPA**5;
  ALF2 = ALF**2;  ALF3 = ALF**3;  ALF4 = ALF**4;  ALF5 = ALF**5;
run;

Title1 'Regression models';
/* Every MODEL statement carries a label, which becomes the _MODEL_ value
   of its row in the OUTEST= data set airregest.  Each model requests the
   ADJRSQ option, whose statistics the ranking report further down reads
   from that data set (see the PROC REG OUTEST= documentation). */
proc reg data=air2 outest=airregest;

  /* --- inverse passenger-miles and polynomial stepwise --- */
  im2:       model CPM = IPM / stb vif adjrsq;
  polystep:  model CPM = UTL  ASL  SPA  ALF
                         UTL2 ASL2 SPA2 ALF2
                         UTL3 ASL3 SPA3 ALF3
                         UTL4 ASL4 SPA4 ALF4
                         UTL5 ASL5 SPA5 ALF5
                   / selection = stepwise stb vif adjrsq;

  /* --- Air1 family, with and without the UTL / ASL terms --- */
  Air1A:     Model CPM = ASL LUTL LFilled SPA / stb vif adjrsq;
  Air1AxUTL: Model CPM = ASL LFilled SPA / stb vif adjrsq;
  Air1B:     Model CPM = ASL LUTL LFilled Lempty / stb vif adjrsq;
  Air1BxUTL: Model CPM = ASL LFilled Lempty / stb vif adjrsq;
  m5xASL:    model CPM = UTL SPA ALF / stb vif adjrsq;

  /* --- Short / Long families.
         Long1 has the same specification as Short2, so the two rows
         will show identical fit statistics in the ranking. --- */
  * Short1 == Air1B;
  * Short1:  Model CPM = ASL LUTL LFilled Lempty / stb vif adjrsq;
  Short2:    Model CPM = Lfilled Lempty UTL / stb vif adjrsq;
  Short3:    Model CPM = Lfilled Lpsize UTL / stb vif adjrsq;
  Long1:     Model CPM = Lfilled Lempty UTL / stb vif adjrsq;
  Long2:     Model CPM = Lfilled Lempty / stb vif adjrsq;
  Long3:     Model CPM =Lfilled lpsize / stb vif adjrsq;

  /* --- stepwise search over raw, bucketed, log and squared terms --- */
  Bigstep:   Model CPM = UTL ASL SPA ALF TYPE Avgfilled avgempty Psize pdistance
                         LUTL LASL LSPA LALF Lfilled Lempty Lpsize Lpdistance
                         SUTL SASL SSPA SALF Sfilled Sempty
                   / Selection=stepwise SLE = .15 SLS =.1 stb vif adjrsq
                     /*p r stb vif influence*/;

  /* --- small hand-picked models --- */
  model1:    model CPM = AvgFilled /stb vif adjrsq ;
  model2:    Model CPM = AvgFilled UTL ASL /stb vif adjrsq ;
  model3:    Model CPM = AvgFilled Type /stb vif adjrsq ;
  model4:    Model CPM = UTL ASL /stb vif adjrsq ;
  model5:    model CPM = UTL ASL SPA ALF /stb vif adjrsq ;

  /* --- reciprocal predictors --- */
  im1:       model CPM=ISPA IALF / stb vif adjrsq ;
  cpmistep:  model CPM=IAVGF ISPA IASL IALF /selection=stepwise stb vif adjrsq ;
  cpmviavgf: model CPM=IAVGF /stb vif adjrsq ;
  cpmvispa:  model CPM=ISPA /stb vif adjrsq ;
  cpmviasl:  model CPM=IASL /stb vif adjrsq ;
  cpmvialf:  model CPM=IALF /stb vif adjrsq ;

  /* --- single-predictor models, one per raw input variable --- */
  cpmvutl:   model CPM=UTL /stb vif adjrsq ;
  cpmvasl:   model CPM=ASL /stb vif adjrsq ;
  cpmvspa:   model CPM=SPA /stb vif adjrsq ;
  cpmvavs:   model CPM=AVS /stb vif adjrsq ;
  cpmvalf:   model CPM=ALF /stb vif adjrsq ;
  cpmvcsm:   model CPM=CSM /stb vif adjrsq ;
  cpmvtype:  model CPM=TYPE /stb vif adjrsq ;

  /* --- mixes of the raw input variables --- */
  mx1:       model CPM=UTL ASL SPA AVS ALF CSM TYPE / selection = stepwise stb vif adjrsq;
  mx2:       model CPM=UTL ASL SPA AVS ALF CSM / selection = stepwise stb vif adjrsq;
  mx3:       model CPM=UTL ASL SPA AVS ALF / selection = stepwise stb vif adjrsq;
  mx4:       model CPM=UTL SPA ALF TYPE / selection = stepwise stb vif adjrsq;
  mx6:       model CPM=SPA ALF / stb vif adjrsq;
  mx7:       model CPM=SPA ALF TYPE / stb vif adjrsq;
  mx8:       model CPM=ASL SPA ALF / stb vif adjrsq;

  /* --- verification models; verf10 and verf12 use a response other
         than CPM, so their rows are not directly comparable with the
         rest of the ranking --- */
  verf10:    model LCPM=LASL / stb vif adjrsq;
  verf11:    model CPM=LASL / stb vif adjrsq;
  verf12:    model AVS=AvgFilled / stb vif adjrsq;

  /* ntxf12 has the same specification as model1 */
  ntxf11:    model CPM=SPA ALF LASL AvgFilled / selection = stepwise stb vif adjrsq;
  ntxf12:    model CPM=AvgFilled / stb vif adjrsq;
run;
quit;   /* PROC REG is interactive; end it explicitly before the ranking steps */

* print comparison.;
data airsorta;
  set airregest;
  /* Retained so that airsorta keeps the same columns as before; it is
     no longer the sort key (see the PROC SORT step below). */
  absadjrsq = abs(_ADJRSQ_) ;
run;

/* Rank on the signed adjusted R-squared.  A negative value means the
   model fits worse than the intercept alone, so sorting on the absolute
   value would wrongly promote such a model above weak but valid ones. */
proc sort data=airsorta;
  by descending _ADJRSQ_ ;
run;

proc print data=airsorta;
  Title 'Comparing models by Adjusted R-Squared';
  var _MODEL_ _RSQ_ _ADJRSQ_ ;
run;

*----------------------------------------;