/*---------------------------------------------------------------------------
  Christine Iodice 061211
  Course project team, The Gang of Four:
      Prasad Satich, Christine Iodice, Saranne Warner, Daniel Brockman
  Course X446 SAS Data Analysis.  Jian-min Liu, Instructor.
  061211 db -- minor adjustments, renaming of models.

  Purpose: read the airline cost data set selected by the macro variable
  INUSE, derive bucket / log / squared variables, split the data by TYPE,
  and fit a series of PROC REG models for CPM.

  Note: block comments are used throughout because statement-style
  comments (asterisk ... semicolon) must not contain semicolons or
  unmatched quotation marks.
---------------------------------------------------------------------------*/

/* This statement used to be swallowed by the preceding header comment,
   which had no terminating semicolon, so the options never took effect. */
options ls=80 nocenter mlogic mprint symbolgen;

/* libname source 'C:\Documents and Settings\Christine\Desktop\SAS_Project'; */

/* parameters ----------------------------------- */
libname source 'c:\b\SAS-X446\prj' ;   /* location of data */
%let orig=air_jianmin_23Nov06 ;        /* orig data file */
%let clean=air_clean_pct ;             /* orig data excl extreme values (pct) */
%let extremes=air_ex_pct ;             /* excluded values data file (pct) */

/********************************************/
/* select data set to use for inputs        */
%let inuse=&orig;
/********************************************/
/* end parameters ------------------------------- */

/* Clear any titles left over from earlier work in the session. */
title;
title2;

/* Create the working data set with derived variables and labels.
   It is written to the SOURCE library, so it is stored at the libname
   path rather than in WORK. AVS and CSM are dropped from the output. */
Data source.air1 (Drop=AVS CSM);
    set source.&inuse;

    /* Below are potential outliers to delete (all currently disabled):
         if CPM = 4.737 then delete;   identified as outlier through plots
         if ALF = .287  then delete;   identified as outlier through plots
         if CPM = 2.341 then delete;   identified as outlier through stars method
         if CPM = 3.306 then delete;   identified as outlier through stars method
    */

    /* Now creating bucket variable for plane size.
       psize stays missing when SPA is missing or outside [0, 0.4]. */
    if      0.0 <= SPA <= 0.1 then psize=1;
    else if 0.1 <  SPA <= 0.2 then psize=2;
    else if 0.2 <  SPA <= 0.3 then psize=3;
    else if 0.3 <  SPA <= 0.4 then psize=4;

    /* Now creating bucket variable for ASL.
       pdistance stays missing when ASL is missing or outside [0, 4000].
       NOTE(review): the ASL label below says the unit is 1000 miles, but
       these bounds run from 0 to 4000 -- confirm the unit of ASL in the
       input data; if it really is thousands of miles, every row lands in
       bucket 1. */
    if      0    <= ASL <= 500  then pdistance = 1;
    else if 500  <  ASL <= 1000 then pdistance = 2;
    else if 1000 <  ASL <= 1500 then pdistance = 3;
    else if 1500 <  ASL <= 2000 then pdistance = 4;
    else if 2000 <  ASL <= 2500 then pdistance = 5;
    else if 2500 <  ASL <= 3000 then pdistance = 6;
    else if 3000 <  ASL <= 3500 then pdistance = 7;
    else if 3500 <  ASL <= 4000 then pdistance = 8;

    Seats         = SPA*1000;
    Avgfilled     = ALF*seats;            /* create variable for filled seats */
    AvgEmpty      = (1-ALF)*seats;        /* create variable for empty seats  */
    Fill_to_Empty = Avgfilled / AvgEmpty;

    /* Natural-log transforms. */
    LCPM       = Log(CPM);
    LUTL       = Log(UTL);
    LASL       = Log(ASL);
    LSPA       = Log(SPA);
    LALF       = log(ALF);
    LFilled    = log(avgfilled);
    Lempty     = log (avgempty);
    Lpsize     = log(psize);
    Lpdistance = log(pdistance);

    /* Squared terms. */
    SUTL    = UTL**2;
    SASL    = ASL**2;
    SSPA    = SPA**2;
    SALF    = ALF**2;
    Sfilled = Avgfilled**2;
    Sempty  = Avgempty**2;

    label CPM           = 'CPM: Cost per Passenger Mile (cents)'
          UTL           = 'UTL: Avg Hrs per Day in Use'
          ASL           = 'ASL: Avg Length of nonstop legs (1000 miles)'
          SPA           = 'SPA:Avg Nbr of Seats per Aircraft (per 1000 seats)'
          Seats         = 'Seats:Actual Seats on aircraft'
          ALF           = 'ALF: Avg Load Factor (% occupied)'
          Type          = 'Type: Range (short or long)'
          AvgEmpty      = 'Average Nbr of Empty Seats'
          Avgfilled     = 'Average Nbr of Filled Seats'
          Fill_to_Empty = 'Ratio filled to Empty'
          Psize         = 'Plane Size: 1 is lowest';
run;

Proc print data=source.air1;
run;

/* Split by TYPE: 0 goes to AirShort, 1 goes to AirLong.
   Rows with any other TYPE value are written to neither data set. */
Data source.AirShort source.airLong;
    set source.air1;
    if type = 0 then output source.airshort;
    If type = 1 then output source.airlong;
run;

Proc print data=source.airshort;
run;

Proc print data=source.airlong;
run;

/* keeping ASL in, plays a role in it */
/* Below is Choice 1 or 2: (we get more with LUTL but it could be UTL) */
Proc Reg data=source.air1;
    Air1A: Model CPM = ASL LUTL LFilled SPA / stb vif influence p r partial;
    Title 'Air1A: CPM = ASL LUTL Lfilled SPA';
    Plot Student.*predicted. cookd.*obs.;
    Plot Npp.*Residual.;
run;
quit;

/* Below is Choice 1 or 2 */
Proc Reg data=source.air1;
    Air1B: Model CPM = ASL LUTL LFilled Lempty / stb vif influence p r partial;
    Title 'Air1B: CPM = ASL LUTL Lfilled Lempty';
    Plot Student.*predicted. cookd.*obs.;
    Plot Npp.*Residual.;
run;
quit;

/* this is old model 5 -- but changed to SLE = .05, ASL drops out --
   NOTE(review): the statement below actually uses SLE=.15 and SLS=.05;
   confirm which entry level was intended. */
Proc reg data=source.air1;
    m5xASL: model CPM = UTL ASL SPA ALF Type
            / Selection = stepwise SLE=.15 SLS = .05
              p r cli clm stb vif collinoint;
    Title " m5xASL: Stepwise Regression of original variables";
run;
quit;

/*------------------------------------------------------------------------------------------
  Now breaking it up by Type, with the idea that there is a different function driving
  short range and long range planes.  It's OK for short range planes to fly w/ low capacity
  and then make it up by doing many flights per day.  But, Long Range planes can't fly w/ low
  capacity, b/c they are not making many flights per day.  In this case, more miles makes it
  way more costly.  It's less costly for short range flights to fly w/ low capacity and it
  might even be better as they will do more flights.
------------------------------------------------------------------------------------------*/

/* By breaking it by Type - ASL drops out b/c there is not a lot of variation */
/* doesn't matter if we have Log UTL, doesn't make much of a difference */
Proc Reg data=source.airshort;
    Short1: Model CPM = ASL LUTL LFilled Lempty / stb vif influence p r partial;
    Title 'Short1: CPM = ASL LUTL LFilled Lempty';
    Plot Student.*predicted. cookd.*obs.;
    Plot Npp.*Residual.;
run;
quit;

/* according to above we drop ASL */
/* this is the best for airshort */
Proc reg data = source.airshort;
    Short2: Model CPM = Lfilled Lempty UTL / stb vif influence p r;
    Title 'Short2: CPM = Lfilled Lempty UTL';
run;
quit;

/* lpsize is not significant */
Proc reg data = source.airshort;
    Short3: Model CPM = Lfilled Lpsize UTL / stb vif influence p r;
    Title 'Short3: CPM = Lfilled Lpsize UTL';
run;
quit;

/* Now looking at airlong */
/* UTL is no longer significant */
Proc reg data = source.airlong;
    Long1: Model CPM = Lfilled lempty UTL/ stb vif influence p r;
    /* Title corrected: it previously named lpsize, which is not in this model. */
    Title 'Long1: CPM = lfilled lempty UTL';
run;
quit;

Proc reg data = source.airlong;
    Long2: Model CPM = Lfilled lempty/ stb vif influence p r;
    Title 'Long2: CPM = lfilled lempty';
run;
quit;

/* Below is the best for airlong */
Proc Reg data=source.airlong;
    Long3: Model CPM =Lfilled lpsize/ stb vif influence;
    Title 'Long3:CPM = Lfilled lpsize';
run;
Quit;

/* stepwise that was used: Plugged in different data source */
Proc Reg data=source.air1;
    Bigstep: MOdel CPM = UTL ASL SPA ALF TYPE
                         Avgfilled avgempty Psize pdistance
                         LUTL LASL LSPA LALF Lfilled Lempty Lpsize lpdistance
                         SUTL SASL SSPA SALF Sfilled Sempty
             / Selection=stepwise SLE = .15 SLS =.1 p r stb vif influence;
    title 'BIG stepwise';
run;
Quit;