* Christine Iodice 061211  Results of myTop3.sas ;
* Course project team: Christine Iodice, Prasad Satich, Saranne Warner, Daniel Brockman ;
* Course X446 SAS Data Analysis.  Jian-min Liu, Instructor.
* 061211 db: 1. Minor operational adjustments
*            2. addition of model im1
*            3. removed stepwise selection from m5xASL
*            4. restated m5xASL to omit ASL and TYPE, correcting error
*;

/*---------------------------------------------------------------------------
  Overview
    1. Parameters: library location, input data set names, PDF output file.
    2. DATA step: builds source.air1 from the selected input data set, adding
       bucket, log, squared and inverse versions of the predictors.
    3. Five PROC REG models on source.air1, all routed to one ODS PDF file.
---------------------------------------------------------------------------*/

options ls=80 nocenter mlogic mprint symbolgen;

/* ---- parameters ---------------------------------------------------------
   Each setting below had two back-to-back assignments in the original, so
   only the second ever took effect.  The inactive alternative is kept as a
   comment; swap the comment markers to switch.                             */

/* libname source 'C:\Documents and Settings\Christine\Desktop\SAS_Project'; */
libname source 'c:\b\SAS-X446\prj' ;     * location of data;

%let orig=air_jianmin_23Nov06 ;          * orig data file ;

/* %let clean=air_clean_pct ; */
%let clean=air_clean_str ;               * orig data excluding extreme values ;

/* %let extremes=air_ex_pct ; */
%let extremes=air_ex_str ;               * excluded values data file ;

/* Destination of the ODS PDF report.  The default is the path that was
   hard-coded in the original ODS statement.
   NOTE(review): this folder differs from the active libname location above,
   so confirm it exists on the machine running the script.                  */
%let pdfout=C:\Documents and Settings\Christine\Desktop\SAS_Project\Top3Results.pdf ;

********************************************;
* select input data set to use for models  ;
*                                           ;
%let inuse=&orig;
*                                           ;
********************************************;
* end parameters ------------------------------- ;

title;   /* clear any titles left over from earlier work in the session */

/* Create the analysis data set with derived variables and labels.
   It is written to the SOURCE library, so it is permanent, not temporary.
   AVS and CSM are dropped from the output. */
data source.air1 (drop=AVS CSM);
    set source.&inuse;

    * below are potential outliers to delete ;
    *if CPM = 4.737 then delete;   *identified as outlier through plots;
    *if ALF = .287 then delete;    *identified as outlier through plots;
    *if CPM = 2.341 then delete;   *identified as outlier through stars method;
    *if CPM = 3.306 then delete;   *identified as outlier through stars method;

    /* Now creating bucket variable for plane size.
       The ranges are mutually exclusive, so an IF / ELSE IF chain assigns the
       same values as the original independent IF statements.  SPA values that
       are missing or outside 0 to 0.4 leave psize missing. */
    if      0.0 <= SPA <= 0.1 then psize = 1;
    else if 0.1 <  SPA <= 0.2 then psize = 2;
    else if 0.2 <  SPA <= 0.3 then psize = 3;
    else if 0.3 <  SPA <= 0.4 then psize = 4;

    /* Now creating bucket variable for ASL, in steps of 500 up to 4000.
       NOTE(review): the ASL label below says the unit is 1000 miles, while
       these cut points look like plain miles.  If ASL really is in thousands
       of miles, every row lands in bucket 1 - confirm against the data. */
    if      0    <= ASL <= 500  then pdistance = 1;
    else if 500  <  ASL <= 1000 then pdistance = 2;
    else if 1000 <  ASL <= 1500 then pdistance = 3;
    else if 1500 <  ASL <= 2000 then pdistance = 4;
    else if 2000 <  ASL <= 2500 then pdistance = 5;
    else if 2500 <  ASL <= 3000 then pdistance = 6;
    else if 3000 <  ASL <= 3500 then pdistance = 7;
    else if 3500 <  ASL <= 4000 then pdistance = 8;

    Seats     = SPA*1000;
    Avgfilled = ALF*Seats;          * create variable for filled seats;
    AvgEmpty  = (1-ALF)*Seats;      * create variable for empty seats;

    /* Guarded ratio: a zero count of empty seats would otherwise log a
       division-by-zero note and set _ERROR_ for the row.  The result is
       missing in that case, exactly as before. */
    if AvgEmpty ne 0 then Fill_to_Empty = Avgfilled / AvgEmpty;
    else Fill_to_Empty = .;

    /* Natural-log transforms.  A zero, negative or missing argument yields a
       missing value. */
    LCPM       = log(CPM);
    LUTL       = log(UTL);
    LASL       = log(ASL);
    LSPA       = log(SPA);
    LALF       = log(ALF);
    LFilled    = log(Avgfilled);
    Lempty     = log(AvgEmpty);
    Lpsize     = log(psize);
    Lpdistance = log(pdistance);

    /* Squared terms. */
    SUTL    = UTL**2;
    SASL    = ASL**2;
    SSPA    = SPA**2;
    SALF    = ALF**2;
    Sfilled = Avgfilled**2;
    Sempty  = AvgEmpty**2;

    * db: Vars for IM1 ---------------------- ;
    /* Inverse terms, guarded against a zero denominator for the same reason
       as Fill_to_Empty above. */
    if SPA ne 0 then ISPA = 1/SPA;
    else ISPA = .;
    if ALF ne 0 then IALF = 1/ALF;
    else IALF = .;
    * --------------------------------------- ;

    label CPM           = 'CPM: Cost per Passenger Mile (cents)'
          UTL           = 'UTL: Avg Hrs per Day in Use'
          ASL           = 'ASL: Avg Length of nonstop legs (1000 miles)'
          SPA           = 'SPA:Avg Nbr of Seats per Aircraft (per 1000 seats)'
          Seats         = 'Seats:Actual Seats on aircraft'
          ALF           = 'ALF: Avg Load Factor (% occupied)'
          Type          = 'Type: Range (short or long)'
          AvgEmpty      = 'Average Nbr of Empty Seats'
          Avgfilled     = 'Average Nbr of Filled Seats'
          Fill_to_Empty = 'Ratio filled to Empty'
          Psize         = 'Plane Size: 1 is lowest';
run;

/* Disabled: listing of air1 and the split into short-range and long-range
   data sets.  Kept for reference.

Proc print data=source.air1;
run;

Data source.AirShort source.airLong;
    set source.air1;
    if type = 0 then output source.airshort;
    If type = 1 then output source.airlong;
run;

Proc print data=source.airshort;
run;

Proc print data=source.airlong;
run;
*/

/* Everything from here to ODS PDF CLOSE goes into the PDF report.
   Double quotes are required so that the macro variable resolves. */
ods pdf file = "&pdfout";

/* Below is my Choice 1 */
proc reg data=source.air1;
    Air1B: model CPM = ASL LUTL LFilled Lempty / vif influence p r;
    title 'Air1: CPM = ASL LUTL Lfilled Lempty';
    /* Studentized residual vs predicted value, and Cook's D vs observation. */
    plot student.*predicted. cookd.*obs.;
    /* Normal probability plot of the residuals. */
    plot npp.*residual.;
run;
quit;

/* Below is my choice #2 */
/* this is old model 5 -- but changed to SLE = .05, ASL drops out -- */
proc reg data=source.air1;
    /* db m5xASL: model CPM = UTL ASL SPA ALF Type / */
    m5xASL: model CPM = UTL SPA ALF /
            /* db Selection = stepwise SLE=.15 SLS = .05 */
            vif influence p r;
    /* db Title " Air1: Stepwise Regression of original variables"; */
    title "m5xASL: model5, without var ASL";
run;
quit;

/* Short - Long modelling */

/* this is the best for airshort */
/* db Proc reg data = source.airshort; */
/* NOTE(review): this model now runs on the full air1 data, yet the title
   still says Airshort and the model label is Long1.  Confirm that is
   intended before relying on the report headings. */
proc reg data=source.air1;
    Long1: model CPM = Lfilled Lempty UTL / stb vif influence p r;
    title 'Airshort: CPM = Lfilled Lempty UTL';
run;
quit;

/* This is the best for airlong */
/* db Proc Reg data=source.airlong; */
/* NOTE(review): same situation as above - fitted on the full air1 data while
   the title says Airlong. */
proc reg data=source.air1;
    Long3: model CPM = Lfilled lpsize / stb vif influence p r;
    title 'Airlong:CPM = Lfilled lpsize';
run;
quit;

/* db im1: model on the inverse seat-capacity and inverse load-factor terms */
proc reg data=source.air1;
    im1: model CPM = ISPA IALF / stb vif influence p r;
    title 'im1: model CPM=ISPA IALF';
run;
quit;

ods pdf close;