* procreg_share_08Dec06.sas saranne ;

/* ------------------------------------------------------------------
   Setup, data preparation and descriptive statistics.

   Reads the member named by the macro variable FILE from library
   AIRLIB, then:
     1. copies it unchanged to airlib.air1 and prints it,
     2. writes a copy sorted by ASL to airlib.airsrt2 and prints it,
     3. writes a labeled copy to airlib.air_labeled,
     4. runs PROC UNIVARIATE and PROC CORR on the labeled copy.
   ------------------------------------------------------------------ */

%let file=air_jianmin_23Nov06;

options nocenter ;

/* AIRLIB was previously assigned twice in a row, so the first path was
   immediately overridden and never used. Only the effective (second)
   assignment is kept active. The earlier location is recorded here:
     libname airlib "c:\1warner\5conventions\06dec2006" ;               */
libname airlib "c:\b\SAS-X446\prj";

*---------------------------------------------------------- ;
* Sort by average length of leg asl which type bins         ;
*---------------------------------------------------------- ;

/* Unmodified copy of the input member (the later influence and
   stepwise regressions read this unsorted, unlabeled copy). */
data airlib.air1 ;
   set airlib.&file ;
run ;

title 'Air1 copy of jianmins data, unsorted' ;
proc print data = airlib.air1 ;
run ;

/* OUT= leaves the input member untouched and writes the sorted rows
   to a separate data set. */
proc sort data = airlib.&file
          out  = airlib.airsrt2 ;
   by asl ;
run;

title 'jianmins data sorted by miles asl';
proc print data = airlib.airsrt2 ;
run ;

*---------------------------------------------------------- ;
* Add descriptive labels for Procs                          ;
*---------------------------------------------------------- ;
data airlib.air_labeled ;
   set airlib.&file ;
   label cpm  = "CPM Cost per Passenger Mile (cents)"
         utl  = "UTL Average hours per day use of aircraft"
         asl  = "ASL Average Length of Nonstop Legs of Flights (1000 miles)"
         spa  = "SPA Average number of seats per aircraft (100 seats)"
         alf  = "ALF Average load factor (% of seats occupied by passengers)"
         type = "TYPE Indicator variable =1 if ASL > 1200 miles" ;
run ;

/* descriptive stats, tests for normality and histograms on variables */
Title 'Descriptive Statistics using Proc Univariate';
Proc univariate data=airlib.air_labeled normal plot;
   Var CPM UTL ASL SPA ALF ;
   Histogram CPM UTL ASL SPA ALF / normal;
run;

*---------------------------------------------------------- ;
* Try Proc Corr - Compares 2 continuous variables           ;
*---------------------------------------------------------- ;

/* Full correlation matrix of all five analysis variables. */
title1 "Proc Corr - All Dependent Variables not Greater than .70 If yes, Explain" ;
proc corr data = airlib.air_labeled ;
   var cpm utl asl spa alf ;
run ;

/* Correlations of each predictor (VAR list) with the response cpm
   (WITH list) only. */
title1 "Proc Corr - using with cpm" ;
proc corr data = airlib.air_labeled ;
   var utl asl spa alf ;
   with cpm ;
run ;

*---------------------------------------------------------- ;
* Proc Reg ;
*---------------------------------------------------------- ;

/* ------------------------------------------------------------------
   Regression models of CPM on UTL ASL SPA ALF.

   Reads airlib.air_labeled and airlib.air1 (both built earlier in this
   program). Writes airlib.resid and airlib.reg_influ.

   PROC REG is an interactive procedure: RUN executes the pending
   statements but does not end the step. Each PROC REG below is closed
   with an explicit QUIT so the step boundary does not depend on
   whatever statement happens to follow it.
   ------------------------------------------------------------------ */

Title 'Proc Reg Model 5: Cost = Utilization FlightLength NbrSeats Perc_Occupied';
proc reg data=airlib.air_labeled ;
   model CPM = UTL ASL SPA ALF;   *seems to be BEST MODEL;
run ;
quit ;

symbol value = dot Color = black;

/* Plotting regression and residuals of Model 5 */
/* Below follows code in Jianmin's REGEX program */
/* Refit Model 5 and save predicted values (Pcost), residuals (rcost)
   and studentized residuals (student) for the plots that follow. */
Proc Reg data=airlib.air_labeled ;
   Title "Model 5";
   model CPM = UTL ASL SPA ALF;
   output out = airlib.resid
          p       = Pcost
          r       = rcost
          student = student;
run;
quit;

/* Residual and studentized residual against predicted value, each with
   a reference line at zero. */
Proc Plot data=airlib.resid hpercent=50 vpercent=50;
   Plot rcost*pcost student*pcost / vref=0;
Run;
Quit;

*---------------------------------------------------------- ;
* Proc Reg with Influence a couple obs have 4/5 stars       ;
*---------------------------------------------------------- ;
title "Proc Reg with Influence and Residual options Use unsorted Air1" ;
proc reg data = airlib.air1 ;
   model CPM = UTL ASL SPA ALF / influence R ;
   /* The OUTPUT statement previously named no keyword=name pairs, so
      airlib.reg_influ would carry no diagnostic columns. The
      diagnostics are now requested explicitly; the original input
      columns are still written, so existing readers of reg_influ
      are unaffected. */
   output out      = airlib.reg_influ
          p        = pred_cpm
          r        = resid_cpm
          student  = student
          rstudent = rstudent
          h        = leverage
          cookd    = cookd
          dffits   = dffits
          covratio = covratio ;
run ;
quit ;

*---------------------------------------------------------- ;
* Christines stepwise with influence and partial options    ;
*---------------------------------------------------------- ;
Title "Stepwise Regression of independent continuous variables";
Proc reg data=airlib.air1;
   /* NOTE(review): SLS (.05) is stricter than SLE (.15), so a variable
      that enters with a p-value between .05 and .15 fails the stay
      test on the following step. Confirm this is the intended pair. */
   model CPM = UTL ASL SPA ALF / Selection = stepwise
                                 SLE = .15
                                 SLS = .05
                                 influence partial;
run;
quit;

*---------------------------------------------------------- ;
* Proc RobustReg                                            ;
*---------------------------------------------------------- ;
Title 'Proc RobustReg Model 5: Cost = Utilization FlightLength NbrSeats Perc_Occupied';
proc robustreg data=airlib.air_labeled ;
   model CPM = UTL ASL SPA ALF;
run ;

*---------------------------------------------------------- ;
* Proc Insight (works interactively) Run Last or Alone      ;
*---------------------------------------------------------- ;
Title 'Proc Insight for Airline Data' ;
proc insight data=airlib.air_labeled;
run ;