/*----------------------------------------------------------------------
  h-01  Daniel Brockman  061211  Influence Analysis -- Air
  Course project team: Christine Iodice, Prasad Satich, Saranne Warner,
                       Daniel Brockman
  Course X446 SAS Data Analysis.  Jian-min Liu, Instructor.

  Purpose
    1. Print the original data set and the "clean" data set (the original
       with extreme values excluded), each sorted by descending CPM.
    2. Take the data set selected by the INUSE macro variable, derive
       inverse, log, squared and bucketed versions of its variables, and
       store them in WORK.AIR2.
    3. Fit three regression models of CPM with PROC REG, requesting
       influence diagnostics, and save the estimates in WORK.AIRREGEST.

  Inputs   SOURCE.<orig>, SOURCE.<clean>  (permanent data sets; must contain
           CPM, SPA, ALF, ASL and UTL)
  Outputs  WORK.ORIG4, WORK.CLEAN4, WORK.INUSE3, WORK.AIR2, WORK.AIRREGEST
           plus listing output from PROC PRINT and PROC REG.
----------------------------------------------------------------------*/

/* NOTE(review): these two titles are cancelled by the bare TITLE statement
   further down before any procedure produces output, so they never appear
   in the listing. Kept as in the original; confirm whether that is intended. */
Title 'Influence analysis';
Title2 'Program h-01.sas';
options ls=80;

* parameters ----------------------------------- ;
* libname source 'C:\Documents and Settings\Christine\Desktop\SAS_Project';
libname source 'c:\b\SAS-X446\prj' ;   * location of data;
%let orig=air_jianmin_23Nov06 ;        * orig data file ;
%let clean=air_clean_pct ;             * orig data excluding extreme values ;
%let extremes=air_ex_pct ;             * excluded values data file (not referenced below) ;

********************************************;
* select input data set to use for models  ;
*                                          ;
%let inuse=&orig;                        * ;
********************************************;
* end parameters ------------------------------- ;

title;   * clear every title line currently in effect ;

/* Each PROC SORT below reads the permanent data set and writes the sorted
   copy straight to WORK via OUT=, so the permanent data set is never
   modified and no separate copy step is needed. */

Title1 "Original Data" ;
proc sort data=source.&orig out=orig4 ;
   by descending CPM ;
run;
proc print data=orig4 ;
run;

Title1 "Excluding extreme observations";
proc sort data=source.&clean out=clean4 ;
   by descending CPM ;
run;
proc print data=clean4 ;
run;

Title1 ;
* The data we are running with ;
proc sort data=source.&inuse out=inuse3 ;
   by descending CPM ;
run;

* cook up some variable transformations;
data air2;
   set inuse3 ;

   Seats     = SPA*1000;
   AvgFilled = ALF*seats;        * filled seats ;
   AvgEmpty  = (1-ALF)*seats;    * empty seats ;

   * inverse transformations ;
   IAVGF = 1/AvgFilled ;
   ISPA  = 1/SPA ;
   IALF  = 1/ALF ;
   IASL  = 1/ASL ;

   * BUCKETIZATION ;
   /* Plane-size bucket: width 0.1 in SPA, right-closed intervals.
      A SPA that is missing, negative or above 0.4 leaves psize missing. */
   if      0.0 <= SPA <= 0.1 then psize=1;
   else if 0.1 <  SPA <= 0.2 then psize=2;
   else if 0.2 <  SPA <= 0.3 then psize=3;
   else if 0.3 <  SPA <= 0.4 then psize=4;

   /* ASL bucket: width 500, right-closed intervals.
      An ASL that is missing, negative or above 4000 leaves pdistance missing. */
   if      0    <= ASL <= 500  then pdistance = 1;
   else if 500  <  ASL <= 1000 then pdistance = 2;
   else if 1000 <  ASL <= 1500 then pdistance = 3;
   else if 1500 <  ASL <= 2000 then pdistance = 4;
   else if 2000 <  ASL <= 2500 then pdistance = 5;
   else if 2500 <  ASL <= 3000 then pdistance = 6;
   else if 3000 <  ASL <= 3500 then pdistance = 7;
   else if 3500 <  ASL <= 4000 then pdistance = 8;

   Fill_to_Empty = Avgfilled / AvgEmpty;

   * natural-log transformations ;
   LCPM       = Log(CPM);
   LUTL       = Log(UTL);
   LASL       = Log(ASL);
   LSPA       = Log(SPA);
   LALF       = log(ALF);
   LFilled    = log(avgfilled);
   Lempty     = log(avgempty);
   Lpsize     = log(psize);        * bucket 1 maps to 0 ;
   Lpdistance = log(pdistance);

   * squared transformations ;
   SUTL    = UTL**2;
   SASL    = ASL**2;
   SSPA    = SPA**2;
   SALF    = ALF**2;
   Sfilled = Avgfilled**2;
   Sempty  = Avgempty**2;
run;

Title1 'Isolating Observations of Influence';
/* Three candidate models for CPM. Options on every MODEL statement:
     stb       standardized regression coefficients
     vif       variance inflation factors
     influence per-observation influence statistics
     p, r      predicted values and residual analysis
   OUTEST= saves the parameter estimates of all models in WORK.AIRREGEST. */
proc reg data=air2 outest=airregest;
   Long3: Model CPM = Lfilled lpsize / stb vif influence p r ;
   Long2: Model CPM = Lfilled Lempty / stb vif influence p r ;
   im1:   model CPM = ISPA IALF      / stb vif influence p r ;
run;
quit;   * PROC REG is interactive and RUN alone does not end it ;
*----------------------------------------;