2016年2月24日 星期三

使用 proc reg 找出影響點 (influential observations)

%myreg(data = mysas.b11, y = Quality, x = clarity aroma body flavor oakiness, option = selection = cp)


%macro myreg(data, y, x, option);
/*********************************************************************************************************
*名稱:迴歸 診斷 影響點                                                                                   *
*作者:jimmy                                                                                              *
*日期:2016/04/19                                                                                         *
*說明:                                                                                           *                         
*     option 為 proc reg  model的option 此macro 已有 all influence vif                                       *
*     可以產出Cook's D  CovRatio DFFITS DFBETAS 圖形 以及將各個 obs的指標 與 超出指標的obs 匯出至同資料集*
*     計算出Cook's D  CovRatio DFFITS DFBETAS 界線                                                       *
*********************************************************************************************************/

ods trace on / listing; /*output 名稱 位置 ods output 可以將sas執行出的報表匯出成sas資料集*/
proc reg data =&data  plots(label)=(CooksD RStudentByLeverage DFFITS DFBETAS);
 model &y = &x / all influence &option;
 ods output OutputStatistics = outstatistics
    ANOVA = asd
    ParameterEstimates = pest
/*     DFFITSPlot = dffitsplotx*/
/*     DFBETASPanel= dddd*/
    RStudentByLeverage = outlier;
/* output out = point h=lev;*/
run;
quit;
ods trace off; /*output 名稱 位置*/


data testp;
 set asd;
 if Step = . and Source = "Model" then do  p2 = df +1;
      call symputx("p", p2);
     end; 
run;

data test / view = test;
 set asd outstatistics;

 if _n_ = 3 then n = df + 1;
 retain n;
 if _n_ = 4 then do; /*界*/
  CooksD_bound = &p/n;
  HatDiagonal_bound = 2 * &p/n;
  ACovRatio_bound = 1 + 3*&p/n;
  BCovRatio_bound = 1 - 3*&p/n;
  DFFITS_bound = 2 * sqrt(&p/n);
  DFB_bound = 2/sqrt(n);
 end;

 retain Observation CooksD CooksD_bound HatDiagonal HatDiagonal_bound
    CovRatio ACovRatio_bound BCovRatio_bound DFFITS DFFITS_bound DFB_bound;
 drop model Dependent Source DF SS MS FValue ProbF DepVar
  PredictedValue StdErrMeanPredict LowerCLMean UpperCLMean
  LowerCL UpperCL Picture Residual StdErrResidual StudentResidual RStudent;
run;

data test2;
 retain Observation CooksD CooksD_bound HatDiagonal HatDiagonal_bound    /*改變位置*/
    CovRatio ACovRatio_bound BCovRatio_bound DFFITS DFFITS_bound;
 set test;
 array testx {*}  CooksD CovRatio DFFITS;
 array testxx {*} CooksD_bound ACovRatio_bound DFFITS_bound;
 array y {3} ;
 array dfb {*} DFB_Intercept -- dfb_bound;          /*dfb_ - dfb_*/
 array DFBETAS {&p};
 do i = 1 to 3;
  if abs(testx{i}) > testxx{i} then do;
   y{i} = Observation;  
  end;
  if i = 2 and abs(testx{i}) < BCovRatio_bound then do;
   y{i} = Observation;
  end;
 end;
 do j = 1 to &p;
  if abs(dfb{j}) > DFB_bound then do;
   DFBETAS{j} =  Observation;   
  end;
 end;



 drop i  j  DFBETAS1 n;
 label y1 = "CooksD" y2 = "CovRatio" y3 = "DFFITS";
run;

proc sgplot data = TEST2;
 scatter x = OBSERVATION y = COVRATIO / datalabel = OBSERVATION;
 refline ACovRatio_bound;
 refline BCovRatio_bound;
 title "CovRatio";
run;
title;

proc print data = test2 (firstobs = 4) label;
 title "out";
run;
title;
%mend;







  • 是否為影響點我是以四種指標作為依據
  1. Cook's D
  2. CovRatio
  3. DFFITS
  4. DFBETAS
程式說明:
第17列 ods output 可以將sas執行出的報表匯出成sas資料集,
            原報表名稱須使用ods trace on / listing;   ods trace off;
            找出.
第27列 資料集asd為anova table 匯入asd主要是用來計算資料筆數(n).
資料集test 在計算四種指標的界
資料集test2 為找出超出的點






























如果不要參考COVRATIO指標可以只執行這段程式碼








資料集dffitsplotx為:





















Cook's D
DFFITS
DFBETAS
reg程序有內建






0 意見:

張貼留言