以bootstrap方法產生多個資料集,
分別為每個資料集建立linear regression模型,
整合多個模型降低mse。
作者:jimmy
日期:2017/3/9
%macro my_bootstrap(data = , b = , obs =);
%do bootstrap_i = 1 %to &b.;
proc surveyselect data= &data. outall method=urs n=&obs. noprint
out=BOOTSTRAP_out;
run;
data BOOTSTRAP_out&bootstrap_i;
set BOOTSTRAP_out;
do hit_i = 1 to NumberHits;
output;
end;
run;
%end;
%mend;
/*proc reg data = mysas.red1;
model quality = Sulphates pH volatile_acidity Alcohol / vif all;
quit;*/
%macro my_reg(data = mysas.red1
, testdata = mysas.red2
, y = Quality
, x = Sulphates pH volatile_acidity Alcohol
, k = 4
, alpha = 0.05
);
%let open_d = %sysfunc (open(&data.));
%let obs = %sysfunc(attrn(&open_d., nobs));
/*%let p = %sysfunc(attrn(&open_d., nvars));*/
%let close_d = %sysfunc(close(&open_d.));
%let test_open_d = %sysfunc (open(&testdata.));
%let test_obs = %sysfunc(attrn(&test_open_d., nobs));
%let test_close_d = %sysfunc(close(&test_open_d.));
%let p = %sysevalf(&k.+1);
%do i_name = 1 %to &k.;
%let x_name&i_name. = %scan(&x., &i_name., %str( ));
/* %let x_name_num&i_name. = %length(x_name&i_name.);*/
%end;
/*%put x_name1 = &x_name1 x_name2 = &x_name2;*/
/*%put x_name_num1 = &x_name_num1 x_name_num2 = &x_name_num2;*/
proc iml;
use &data.;
read all var _all_;
close &data.;
y = &y.;
x0 = {[&obs.] 1};
%do i = 1 %to &k.;
x&i. = &&x_name&i.;
xc = xc || x&i. - mean(x&i.);
xall = xall || x&i.;
%end;
x_matrix = x0` || xall;
beta_matrix = inv(x_matrix` * x_matrix) * x_matrix` *y;
beta_matrix_no0 = beta_matrix[2:&p., ];
h = x_matrix * inv(x_matrix` * x_matrix) * x_matrix`;
e = (i(&obs.) - h) * y;
/*ANOVA*/
SS_res = y`*y - beta_matrix` * x_matrix`*y;
SS_t = y`*y - (sum(y)##2)/&obs.;
SS_r = SS_t - SS_res;
MS_res = SS_res/(&obs.- &p.);
MS_r = SS_r/(&k.);
f0 = MS_r / MS_res;
f0_p_value = 1- probf(f0, &k., &obs.-&k.-1);
R_square = 1 - SS_res/SS_t;
R_square_adj = 1 - (SS_res/(&obs.-&p.))/(SS_t/(&obs.-1));
XX = inv(x_matrix`*x_matrix);
se_beta_matrix_ = sqrt(ms_res#diag(xx));
/*beta t test ci*/
%do j = 1 %to &p.;
t0_&j. = beta_matrix[&j., ]/se_beta_matrix_[&j., &j.];
if t0_&j. >=0 then t0_P_value_&j. =(1-probt(t0_&j., &obs.-&p.))#2;
else t0_P_value_&j. =(probt(t0_&j., &obs.-&p.))#2;
beta_t0 = beta_t0 // t0_&j.;
beta_t_P_value = beta_t_P_value // t0_P_value_&j.;
beta_matrix_ci_a_&j. = beta_matrix[&j., ] - tinv((&alpha.)/2, &obs.-&p.) # se_beta_matrix_[&j., &j.];
beta_matrix_ci_b_&j. = beta_matrix[&j., ] + tinv((&alpha.)/2, &obs.-&p.) # se_beta_matrix_[&j., &j.];
beta_matrix_ci_a = beta_matrix_ci_a // beta_matrix_ci_a_&j.;
beta_matrix_ci_b = beta_matrix_ci_b // beta_matrix_ci_b_&j.;
se_beta_matrix = se_beta_matrix // se_beta_matrix_[&j., &j.];
%end;
y_hat = x_matrix* beta_matrix;
/*residual*/
%do ii = 1 %to &obs.;
h_ii_&ii. = h[&ii., &ii.];
h_ii = h_ii // h_ii_&ii.;
%end;
/* e = (i(&obs.) - h) * y;*/
standardized_residual = e/sqrt(ms_res);
Studentized_residual = e/sqrt(ms_res#(1 - h_ii));
s2_i = ((&obs.-&p.) # MS_res - (e##2/(1-h_ii))) / (&obs. - &p. -1);
R_student = e/sqrt(s2_i#(1-h_ii));
/*leverage influence*/
/* h_ii > 2P/n
cook_s_d >p/n
|dffits| >2(p/n)^0.5*/
cook_s_d = ((Studentized_residual##2)/&p.) # (h_ii/(1-h_ii));
dffits = sqrt(h_ii/(1-h_ii)) # R_student;
print SS_r SS_res SS_t ms_r ms_res f0 f0_p_value R_square R_square_adj;
print beta_matrix se_beta_matrix beta_matrix_ci_a beta_matrix_ci_b beta_t0 beta_t_P_value;
print xx;
/*print h_ii cook_s_d dffits;
print e standardized_residual Studentized_residual R_student;
create output_yr var {y y_hat e};
append;
close output_yr;
*/
/*vif
Sulphates pH volatile_acidity Alcohol*/
/* %put x_name1 = &x_name1 x_name2 = &x_name2;*/
%do ai = 1 %to &k.;
vif_y&ai. = &&x_name&ai.;
%do vi = 1 %to &k.; /*1 = 2 3 4 2 = 1 3 4 3 = 1 2 4 4 = 1 2 3 */
%if &ai. ^= &vi. %then %do;
vif_x&vi. = &&x_name&vi.;
vif_xc&ai. = vif_xc&ai. || vif_x&vi. - mean(vif_x&vi.);
vif_xall&ai. = vif_xall&ai. || vif_x&vi.;
%end;
%end;
vif_x_matrix&ai. = x0` || vif_xall&ai.;
vif_beta_matrix&ai. = inv(vif_x_matrix&ai.` * vif_x_matrix&ai.) * vif_x_matrix&ai.` *vif_y&ai.;
vif_SS_res&ai. = vif_y&ai.`*vif_y&ai. - vif_beta_matrix&ai.` * vif_x_matrix&ai.`*vif_y&ai.;
vif_SS_t&ai. = vif_y&ai.`*vif_y&ai. - (sum(vif_y&ai.)##2)/&obs.;
vif_R_square&ai. = 1 - (vif_SS_res&ai./vif_SS_t&ai.);
VIF_&&x_name&ai. = 1/(1-vif_R_square&ai.);
print VIF_&&x_name&ai.;
%end;
/*pred
new_x = {1 8 275};
new_y_hat = new_x*beta_matrix;
new_y_hat_ci_a = new_y_hat - tinv((&alpha.)/2, &obs.-&p.) # sqrt(ms_res#(1+new_x`*(XX)*new_x));
new_y_hat_ci_b = new_y_hat + tinv((&alpha.)/2, &obs.-&p.) # sqrt(ms_res#(1+new_x`*(XX)*new_x));
*/
use &testdata.;
read all var _all_;
close &testdata.;
test_y = &y.;
x0 = {[&test_obs.] 1};
%do test_i = 1 %to &k.;
test_x&test_i. = &&x_name&test_i.;
test_xall = test_xall || test_x&test_i.;
%end;
test_x_matrix = x0`||test_xall;
test_y_pred = test_x_matrix *beta_matrix;
create output_test_y var {test_y test_y_pred};
append;
close output_test_y;
/* print test_y_pred;*/
quit;
/*proc sgplot data = output_yr;*/
/* scatter x = y_hat y = e;*/
/*run;*/
/**/
/*proc sgplot data = output_yr;*/
/* scatter x = y_hat y = R_student;*/
/*run;*/
/**/
/*proc sgplot data = output_yr;*/
/* scatter x = h_ii y = R_student;*/
/*run;*/
/**/
/*proc univariate data=output_yr noprint;*/
/* qqplot e;*/
/*run;*/
/**/
%mend;
%my_reg()
%macro testbo(data = mysas.red1, testdata = mysas.red2, b = 500, obs = 999 , testobs= 600);
proc datasets noprint;
delete all_test_y;
quit;
data all_test_y;
run;
%my_bootstrap(data = mysas.red1 , b = &b., obs = &obs.)
%do bost = 1 %to &b.;
%my_reg(data = BOOTSTRAP_out&bost.
, testdata = &testdata.
, y = Quality
, x =Sulphates pH volatile_acidity Alcohol
, k = 4
, alpha =0.05)
data output_test_y&bost.;
set output_test_y;
run;
data all_test_y;
set output_test_y&bost. all_test_y;
run;
proc datasets noprint;
delete BOOTSTRAP_out&bost. output_test_y&bost.;
quit;
%end;
data mean_test_y ;
set all_test_y;
%do mean_i = 1 %to &testobs.;
if mod(_n_, &testobs.) = &mean_i.-1 then TEST_Y_PRED&mean_i. = TEST_Y_PRED;
/* sum_y_pred&mean_i. +TEST_Y_PRED&mean_i.;*/
/* mean_y_pred&mean_i. = sum_y_pred&mean_i./&b.-1;*/
%end;
run;
proc iml;
use mean_test_y;
read all var _all_;
close mean_test_y;
%do pred_i = 1 %to &testobs.;
mean_pred&pred_i. = mean(TEST_Y_PRED&pred_i.);
mean_pred_ = mean_pred_ // mean_pred&pred_i. ;
%end;
r_mean_pred_ =mean_pred_[2:&testobs., ]// mean_pred_[1, ];
create mean_y_pred var {mean_pred_ r_mean_pred_};
append;
close mean_y_pred;
quit;
data mean_y_pred_testdata;
merge &testdata. mean_y_pred;
drop mean_pred_;
run;
/*
data mean_test_y2;
set mean_test_y;
if _n_ = (&b.-1) * &testobs.;
keep mean_y_pred1 - mean_y_pred&testobs.;
run;
proc transpose data=mean_test_y2 out=mean_test_y2;
var mean_y_pred1 - mean_y_pred&testobs.;
run;
data y_pred;
merge &testdata. mean_test_y2;
run;*/
%mend;
%testbo()
0 意見:
張貼留言