以bootstrap方法產生多個資料集,
分別為每個資料集建立linear regression模型,
整合多個模型降低mse。
作者:jimmy
日期:2017/3/9
%macro my_bootstrap(data = , b = , obs =); %do bootstrap_i = 1 %to &b.; proc surveyselect data= &data. outall method=urs n=&obs. noprint out=BOOTSTRAP_out; run; data BOOTSTRAP_out&bootstrap_i; set BOOTSTRAP_out; do hit_i = 1 to NumberHits; output; end; run; %end; %mend;
/*proc reg data = mysas.red1; model quality = Sulphates pH volatile_acidity Alcohol / vif all; quit;*/ %macro my_reg(data = mysas.red1 , testdata = mysas.red2 , y = Quality , x = Sulphates pH volatile_acidity Alcohol , k = 4 , alpha = 0.05 ); %let open_d = %sysfunc (open(&data.)); %let obs = %sysfunc(attrn(&open_d., nobs)); /*%let p = %sysfunc(attrn(&open_d., nvars));*/ %let close_d = %sysfunc(close(&open_d.)); %let test_open_d = %sysfunc (open(&testdata.)); %let test_obs = %sysfunc(attrn(&test_open_d., nobs)); %let test_close_d = %sysfunc(close(&test_open_d.)); %let p = %sysevalf(&k.+1); %do i_name = 1 %to &k.; %let x_name&i_name. = %scan(&x., &i_name., %str( )); /* %let x_name_num&i_name. = %length(x_name&i_name.);*/ %end; /*%put x_name1 = &x_name1 x_name2 = &x_name2;*/ /*%put x_name_num1 = &x_name_num1 x_name_num2 = &x_name_num2;*/ proc iml; use &data.; read all var _all_; close &data.; y = &y.; x0 = {[&obs.] 1}; %do i = 1 %to &k.; x&i. = &&x_name&i.; xc = xc || x&i. - mean(x&i.); xall = xall || x&i.; %end; x_matrix = x0` || xall; beta_matrix = inv(x_matrix` * x_matrix) * x_matrix` *y; beta_matrix_no0 = beta_matrix[2:&p., ]; h = x_matrix * inv(x_matrix` * x_matrix) * x_matrix`; e = (i(&obs.) - h) * y; /*ANOVA*/ SS_res = y`*y - beta_matrix` * x_matrix`*y; SS_t = y`*y - (sum(y)##2)/&obs.; SS_r = SS_t - SS_res; MS_res = SS_res/(&obs.- &p.); MS_r = SS_r/(&k.); f0 = MS_r / MS_res; f0_p_value = 1- probf(f0, &k., &obs.-&k.-1); R_square = 1 - SS_res/SS_t; R_square_adj = 1 - (SS_res/(&obs.-&p.))/(SS_t/(&obs.-1)); XX = inv(x_matrix`*x_matrix); se_beta_matrix_ = sqrt(ms_res#diag(xx)); /*beta t test ci*/ %do j = 1 %to &p.; t0_&j. = beta_matrix[&j., ]/se_beta_matrix_[&j., &j.]; if t0_&j. >=0 then t0_P_value_&j. =(1-probt(t0_&j., &obs.-&p.))#2; else t0_P_value_&j. =(probt(t0_&j., &obs.-&p.))#2; beta_t0 = beta_t0 // t0_&j.; beta_t_P_value = beta_t_P_value // t0_P_value_&j.; beta_matrix_ci_a_&j. = beta_matrix[&j., ] - tinv((&alpha.)/2, &obs.-&p.) # se_beta_matrix_[&j., &j.]; beta_matrix_ci_b_&j. = beta_matrix[&j., ] + tinv((&alpha.)/2, &obs.-&p.) # se_beta_matrix_[&j., &j.]; beta_matrix_ci_a = beta_matrix_ci_a // beta_matrix_ci_a_&j.; beta_matrix_ci_b = beta_matrix_ci_b // beta_matrix_ci_b_&j.; se_beta_matrix = se_beta_matrix // se_beta_matrix_[&j., &j.]; %end; y_hat = x_matrix* beta_matrix; /*residual*/ %do ii = 1 %to &obs.; h_ii_&ii. = h[&ii., &ii.]; h_ii = h_ii // h_ii_&ii.; %end; /* e = (i(&obs.) - h) * y;*/ standardized_residual = e/sqrt(ms_res); Studentized_residual = e/sqrt(ms_res#(1 - h_ii)); s2_i = ((&obs.-&p.) # MS_res - (e##2/(1-h_ii))) / (&obs. - &p. -1); R_student = e/sqrt(s2_i#(1-h_ii)); /*leverage influence*/ /* h_ii > 2P/n cook_s_d >p/n |dffits| >2(p/n)^0.5*/ cook_s_d = ((Studentized_residual##2)/&p.) # (h_ii/(1-h_ii)); dffits = sqrt(h_ii/(1-h_ii)) # R_student; print SS_r SS_res SS_t ms_r ms_res f0 f0_p_value R_square R_square_adj; print beta_matrix se_beta_matrix beta_matrix_ci_a beta_matrix_ci_b beta_t0 beta_t_P_value; print xx; /*print h_ii cook_s_d dffits; print e standardized_residual Studentized_residual R_student; create output_yr var {y y_hat e}; append; close output_yr; */ /*vif Sulphates pH volatile_acidity Alcohol*/ /* %put x_name1 = &x_name1 x_name2 = &x_name2;*/ %do ai = 1 %to &k.; vif_y&ai. = &&x_name&ai.; %do vi = 1 %to &k.; /*1 = 2 3 4 2 = 1 3 4 3 = 1 2 4 4 = 1 2 3 */ %if &ai. ^= &vi. %then %do; vif_x&vi. = &&x_name&vi.; vif_xc&ai. = vif_xc&ai. || vif_x&vi. - mean(vif_x&vi.); vif_xall&ai. = vif_xall&ai. || vif_x&vi.; %end; %end; vif_x_matrix&ai. = x0` || vif_xall&ai.; vif_beta_matrix&ai. = inv(vif_x_matrix&ai.` * vif_x_matrix&ai.) * vif_x_matrix&ai.` *vif_y&ai.; vif_SS_res&ai. = vif_y&ai.`*vif_y&ai. - vif_beta_matrix&ai.` * vif_x_matrix&ai.`*vif_y&ai.; vif_SS_t&ai. = vif_y&ai.`*vif_y&ai. - (sum(vif_y&ai.)##2)/&obs.; vif_R_square&ai. = 1 - (vif_SS_res&ai./vif_SS_t&ai.); VIF_&&x_name&ai. = 1/(1-vif_R_square&ai.); print VIF_&&x_name&ai.; %end; /*pred new_x = {1 8 275}; new_y_hat = new_x*beta_matrix; new_y_hat_ci_a = new_y_hat - tinv((&alpha.)/2, &obs.-&p.) # sqrt(ms_res#(1+new_x`*(XX)*new_x)); new_y_hat_ci_b = new_y_hat + tinv((&alpha.)/2, &obs.-&p.) # sqrt(ms_res#(1+new_x`*(XX)*new_x)); */ use &testdata.; read all var _all_; close &testdata.; test_y = &y.; x0 = {[&test_obs.] 1}; %do test_i = 1 %to &k.; test_x&test_i. = &&x_name&test_i.; test_xall = test_xall || test_x&test_i.; %end; test_x_matrix = x0`||test_xall; test_y_pred = test_x_matrix *beta_matrix; create output_test_y var {test_y test_y_pred}; append; close output_test_y; /* print test_y_pred;*/ quit; /*proc sgplot data = output_yr;*/ /* scatter x = y_hat y = e;*/ /*run;*/ /**/ /*proc sgplot data = output_yr;*/ /* scatter x = y_hat y = R_student;*/ /*run;*/ /**/ /*proc sgplot data = output_yr;*/ /* scatter x = h_ii y = R_student;*/ /*run;*/ /**/ /*proc univariate data=output_yr noprint;*/ /* qqplot e;*/ /*run;*/ /**/ %mend; %my_reg()
%macro testbo(data = mysas.red1, testdata = mysas.red2, b = 500, obs = 999 , testobs= 600); proc datasets noprint; delete all_test_y; quit; data all_test_y; run; %my_bootstrap(data = mysas.red1 , b = &b., obs = &obs.) %do bost = 1 %to &b.; %my_reg(data = BOOTSTRAP_out&bost. , testdata = &testdata. , y = Quality , x =Sulphates pH volatile_acidity Alcohol , k = 4 , alpha =0.05) data output_test_y&bost.; set output_test_y; run; data all_test_y; set output_test_y&bost. all_test_y; run; proc datasets noprint; delete BOOTSTRAP_out&bost. output_test_y&bost.; quit; %end; data mean_test_y ; set all_test_y; %do mean_i = 1 %to &testobs.; if mod(_n_, &testobs.) = &mean_i.-1 then TEST_Y_PRED&mean_i. = TEST_Y_PRED; /* sum_y_pred&mean_i. +TEST_Y_PRED&mean_i.;*/ /* mean_y_pred&mean_i. = sum_y_pred&mean_i./&b.-1;*/ %end; run; proc iml; use mean_test_y; read all var _all_; close mean_test_y; %do pred_i = 1 %to &testobs.; mean_pred&pred_i. = mean(TEST_Y_PRED&pred_i.); mean_pred_ = mean_pred_ // mean_pred&pred_i. ; %end; r_mean_pred_ =mean_pred_[2:&testobs., ]// mean_pred_[1, ]; create mean_y_pred var {mean_pred_ r_mean_pred_}; append; close mean_y_pred; quit; data mean_y_pred_testdata; merge &testdata. mean_y_pred; drop mean_pred_; run; /* data mean_test_y2; set mean_test_y; if _n_ = (&b.-1) * &testobs.; keep mean_y_pred1 - mean_y_pred&testobs.; run; proc transpose data=mean_test_y2 out=mean_test_y2; var mean_y_pred1 - mean_y_pred&testobs.; run; data y_pred; merge &testdata. mean_test_y2; run;*/ %mend;
%testbo()
0 意見:
張貼留言