2017年7月4日 星期二

sas macro logistic regression real adaboost

/* Pool of candidate predictor names, one name per observation.          */
/* The boosting macro draws a random subset of these rows for each model. */
data comb_data;
    length comb_x $ 100;
    /* comb_x is already character via LENGTH; @@ keeps reading names
       from the same data line until it is exhausted. */
    input comb_x @@;
    datalines;
area da hfs i0 pa500
;
run;

/*---------------------------------------------------------------------------
  logistic_real_adabost
  Real AdaBoost ensemble whose weak learners are weighted logistic
  regressions, each fitted on a random subset of candidate predictors.

  Parameters
    data         : input data set. Read as-is; it must also carry the
                   variable Selected (0 = training row, otherwise testing row)
                   and the variable named by y_testing.
    y            : binary response used for fitting. Rows where it is missing
                   are not used for fitting or reweighting but still receive
                   a score.
    y_testing    : response variable tabulated against the ensemble
                   prediction for the testing rows.
    x            : every predictor that must be carried from one boosting
                   round to the next (should cover all names in comb_data).
    ab           : number of boosting rounds (number of models).
    training_obs : number of training rows; the initial weight is
                   1 / training_obs.
    class_x      : optional CLASS variables for PROC LOGISTIC (may be blank).
    n_x          : maximum number of predictors drawn for one model.

  Requires
    WORK data set comb_data with one candidate predictor name per row in
    the character variable comb_x.

  Produces
    all, training, testing (x = summed half-logit score, tf = +1 / -1) and
    two PROC FREQ cross tabulations. comb_out, logistic_out and ck_pv from
    the last round are left in place for inspection.

  Notes
    - The event level is taken as the largest value of y, which is what the
      DESC option models for an unformatted numeric response (assumption:
      y has no user format that changes the ordering - confirm for new data).
    - Sampling is not seeded, so results differ from run to run.
---------------------------------------------------------------------------*/
%macro logistic_real_adabost(data = ,
                             y = ,
                             y_testing = ,
                             x = ,
                             ab = ,
                             training_obs = ,
                             class_x = ,
                             n_x = );

    /* Keep all working macro variables out of the caller scope. */
    %local lrab lrab_1 var_n comb_xx delete_ y_event n_pool;

    /* n_pool  : how many candidate names exist, so a draw can never ask
                 for more names than are available.
       y_event : response level treated as the event (+1 in the update). */
    proc sql noprint;
        select count(*) into :n_pool from comb_data;
        select max(&y.) format = best32. into :y_event from &data.;
    quit;
    %let n_pool  = &n_pool.;
    %let y_event = &y_event.;

    /* Round 1 starts from uniform weights. */
    data ck_pv_out1;
        set &data.;
        w1 = 1 / &training_obs.;
    run;

    %do lrab = 1 %to &ab.;
        %let lrab_1 = %eval(&lrab. + 1);

        /* Draw the model size uniformly from 1..min(n_x, pool size). */
        data _null_;
            u       = rand('uniform');
            upper_n = min(&n_x., &n_pool.);
            var_n   = ceil(upper_n * u);
            call symputx("var_n", var_n);
        run;
        %put &var_n.;

        /* Simple random sample of var_n predictor names. */
        proc surveyselect data = comb_data n = &var_n. noprint out = comb_out;
        run;

        proc sql noprint;
            select comb_x
                into :comb_xx separated by " "
                from comb_out;
        quit;
        %put &comb_xx.;

        /* Weighted weak learner. Rows with a missing response do not enter
           the fit but are kept in the output with a predicted probability. */
        proc logistic data = ck_pv_out&lrab. desc noprint;
            %if %length(&class_x.) %then %do;
            class &class_x.;
            %end;
            model &y. = &comb_xx.;
            weight w&lrab.;
            output out = logistic_out
                   predicted = p&lrab.;
        run;

        /* Diagnostic copy: hard classification of this round and its error
           flag. All rows are kept (no WHERE filter) so that row order stays
           aligned with the input data set for the final positional merge. */
        data ck_pv;
            set logistic_out;
            if p&lrab. > 0.5 then p_h = 1;
            else if . < p&lrab. < 0.5 then p_h = 0;
            else p_h = .;
            if missing(&y.) then error = .;
            else error = ((&y. = &y_event.) ne p_h);
        run;

        proc iml;
            use ck_pv;
            read all var _all_;
            close ck_pv;

            /* 1 for rows that have a response, 0 otherwise. */
            _ab_train = (&y. ^= .);
            /* Response recoded to +1 (event) / -1 (non-event); 0 for rows
               without a response so that their weight is left unchanged. */
            _ab_ypm = _ab_train # (2 # (&y. = &y_event.) - 1);

            /* Half-logit score. Probabilities are clipped away from 0 and 1
               so that a perfectly separated fit cannot yield an infinite
               score; rows with no prediction keep a missing score. */
            _ab_fm = j(nrow(p&lrab.), 1, .);
            _ab_ok = loc(p&lrab. ^= .);
            if ncol(_ab_ok) > 0 then do;
                _ab_pc = (p&lrab.[_ab_ok] <> 1e-10) >< (1 - 1e-10);
                _ab_fm[_ab_ok] = 0.5 # log(_ab_pc / (1 - _ab_pc));
            end;

            /* Real AdaBoost update w * exp(-y * f), normalised over the
               training rows only. */
            _ab_wraw = w&lrab. # exp((-_ab_ypm) # _ab_fm);
            _ab_tr   = loc(_ab_train);
            w&lrab_1. = _ab_wraw / sum(_ab_wraw[_ab_tr]);

            g_x&lrab. = _ab_fm;

            create ck_pv_out&lrab_1. var {&y. &x. w&lrab_1.};
            append;
            close ck_pv_out&lrab_1.;

            create ck_pv_out_gx&lrab. var {g_x&lrab.};
            append;
            close ck_pv_out_gx&lrab.;
        quit;
    %end;

    /* Positional (no BY) merge: row i of every score table belongs to row i
       of the input data set. The ensemble score is the sum of all rounds. */
    data all;
        merge &data.
              ck_pv_out_gx1-ck_pv_out_gx&ab.;
        x = sum(of g_x1-g_x&ab.);
        if x > 0 then tf = 1;
        else tf = -1;
    run;

    data training testing;
        set all;
        if Selected = 0 then output training;
        else output testing;
    run;

    proc freq data = training;
        table &y. * tf / nocol nopercent;
        title "整體學習 結果 training";
    run;
    title;

    proc freq data = testing;
        table &y_testing. * tf / nocol nopercent;
        title "整體學習 結果 testing";
    run;
    title;

    /* Remove the per-round work tables, including the weight table that the
       last round prepared for a round that never runs. */
    proc datasets nolist;
        delete
        %do delete_ = 1 %to &ab.;
            ck_pv_out&delete_. ck_pv_out_gx&delete_.
        %end;
            ck_pv_out%eval(&ab. + 1)
        ;
    quit;
%mend;
/*
以real_adabost
%logistic_real_adabost(data = mysas.breast_2,
y = Classcar,
                                                        y_testing=Classcar_test,
x = area da hfs i0 pa500 p max_ip a_da,
ab = 10,
training_obs=55,
class_x= ,
n_x=4)

=================================================


以逐步迴歸
proc logistic data = mysas.breast_2 noprint;
model Classcar =  area da hfs i0 pa500 p max_ip a_da
/ selection = stepwise;
output out = logistic_out2   PREDICTED= p2;
quit;

data logistic_out2;
set logistic_out2;
if p2 >= 0.5 then p2=1;
else p2=-1;
run;
data training2 testing2;
set logistic_out2;
if Selected =0 then output training2;
else output testing2;
run;
proc freq data = training2;
table Classcar * p2 / nocol nopercent ;
title "逐步迴歸 結果 training";
run;
proc freq data = testing2;
table Classcar_test * p2/ nocol nopercent ;
title "逐步迴歸 結果 testing";
run;title;

*/

data set: https://archive.ics.uci.edu/ml/datasets/Breast+Tissue
Y:Classcar (惡性腫瘤)
I0 Impedivity (ohm) at zero frequency
PA500 phase angle at 500 KHz
HFS high-frequency slope of phase angle
DA impedance distance between spectral ends
AREA area under spectrum
A/DA (variable a_da) area normalized by DA
MAX IP (variable max_ip) maximum of the spectrum
DR distance between I0 and real part of the maximum frequency point
P length of the spectral curve





Read More