2017年10月27日 星期五

分組

/*
 * my_cn2: build one two-level data set for every pair of levels of a class
 * variable and, optionally, fit a logistic regression on each pair.
 *
 * Parameters
 *   data = input data set
 *   y    = class variable whose levels are paired up
 *   x    = predictors placed on the MODEL statement (used only when l_r = t)
 *   c_x  = predictors placed on both the CLASS and the MODEL statement
 *   l_r  = pass t to run PROC LOGISTIC for every pair; any other value skips it
 *   z    = width w of the Zw. format applied to y when y is numeric;
 *          not used when y is character
 *
 * Data sets written: contents_, temp_data, class_, allcomb, one _<a>vs_<b>
 * per pair of levels, plus o__<a>vs_<b> per pair when l_r = t.
 * The level values become part of data set names, so they must be usable
 * inside a SAS name.
 */
%macro my_cn2(data =, y =, x =, c_x =, l_r=, z =);
 %local type open_d close_d row_n row_nn
        comb_i comb_i_ idx_a idx_b lvl_a lvl_b;

 /* Capture the "Variables" ODS table of PROC CONTENTS to learn the type of &y. */
 proc contents data = &data.;
  ods output Variables = contents_;
 run;

 data contents_;
  set contents_;
  where upcase(Variable) = "%upcase(&y.)";
  /* "L" keeps the value in this macro's scope instead of updating an outer TYPE */
  call symputx("type", type, "L");
 run;

 /* No matching row means &y. is not a variable of &data.: stop here. */
 %if %length(&type.) = 0 %then %do;
  %put ERROR: my_cn2: variable &y. was not found in &data..;
  %return;
 %end;

 /* y_ is always character: a numeric y is rendered with the Zw. format. */
 data temp_data;
  set &data.;
  %if "&type." = "Num" %then %do;
   y_ = put(&y., z&z..);
  %end;
  %else %do;
   y_ = &y.;
  %end;
 run;

 /* One row per distinct level. */
 proc sql;
  create table class_ as
   select distinct y_
   from temp_data;
 quit;

 %let open_d  = %sysfunc(open(class_));
 %let row_n   = %sysfunc(attrn(&open_d., nobs));
 %let close_d = %sysfunc(close(&open_d.));

 /* A pair needs at least two levels. */
 %if &row_n. < 2 %then %do;
  %put ERROR: my_cn2: &y. has &row_n. level(s) in &data.; at least 2 are required.;
  %return;
 %end;

 /* y1 ... y<row_n> hold the level values. */
 proc sql noprint;
  select y_
   into :y1 - :y&row_n.
   from class_;
 quit;

 /* ALLCOMB returns one row per pair of level indices. Writing the matrix
    through a single variable name stores it as one column, so every two
    consecutive rows of ALLCOMB form one pair. */
 proc iml;
  allcomb_ = allcomb(&row_n., 2);
  create allcomb var {allcomb_};
  append;
  close allcomb;
 quit;

 %let open_d  = %sysfunc(open(allcomb));
 %let row_nn  = %sysfunc(attrn(&open_d., nobs));
 %let close_d = %sysfunc(close(&open_d.));

 /* r2n_1 ... r2n_<row_nn> hold the level indices, two per pair. */
 proc sql noprint;
  select allcomb_
   into :r2n_1 - :r2n_&row_nn.
   from allcomb;
 quit;

 %do comb_i = 1 %to &row_nn. %by 2;
  %let comb_i_ = %eval(&comb_i. + 1);

  /* index of each level in the pair, then the level value itself */
  %let idx_a = &&r2n_&comb_i..;
  %let idx_b = &&r2n_&comb_i_..;
  %let lvl_a = &&y&idx_a..;
  %let lvl_b = &&y&idx_b..;

  data _&lvl_a.vs_&lvl_b.;
   set temp_data;
   where y_ = "&lvl_a." or y_ = "&lvl_b.";
  run;

  %if "&l_r." = "t" %then %do;
   /* NOTE(review): PREDPROBS= normally takes INDIVIDUAL / CUMULATIVE /
      CROSSVALIDATE; "p2" is kept exactly as written - confirm it is accepted. */
   proc logistic data = _&lvl_a.vs_&lvl_b. plots=all;
    class &c_x.;
    model y_ = &x. &c_x.;
    output out = o__&lvl_a.vs_&lvl_b. PREDICTED= p PREDPROBS= p2;
   run;
  %end;
 %end;

%mend;
%my_cn2(data = mysas.unk, y = final, z=1)


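data：資料集
y：類別變數
x：連續型自變數 (l_r = t 時放入 logistic 模型)
c_x：類別型自變數 (l_r = t 時放入 class 與 model)
l_r：設為 t 時，對每一組配對執行 proc logistic
z：類別變數為數值型態時，指定 Zw. 格式的寬度(位數)：類別值小於 10 時 z=1，小於 100 時 z=2，依此類推；類別變數為文字型態時可不輸入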


4類別分組後結果

Read More

2017年7月4日 星期二

sas macro logistic regression real adaboost

/* List every candidate x variable name, one per observation, so that a
   random subset can be drawn and placed in each model. */
data comb_data;
  /* the :$100. informat gives comb_x a length of 100; @@ holds the line so
     that all names on it are read */
  input comb_x :$100. @@;
  datalines;
area da hfs i0 pa500
;
run;

/*
 * logistic_real_adabost: boosting loop built on weighted PROC LOGISTIC fits.
 *
 * Parameters
 *   data         = input data set; the final split reads a variable named
 *                  Selected from it (0 -> training, otherwise testing)
 *   y            = response used for fitting and for the training table
 *   y_testing    = response used for the testing table
 *   x            = variables carried from one round to the next
 *   ab           = number of models (boosting rounds)
 *   training_obs = number of training observations; the initial weight is
 *                  1 / training_obs
 *   class_x      = variables placed on the CLASS statement
 *   n_x          = maximum number of variables in one model
 *
 * Requires the data set comb_data (variable comb_x, one candidate variable
 * name per row); each round draws its predictors from it at random.
 *
 * Per round: fit the weighted model, compute fm = 0.5 * log(p / (1 - p)),
 * multiply the weights by exp(-y * fm) and rescale them to sum to 1, and
 * store fm as g_x<round>. The final score is the sum of all g_x; tf is 1
 * when the score is positive and -1 otherwise.
 *
 * NOTE(review): exp(-y * fm) and the -1 / 1 coding of tf suggest y is
 * expected to be coded -1 / +1 - confirm against the input data.
 * NOTE(review): ck_pv keeps only rows with a non-missing y, while the final
 * MERGE has no BY statement and pairs rows by position - confirm that the
 * g_x rows still line up with the rows of &data..
 * NOTE(review): only &y., &x. and the new weight are written to the next
 * round's data set, so variables that appear only in class_x are not carried
 * forward - confirm class_x is always a subset of x.
 */
%macro logistic_real_adabost(data = ,
y = ,
        y_testing=,
x =  ,
ab = ,
training_obs=,
class_x= ,
n_x=);
%local lrab lrab_1 var_n comb_xx delete_;

/* Round 1 starts from uniform weights. */
data ck_pv_out1;
set &data.;
w1 = 1/&training_obs.;
run;

%do lrab = 1 %to &ab.;
%let lrab_1 = %eval(&lrab. + 1);

/* Random model size between 1 and n_x. */
data _null_;
u=rand('uniform');
max=&n_x.;
var_n=ceil(max*u);
call symputx("var_n", var_n);
run;
%put &var_n.;

/* Draw that many variable names from comb_data. */
proc surveyselect data= comb_data  n=&var_n. noprint out=comb_out;
run;

proc sql noprint;
select comb_x
into :comb_xx  separated by " "
from comb_out;
quit;

%put &comb_xx.;

/* Weighted fit for this round; the prediction is stored as p<round>. */
proc logistic data = ck_pv_out&lrab. desc noprint;
class      &class_x.    ;
model &y. = &comb_xx.;
output out = logistic_out
PREDICTED= p&lrab.;
weight w&lrab.;
run;

/* p_h and error are diagnostics only; nothing below reads them.
   They must be derived from this round's prediction p<round>, not from a
   variable named p (which may be one of the input predictors). */
data ck_pv;
set logistic_out;
where &y. ^=.;
if p&lrab. >0.5 then p_h = 1;
else if p&lrab. < 0.5 then p_h = 0;
else p_h =.;
if &y. ^= p_h then error = 1;
else error = 0;
run;

proc iml;
use ck_pv;
read all var _all_;
close ck_pv;

/* half log-odds of this round's prediction */
fm = (1/2) # log((p&lrab.)/(1-p&lrab.));

temp = exp((-&y.)#fm);

/* unnormalised, then normalised, weights for the next round */
w_&lrab_1. = w&lrab. # temp;

w&lrab_1. = w_&lrab_1./sum(w_&lrab_1.);

g_x&lrab. = fm;

create ck_pv_out&lrab_1. var {&y. &x. w&lrab_1.};
append;
close ck_pv_out&lrab_1.;
create ck_pv_out_gx&lrab.  var {g_x&lrab.};
append;
close ck_pv_out_gx&lrab.;
quit;

%end;

/* Combine the per-round scores and classify by the sign of their sum. */
data all;
merge  &data.
ck_pv_out_gx1-ck_pv_out_gx&ab.;
x = sum(of g_x1-g_x&ab.);
if x >0 then tf = 1;
else tf = -1;
run;

data training testing;
set all;
if Selected =0 then output training;
else output testing;
run;

proc freq data = training;
table &y. * tf / nocol nopercent ;
title "整體學習 結果 training";
run;title;

proc freq data = testing;
table &y_testing. * tf/ nocol nopercent ;
title "整體學習 結果 testing";
run;title;

/* Remove the per-round work data sets, including ck_pv_out<ab+1>, which the
   last round creates. */
proc datasets noprint;
%do delete_ = 1 %to &ab.;
delete ck_pv_out&delete_.  ck_pv_out_gx&delete_. ;
%end;
delete ck_pv_out%eval(&ab. + 1);
quit;
%mend;
/*
以real_adabost
%logistic_real_adabost(data = mysas.breast_2,
y = Classcar,
                                                        y_testing=Classcar_test,
x = area da hfs i0 pa500 p max_ip a_da,
ab = 10,
training_obs=55,
class_x= ,
n_x=4)

=================================================


以逐步迴歸
proc logistic data = mysas.breast_2 noprint;
model Classcar =  area da hfs i0 pa500 p max_ip a_da
/ selection = stepwise;
output out = logistic_out2   PREDICTED= p;
quit;

data logistic_out2;
set logistic_out2;
if p >= 0.5 then p2=1;
else p2=-1;
run;
data training2 testing2;
set logistic_out2;
if Selected =0 then output training2;
else output testing2;
run;
proc freq data = training2;
table Classcar * p2 / nocol nopercent ;
title "逐步迴歸 結果 training";
run;
proc freq data = testing2;
table Classcar_test * p2/ nocol nopercent ;
title "逐步迴歸 結果 testing";
run;title;

*/

data set: https://archive.ics.uci.edu/ml/datasets/Breast+Tissue
Y:Classcar (惡性腫瘤)
I0 Impedivity (ohm) at zero frequency
PA500 phase angle at 500 KHz
HFS high-frequency slope of phase angle
DA impedance distance between spectral ends
AREA area under spectrum
A/DA area normalized by DA
MAX IP maximum of the spectrum
DR distance between I0 and real part of the maximum frequency point
P length of the spectral curve





Read More

2017年3月9日 星期四