SAS AML 的 SOURCE_CHECK 已經有資料筆數、NULL、UNIQUE與LOOKUP的檢核了
SOURCE_CHECK_2 檢核觀測值的MACRO。
範例:
data DB_A;
input Pty $ score number;
datalines;
A 100 1
B 90 2
C 80 4
C . .
D 50 5
F 72 6
G . .
;
run;
data DB_B;
input Pty $ score number;
datalines;
A 100 1
B 90 2
C 82 4
D 51 6
E 59 7
G . .
;
run;
%CK_VALUE(DB1_DATA=work.DB_A,
DB2_DATA=work.DB_B,
BY=Pty,
ALL_VAR=score | number,
VAR_NUM=2)
結果:
NG:兩邊DB,VAR的觀測值不一樣。
NOT FOUND 兩邊MISSING。
OK:一至。
/*====================================================
DATA1 DB_A 資料集1 work.也要輸入
DATA2 DB_B 資料集2
BY KEY
ALL_VAR 全部變數名稱 | 分隔
VAR_NUM 幾個變數
JIMMY CHEN
======================================================*/
%macro CK_VALUE(DB1_DATA=,
DB2_DATA=,
BY =,
ALL_VAR = ,
VAR_NUM=);
%let DATA1 = %scan(&DB1_DATA., 2,%str(.));
%let DATA2 = %scan(&DB2_DATA., 2,%str(.));
proc sort data = &DB1_DATA. out = DB1_REN nodupkey; by &BY.;run;
proc sort data = &DB2_DATA. out = DB2_REN nodupkey; by &BY.;run;
%do d = 1 %to 2;
%do VAR_N = 1 %to &VAR_NUM.;
%let VAR&VAR_N. = %scan(&ALL_VAR., &VAR_N.,%str(|));
/* %put &&VAR&VAR_N.. ;*/
/* %put &&VAR&VAR_N.._&&DATA&D..;*/
data DB&d._REN;
length &&VAR&VAR_N.._ck $30.;
set DB&d._REN(rename =(&&VAR&VAR_N.. =&&VAR&VAR_N.._DB&d.));
run;
%end;
%end;
%do VAR_N = 1 %to &VAR_NUM.;
%let VAR&VAR_N. = %scan(&ALL_VAR., &VAR_N.,%str(|));
data CK_&&VAR&VAR_N.. (keep =&BY. &&VAR&VAR_N.._ck);
merge DB1_REN
DB2_REN;
by &BY.;
if &&VAR&VAR_N.._DB1 = &&VAR&VAR_N.._DB2 then &&VAR&VAR_N.._ck = "OK";
else &&VAR&VAR_N.._ck = "NG";
if missing(&&VAR&VAR_N.._DB1) then &&VAR&VAR_N.._ck = "NOT FOUND &DB1_DATA.";
if missing(&&VAR&VAR_N.._DB2) then &&VAR&VAR_N.._ck = "NOT FOUND &DB2_DATA.";
if missing(&&VAR&VAR_N.._DB1) and missing(&&VAR&VAR_N.._DB2) then &&VAR&VAR_N.._ck = "NOT FOUND";
run;
%end;
data CK_VALUE_ALL;
merge DB1_REN
DB2_REN;
by &BY.;
run;
%do VAR_N = 1 %to &VAR_NUM.;
%let VAR&VAR_N. = %scan(&ALL_VAR., &VAR_N.,%str(|));
data CK_VALUE_ALL;
merge CK_VALUE_ALL
CK_&&VAR&VAR_N..;
by &BY.;
run;
proc freq data = CK_VALUE_ALL;
title "檢核觀測值: &&VAR&VAR_N..";
table &&VAR&VAR_N.._ck;
run;title;
proc print data = Ck_value_all;
where &&VAR&VAR_N.._ck = "NG";
run;
%end;
%mend;
%CK_VALUE(DB1_DATA=amlland.Account_trade_extract,
DB2_DATA=keepok.account_trade_extract,
BY=TRANSACTION_REFERENCE_NUMBER,
ALL_VAR=PRINCIPAL_AMOUNT_IN_BASE_CCY | ACCOUNT_NUMBER,
VAR_NUM=2)
0 意見:
張貼留言