Box Plot (Turkey) Method of Identifying Outliers
If a value is higher than the 1.5*IQR above the upper quartile (Q3), the value will be considered as outlier. Similarly, if a value is lower than the 1.5*IQR below the lower quartile (Q1), the value will be considered as outlier.
IQR is interquartile range. It measures dispersion or variation. IQR = Q3 -Q1.If distribution of a variable is skewed, cutoff of 1.5 times IQR might lead to a very high number of values to be assigned outliers. In this case, you can use cutoff of 3.
Imputing Values (Capping / Flooring)
You can replace all the values that exist outside the following limit with the limit value.
Lower limit of acceptable range = Q1 - 3* (Q3-Q1)
Upper limit of acceptable range = Q3 + 3* (Q3-Q1)
SAS Macro for Outlier Capping / Flooring
%macro outliers(input=, vars=, output= );
%let Q1=;
%let Q3=;
%let varL=;
%let varH=;
%let n=%sysfunc(countw(&vars));
%do i= 1 %to &n;
%let val = %scan(&vars,&i);
%let Q1 = &Q1 &val._P25;
%let Q3 = &Q3 &val._P75;
%let varL = &varL &val.L;
%let varH = &varH &val.H;
%end;
/* Calculate the quartiles and inter-quartile range using proc univariate */
proc means data=&input nway noprint;
var &vars;
output out=temp P25= P75= / autoname;
run;
/* Extract the upper and lower limits into macro variables */
data temp;
set temp;
ID = 1;
array varb(&n) &Q1;
array varc(&n) &Q3;
array lower(&n) &varL;
array upper(&n) &varH;
do i = 1 to dim(varb);
lower(i) = varb(i) - 3 * (varc(i) - varb(i));
upper(i) = varc(i) + 3 * (varc(i) - varb(i));
end;
drop i _type_ _freq_;
run;
data temp1;
set &input;
ID = 1;
run;
data &output;
merge temp1 temp;
by ID;
array vars(&n) &vars;
array lower(&n) &varL;
array upper(&n) &varH;
do i = 1 to dim(vars);
if not missing(vars(i)) then do;
if vars(i) < lower(i) then vars(i) = lower(i);
if vars(i) > upper(i) then vars(i) = upper(i);
end;
end;
drop &Q1 &Q3 &varL &varH ID i;
run;
%mend;
%outliers(input=abcd, vars = a b c d, output= test);
So helpful! Thanks for the SAS code!
ReplyDeletewhat do I do with this macro? where do I input things?
ReplyDelete