*Data Mining SAS Notes;
*PROC IML and Macros;

/* Conventions used throughout these notes:
   - Every example is self-contained and can be submitted on its own.
   - Each PROC IML session is ended with QUIT. IML executes statements as
     they are submitted, and RUN inside IML is a module-invocation statement
     rather than a step boundary, so no RUN is placed before QUIT.
   - DATA and PROC steps outside IML are closed with an explicit RUN. */

*PROC IML (Interactive Matrix Language);
proc iml;
    *Defining two matrices;
    A = {1 2, 3 4};
    B = {5 6, 7 8};
    C = A + B;          *Sum;
    D = A * B;          *Product (matrix multiplication);
    E = inv(A);         *Inverse;
    F = A`;             *Transpose;
    print A B C D E F;  *Printing the matrices;
quit;

*Using the REPEAT function;
proc iml;
    A = repeat(0, 4, 3);   *4 x 3 matrix of zeros;
    x = repeat(1, 10, 1);  *10 x 1 vector of ones;
    print A x;
quit;

*Editing components of a matrix/vector;
proc iml;
    A = repeat(0, 4, 3);
    x = repeat(1, 10, 1);
    A[1,2] = 7;            *Row 1 and column 2 of A;
    x[3] = 7;              *Third element of x;
    print A x;
quit;

*MATH 5305 Lab 2 with PROC IML;
*Importing and printing data;
data math5305lab2;
    infile 'C:\Users\jcrawford\Desktop\math5305Lab2Data.txt' dlm=',';
    input Y X1 X2 X3;
run;

proc print data=math5305lab2;
run;

proc iml;
    use math5305lab2;        *Opens the math5305lab2 data set for reading;
    read all;                *Loads the variables as vectors (Y X1 X2 X3 are used below);
    close math5305lab2;      *The data set is no longer needed once it is in memory;
    X0 = j(nrow(Y), 1, 1);   *Intercept column of ones sized from the data instead of a fixed 100;
    X = X0 || X1 || X2 || X3;          *Combines vectors into a design matrix;
    betahat = solve(X`*X, X`*Y);       *Calculates OLS estimator betahat from the normal equations;
    Yhat = X * betahat;                *Calculates fitted values Yhat;
    e = Y - Yhat;                      *Calculates residuals e;
    print betahat;                     *Prints betahat;
    call pgraf(Yhat || Y);             *Scatterplot of Y vs. Yhat;
    call pgraf(Yhat || e);             *Scatterplot of e vs. Yhat;
quit;

*Writing to a data set with proc iml;
proc iml;
    x = {1,2,3};
    y = {51,52,53};
    create mydata1;          *Creates (or replaces) WORK.mydata1;
    append var {x,y};        *Writes x and y as columns;
    close mydata1;
quit;

proc print data=mydata1;
run;

*Appending data to an existing data set with PROC IML;
proc iml;
    x = {4,5,6};
    y = {54,55,56};
    edit mydata1;            *Opens the existing data set for update instead of recreating it;
    append var {x,y};        *Adds three more rows after the existing ones;
    close mydata1;
quit;

proc print data=mydata1;
run;

*Appending data sets with PROC APPEND;
*Data set 1;
proc iml;
    x = {1,2,3};
    y = {51,52,53};
    create mydata1;          *Recreates mydata1 so it holds only these three rows;
    append var {x,y};
    close mydata1;
quit;

proc print data=mydata1;
run;

*Data set 2;
proc iml;
    x = {4,5,6};
    y = {54,55,56};
    create mydata2;
    append var {x,y};
    close mydata2;
quit;

proc print data=mydata2;
run;

*Appending mydata2 to mydata1;
proc append base=mydata1 data=mydata2;
run;

proc print data=mydata1;
run;

*PROC IML Functions;
*Creating a simple function to add two numbers;
proc iml;
    *mysum returns x+y and works elementwise when both arguments are vectors;
    start mysum(x, y);
        return(x + y);
    finish;

    s = mysum(5, 7);
    print s;
    s2 = mysum({1,2,3,4}, {5,6,7,8});
    print s2;
quit;

*Storing the first n Fibonacci numbers in a data set;
proc iml;
    /* fib(n): returns an n x 1 vector holding the first n Fibonacci numbers
       (1, 1, 2, 3, 5, ...). n is expected to be a positive integer.
       The vector starts as all ones, which already covers the first two
       terms. The loop from 3 to n runs zero times when n is 1 or 2, so no
       special case is needed. The earlier special case returned the scalar 1
       for n=2 instead of the two-element vector. */
    start fib(n);
        fibvector = j(n, 1, 1);
        do i = 3 to n;
            fibvector[i] = fibvector[i-1] + fibvector[i-2];
        end;
        return(fibvector);
    finish;

    f = fib(30);
    print f;
    create fibonacci var {f};   *Names the column explicitly rather than relying on the default;
    append;
    close fibonacci;
quit;

proc print data=fibonacci;
run;

*Sequences and random vectors;
proc iml;
    x = (1:20)`;                      *Column vector 1 through 20;
    u = uniform(repeat(0, 20, 1));    *20 uniform draws with seed 0 (presumably clock-seeded so values change per run - confirm);
    print x u;
quit;

*Creating a vector y;
proc iml;
    x = (1:20)`;
    u = uniform(repeat(0, 20, 1));
    y = repeat(0, 20, 1);
    *Rows 1-10 get y=1 when u<0.2 and rows 11-20 get y=1 when u>0.2;
    do i = 1 to 10;
        if u[i] < 0.2 then y[i] = 1;
    end;
    do i = 11 to 20;
        if u[i] > 0.2 then y[i] = 1;
    end;
    print x y;
quit;

*The LOC function;
proc iml;
    x = (1:20)`;
    u = uniform(repeat(0, 20, 1));
    y = repeat(0, 20, 1);
    do i = 1 to 10;
        if u[i] < 0.2 then y[i] = 1;
    end;
    do i = 11 to 20;
        if u[i] > 0.2 then y[i] = 1;
    end;
    y1 = y[loc(x <= 10)];    *Elements of y at the positions where x<=10;
    y2 = y[loc(x > 10)];     *Elements of y at the positions where x>10;
    mu1 = mean(y1);
    mu2 = mean(y2);
    print x y y1 y2 mu1 mu2;
quit;

******** *MACROS* ********;

*Simple macro example;
/* The macro emits the text of one assignment statement. It must be called
   inside a DATA step, where that text becomes ordinary DATA step code. */
%macro mysum(x=,y=);
    mysum_output=&x+&y;
%mend mysum;

data testing_mysum;
    %mysum(x=5,y=7);
run;

proc print data=testing_mysum;
run;

*Macros are just text substitution;
/* This step is what the macro call above expands to. */
data testing_mysum;
    mysum_output=5+7;
run;

proc print data=testing_mysum;
run;