######################### #Nonparametric Statistics # #The Quade Test ######################################################################## #Lotion Example y = matrix(c( 5, 4, 7, 10, 12, 1, 3, 1, 0, 2, 16, 12, 22, 22, 35, 5, 4, 3, 5, 4, 10, 9, 7, 13, 10, 19, 18, 28, 37, 58, 10, 7, 6, 8, 7), nrow = 7, byrow = TRUE, dimnames = list(Store = as.character(1:7), Brand = LETTERS[1:5])) quade.test(y) #Post-hoc Pairwise Tests library(PMCMRplus) posthoc.quade.test(y, dist="TDist", p.adj="none") ################################################################### #Machine Learning Example library(MASS) View(Cars93) #Removing Data Leaks cbind(colnames(Cars93)) model.data=Cars93[,-c(1,2,27)] #Removing Missing Data apply(is.na(model.data),1,sum) na.index=(apply(is.na(model.data),1,sum)>0) table(na.index) model.data=model.data[!na.index,] #Cylinders Variable table(model.data$Cylinders) is.factor(model.data$Cylinders) model.data$Cylinders=as.numeric(model.data$Cylinders) table(model.data$Cylinders) #Folds for CV createfolds=function(n,K){ reps=ceiling(n/K) folds=sample(rep(1:K,reps)) return(folds[1:n]) } set.seed(5301) folds=createfolds(nrow(model.data),nrow(model.data)) table(folds) ########################################################## #Predictions for Random Forest, SVM, and Linear Regression #Loading libraries library(randomForest) library(e1071) #Setting up prediction vectors n = nrow(model.data) pred.rf=rep(0,n) pred.svm=rep(0,n) pred.lm=rep(0,n) #CV K=nrow(model.data) for(i in 1:K){ train=(folds!=i) test=(folds==i) rf.model=randomForest(MPG.city~.,data=model.data[train,]) svm.model=svm(MPG.city~.,data=model.data[train,]) linear.model=lm(MPG.city~.,data=model.data[train,]) pred.rf[test]=predict(rf.model,newdata=model.data[test,]) pred.svm[test]=predict(svm.model,newdata=model.data[test,]) pred.lm[test]=predict(linear.model,newdata=model.data[test,]) } #Performance (R^2) all.metrics=function(y,pred){ n=length(y) sigma.y=sd(y) e=y-pred rmse=sqrt(mean(e^2)) nrmse=rmse/sigma.y R2=1-n/(n-1)*nrmse^2 mae=mean(abs(e)) return(list(rmse=rmse,nrmse=nrmse,R2=R2,mae=mae)) } all.metrics(model.data$MPG.city,pred.rf)$R2 plot(pred.rf,model.data$MPG.city) lines(c(0,50),c(0,50),col="blue") all.metrics(model.data$MPG.city,pred.svm)$R2 plot(pred.svm,model.data$MPG.city) lines(c(0,50),c(0,50),col="blue") all.metrics(model.data$MPG.city,pred.lm)$R2 plot(pred.lm,model.data$MPG.city) lines(c(0,50),c(0,50),col="blue") #Residual Vectors e.rf=(model.data$MPG.city-pred.rf) e.svm=(model.data$MPG.city-pred.svm) e.lm=(model.data$MPG.city-pred.lm) #Quade Test x=abs(cbind(e.rf,e.svm,e.lm)) quade.test(x) #Post-hoc Tests posthoc.quade.test(x, dist="TDist", p.adj="none") #Removing MPG.highway model.data=model.data[,-6] #Removing MPG.city and Predicting MPG.highway Instead model.data[,5]=model.data[,6] model.data=model.data[,-6]