###########################
#NONLINEAR REGRESSION IN R#
###########################
# Demo script: fits several nonlinear regression methods (LOESS, weighted
# k-nearest neighbours, support vector regression, a neural network and a
# random forest) to noisy samples of sin(x), then compares the predictions on
# a held-out test set with the observed values and with the true sine curve.

##########################
#LOESS (Local Regression)#
##########################

# Creating a data set: 1000 points with x uniform on [0, 2*pi] and
# y = sin(x) plus Gaussian noise (standard deviation 0.1).
set.seed(5366)
x <- runif(1000, 0, 2 * pi)
y <- sin(x) + 0.1 * rnorm(1000)
mydata <- data.frame(x, y)
plot(y ~ x, data = mydata)

# Draw a random set of training row indices.
#
# dataset:   object with rows (only nrow(dataset) is used), e.g. a data frame.
# trainprop: proportion of rows to use for training; one number in [0, 1].
#
# Returns round(nrow(dataset) * trainprop) distinct row indices sampled
# without replacement. Stops early if trainprop is not a single number in
# [0, 1] instead of failing later inside the sampler.
trainsample <- function(dataset, trainprop) {
  stopifnot(
    is.numeric(trainprop),
    length(trainprop) == 1L,
    !is.na(trainprop),
    trainprop >= 0,
    trainprop <= 1
  )
  n_rows <- nrow(dataset)
  sample.int(n_rows, round(n_rows * trainprop, 0))
}

# 70 % of the rows are used for training; mydata[-train, ] is the test set.
train <- trainsample(mydata, 0.7)

# Loess
# surface = "direct" asks loess to compute the fit exactly instead of
# interpolating it (see ?loess.control).
model <- loess(y ~ x, data = mydata[train, ],
               control = loess.control(surface = "direct"))
plot(model)
plot(model$x, model$y)
plot(model$x, model$fitted)
# Overlay the true curve sin(x) on the fitted values.
lines(sort(model$x), sin(sort(model$x)), col = "red")

# Predicting values on test set
yhat <- predict(model, newdata = mydata[-train, ])
plot(mydata$y[-train], yhat)
cor(mydata$y[-train], yhat)

# Effect of Span/Bandwidth
# span = 0.75 (default)
model <- loess(y ~ x, data = mydata[train, ],
               control = loess.control(surface = "direct"), span = 0.75)
plot(model$x, model$fitted)

# Fit a loess model with the given span and plot its fitted values.
#
# s:    span (smoothing parameter) passed to loess().
# data: data frame with columns x and y to fit on; defaults to the training
#       rows of the global mydata, so temp(s) works as before.
#
# The plot title shows the span so that successive plots can be told apart.
# Returns the fitted loess object invisibly.
temp <- function(s, data = mydata[train, ]) {
  fit <- loess(y ~ x, data = data,
               control = loess.control(surface = "direct"), span = s)
  plot(fit$x, fit$fitted, xlab = "x", ylab = "fitted",
       main = paste("span =", s))
  invisible(fit)
}

# Default span first, then a sweep from very wiggly (small span) to very
# smooth (large span) fits.
temp(0.75)
for (span_value in c(0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75,
                     0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 2.0, 2.5, 3.0,
                     10, 100, 1000)) {
  temp(span_value)
}

# Straight-line fit on the same training rows, for comparison.
plot(x, y)
l.model <- lm(y ~ x, data = mydata[train, ])
plot(mydata[train, ])
abline(l.model, col = "red")

##############################
#Weighted k-Nearest Neighbors#
##############################
library(kknn)

# NOTE(review): fit.sine is fitted on the full data set (test rows included)
# and is not referenced again in this script - confirm whether it is needed.
fit.sine <- train.kknn(y ~ x, data = mydata)

yhat <- kknn(y ~ x, train = mydata[train, ], test = mydata[-train, ],
             k = 11, kernel = "optimal", distance = 2)$fitted.values
plot(mydata$y[-train], yhat)
cor(mydata$y[-train], yhat)
plot(mydata$x[-train], yhat)
lines(sort(mydata$x[-train]), sin(sort(mydata$x[-train])), col = "red")

###########################
#Support Vector Regression#
###########################
library(e1071)

model <- svm(y ~ x, data = mydata[train, ])
yhat <- predict(model, newdata = mydata[-train, ])
plot(mydata$y[-train], yhat)
cor(mydata$y[-train], yhat)
plot(mydata$x[-train], yhat)
lines(sort(mydata$x[-train]), sin(sort(mydata$x[-train])), col = "red")

#################
#Neural Networks#
#################
library(nnet)

# size = 10 hidden units; linout = TRUE gives a linear output unit, as needed
# for regression.
model <- nnet(y ~ x, data = mydata[train, ], size = 10, linout = TRUE)
yhat <- predict(model, newdata = mydata[-train, ])
plot(mydata$y[-train], yhat)
cor(mydata$y[-train], yhat)
plot(mydata$x[-train], yhat)
lines(sort(mydata$x[-train]), sin(sort(mydata$x[-train])), col = "red")

###############
#Random Forest#
###############
library(randomForest)

model <- randomForest(y ~ x, data = mydata[train, ])
yhat <- predict(model, newdata = mydata[-train, ])
plot(mydata$y[-train], yhat)
cor(mydata$y[-train], yhat)
plot(mydata$x[-train], yhat)
lines(sort(mydata$x[-train]), sin(sort(mydata$x[-train])), col = "red")