################################### #LINEAR REGRESSION IN R # #DIAGNOSTICS AND REMEDIAL MEASURES# ################################### ########################################### #Consequences of Incorrect Functional Form# ########################################### #Creating data set.seed(5366) x=1:100 X=cbind(1,x,x^2) beta=c(10,-1.15,0.0205) y=X%*%beta+5*rnorm(100) mydata=data.frame(y,x) #Plot plot(y~x,data=mydata) #Fitting Quadratic Model model2=lm(y~x+I(x^2),data=mydata) summary(model2) plot(y~x,data=mydata) x.plot.vector=1:100 y.plot.vector=predict(model2, newdata=data.frame(x=x.plot.vector)) lines(x.plot.vector, y.plot.vector, col="red") #Residuals for Quadratic Model betahat2=coef(model2) X2=model.matrix(model2) e2=y-X2%*%betahat2 e2 e2=residuals(model2) e2 plot(x,e2) lines(x,rep(0,length(x)),col="red") #Fitting a linear model plot(y~x,data=mydata) model1=lm(y~x,data=mydata) summary(model1) abline(model1,col="red") #Residuals for Linear Model e1=residuals(model1) plot(x,e1) lines(x,rep(0,length(x)),col="red") #f-test Function ftest=function(model0,model){ d0=length(coef(model0)) d=length(coef(model)) n=length(residuals(model)) F=((deviance(model0)-deviance(model))/(d-d0))/ ((deviance(model))/(n-d)) return(pf(F,d-d0,n-d,lower.tail=FALSE)) } ftest(model1,model2) summary(model2) #Plots involving Yhat for Quadratic Model X2 = model.matrix(model2) betahat2=coef(model2) yhat2=X2%*%betahat2 plot(yhat2,y) lines(1:100,1:100,col="red") plot(yhat2,e2) lines(1:100,rep(0,100),col="red") #Plots involving Yhat for Linear Model X1 = model.matrix(model1) betahat1=coef(model1) yhat1=X1%*%betahat1 plot(yhat1,y) lines(1:100,1:100,col="red") plot(yhat1,e1) lines(1:100,rep(0,100),col="red") ########################### #Design Matrix Assumptions# ########################### #Rank of Design Matrix for Model2 install.packages("Matrix") library(Matrix) head(X2) dim(X2) rankMatrix(X2) #Model without Full Rank z=x model3=lm(y~x+I(x^2)+z) X3=model.matrix(model3) head(X3) dim(X3) rankMatrix(X3) summary(model3) #Multicollinearity w=x+rnorm(100) plot(x,w) cor(x,w) model4=lm(y~x+I(x^2)+w) summary(model4) ########################## #NORMALITY OF ERROR TERMS# ########################## #Normal Data set.seed(1234) e=rnorm(1000) hist(e) qqnorm(e) shapiro.test(e) #Uniform Data e=runif(1000) hist(e) qqnorm(e) shapiro.test(e) #Model with Nonnormal Errors x=1:100 y=(200+3*x+20*rnorm(100))^3.56 plot(x,y) model=lm(y~x) abline(model,col="red") e=residuals(model) qqnorm(e) shapiro.test(e) #Achieving Normality with Box-Cox Transformations library(MASS) boxcox.results=boxcox(model) cbind(boxcox.results$x,boxcox.results$y) which.max(boxcox.results$y) lambda=boxcox.results$x[which.max(boxcox.results$y)] y.tilde=y^lambda trans.model=lm(y.tilde~x) plot(x,y.tilde) abline(trans.model,col="red") e.tilde=residuals(trans.model) qqnorm(e.tilde) shapiro.test(e.tilde) #Using Transformed Model to Make Predictions new.data=data.frame(x=runif(10,min=0,max=100)) y.tilde.hat=predict(trans.model,newdata=new.data) plot(new.data$x,y.tilde.hat) abline(trans.model,col="red") y.hat=y.tilde.hat^(1/lambda) plot(x,y) points(new.data$x,y.hat,col="red",pch=20) ################################################ #Constancy of Error Variance (Homoscedasticity)# ################################################ install.packages("lawstat") library("lawstat") plot(x,abs(e)) levene.test(e,as.factor(x<=median(x))) plot(x,abs(e.tilde)) levene.test(e.tilde,as.factor(x<=median(x))) ############################ #A Permutation Test Example# ############################ #Creating Data set.seed(403) x1=runif(100,min=0,max=100) x2=runif(100,min=0,max=100) epsilon=runif(100,min=-50,max=50) y=200+3*x1+5*x2+epsilon mydata=data.frame(y,x1,x2) plot(mydata) #Fitting a model and examining residuals model=lm(y~x1+x2) summary(model) e=residuals(model) hist(e) qqnorm(e) shapiro.test(e) #Model is y = beta0 + beta1*x1 + beta2*x2 #Permutation Test for H0: beta2=0 N=1000 beta2.star.vect = 1:N for(i in 1:N){ temp.x2 = sample(x2, size=length(x2), replace=FALSE) temp.model=lm(y~x1+temp.x2) temp.betahat=coef(temp.model) beta2.star.vect[i]=temp.betahat[3] } hist(beta2.star.vect) #All beta2 values from permutation test are stored in this vector betahat=coef(model) beta2hat=betahat[3] beta2hat #Actual value of beta2hat that we observed #How do they compare? range(beta2.star.vect) perm.pvalue=mean(abs(beta2hat)<=abs(beta2.star.vect)) perm.pvalue