#######################################
# MATH 5301 Final Exam Review Solutions
#######################################
#
# Flat analysis script: downloads the course data sets, then works through
# the review problems in order. Each problem section reads the columns it
# needs from the data frames loaded below and produces plots / test output
# as side effects.

##################################################################
# Importing the data sets

residual.data <- read.csv(
  url("https://faculty.tarleton.edu/crawford/documents/math5301/residual-data.txt")
)
waiting.times <- read.csv(
  url("https://faculty.tarleton.edu/crawford/documents/math5301/waiting-times.txt")
)
experimental.data <- read.csv(
  url("https://faculty.tarleton.edu/crawford/documents/math5301/experimental-data1.txt")
)
time.series <- read.csv(
  url("https://faculty.tarleton.edu/crawford/documents/math5301/time-series1.txt")
)
sales <- read.csv(
  url("https://faculty.tarleton.edu/crawford/documents/math5301/sales1.txt")
)

##################################################################
# Problem 1

e <- residual.data$e

#########
# Part (a)
hist(e)
# Yes, they appear to be normal.

#########
# Part (b)
qqnorm(e)
# The above qq-plot function is specifically built to assess
# normality. The straight, linear plot indicates normality.

#########
# Part (c)
# The highest power test of normality is the Shapiro-Wilk test.
shapiro.test(e)

##################################################################
# Problem 2

w <- waiting.times$w

# We will use the Kolmogorov-Smirnov test to test whether the
# waiting times are exponentially distributed.
# First, we need to build an exponential cdf function with
# rate parameter lambda = 56.3.
# NOTE(review): pexp() is parameterised by rate (1 / mean). If 56.3 in the
# problem statement is the MEAN waiting time, this should be rate = 1 / 56.3
# -- confirm against the problem statement.
exp.cdf <- function(x) {
  pexp(x, rate = 56.3)
}

# KS-test
ks.test(w, exp.cdf)

##################################################################
# Problem 3

x1 <- experimental.data$x1
x2 <- experimental.data$x2

#########
# Part (a)
# The Mann-Whitney-Wilcoxon test will usually fail to detect a
# difference between two samples if they have the same mean/median.
# Therefore, the Kolmogorov-Smirnov test would be a better choice
# in this situation.

#########
# Part (b)
mean(x1)
mean(x2)
sd(x1)
sd(x2)

#########
# Part (c)
hist(x1)
hist(x2)
# No, they certainly do not appear to be identically distributed.
# x1 appears to be approximately normal, and x2 appears to be
# uniformly distributed.

#########
# Part (d)
# Plotting sorted sample against sorted sample gives a two-sample qq-plot.
plot(sort(x1), sort(x2))
# The curvature in this qq-plot indicates that the variables are
# not identically distributed.

# By contrast, consider some hypothetical examples:
u1 <- rnorm(1000)
u2 <- rnorm(1000)
plot(sort(u1), sort(u2))

u1 <- rexp(1000)
u2 <- rexp(1000)
plot(sort(u1), sort(u2))

u1 <- rt(1000, df = 17)
u2 <- rt(1000, df = 17)
plot(sort(u1), sort(u2))
# The above qq-plots are linear, because in each of those examples,
# the variables u1 and u2 are identically distributed.

#########
# Part (e)
wilcox.test(x1, x2, paired = FALSE)

#########
# Part (f)
ks.test(x1, x2)

#########
# Part (g)
# The results of parts (b) through (f) confirm our statements
# from part (a).

##################################################################
# Problem 4

x <- time.series$x
y <- time.series$y

library(forecast)

#########
# Part (a)
acf(x)
# The acf function cuts off after a time lag of tau = 2, indicating
# an MA(2) model.

#########
# Part (b)
acf(y)
# The acf function gradually attenuates, so an AR model is a
# better choice.

##################################################################
# Problem 7

library(freqdom)

# NOTE: x is re-bound here; from this point on it is the sales series,
# not the Problem 4 time series.
x <- sales$x

# Spectral density function
freq.range <- seq(from = 0, to = pi, length.out = 1000)
x.spec <- spectral.density(x, freq = freq.range)

# x.spec$operators is handed to plot() as the y values. Flatten it and take
# the real part explicitly so the same values are drawn without relying on
# plot()'s implicit coercion (which warns when it discards imaginary parts).
spec.values <- Re(as.vector(x.spec$operators))
plot(x.spec$freq, spec.values, type = "l",
     xlab = "Frequency",
     ylab = "Spectral Density")

# Mark the angular frequencies 2*pi/period for annual (365 days),
# monthly (30 days) and weekly (7 days) cycles with vertical blue segments
# running from 0 up to the largest spectral magnitude.
M <- max(abs(x.spec$operators))
period.freqs <- 2 * pi / c(365, 30, 7)
segments(x0 = period.freqs, y0 = 0, x1 = period.freqs, y1 = M, col = "blue")

# Weekly variation contributes the most to the variance in the
# time series, followed by annual variation. Based on the spectral
# density, there is no indication of monthly variation.
# The ranked order is:
#   Highest: Weekly
#   Mid:     Annual
#   Lowest:  Monthly

# Time Series Plots
# Full series.
plot(seq_along(x), x, type = "l",
     xlab = "t (in days)",
     ylab = "x")

# First year only. The index is capped at length(x) so a shorter series does
# not produce NA values, and it is not named `t`, which would mask base::t().
first.year <- seq_len(min(365, length(x)))
plot(first.year, x[first.year], type = "l",
     xlab = "t (in days)",
     ylab = "x")