#######################################
#MATH 5301 Final Exam Review Solutions#
#######################################


##################################################################################################################
#Importing the data sets

#Each data set is downloaded from the course web site (network access
#is required) and parsed with the read.csv defaults.  The resulting
#data frames are used by the problems below.
residual.data = read.csv(file = url("https://faculty.tarleton.edu/crawford/documents/math5301/residual-data.txt"))
waiting.times = read.csv(file = url("https://faculty.tarleton.edu/crawford/documents/math5301/waiting-times.txt"))
experimental.data = read.csv(file = url("https://faculty.tarleton.edu/crawford/documents/math5301/experimental-data1.txt"))
time.series = read.csv(file = url("https://faculty.tarleton.edu/crawford/documents/math5301/time-series1.txt"))
sales = read.csv(file = url("https://faculty.tarleton.edu/crawford/documents/math5301/sales1.txt"))



####################################################################
#Problem 1

#Residuals whose normality is assessed in parts (a)-(c)
e=residual.data$e

#########
#Part (a)

#A histogram gives a first look at the shape of the distribution.
hist(e)

#Yes, they appear to be normal


#########
#Part (b)

#Normal qq-plot: sample quantiles of e against theoretical normal
#quantiles.  qqline() adds the reference line through the first and
#third quartiles, so departures from linearity are judged against
#a drawn line rather than by eye alone.
qqnorm(e)
qqline(e)

#The above qq-plot function is specifically built to assess 
#normality.  The straight, linear plot indicates normality.


#########
#Part (c)

#The Shapiro-Wilk test is generally the most powerful of the common
#tests of normality.  Its null hypothesis is that the data are
#normally distributed, so a large p-value means normality is not
#rejected.

shapiro.test(e)



###################################################################
#Problem 2

#Observed waiting times
w = waiting.times$w

#The Kolmogorov-Smirnov test is used to check whether the waiting
#times follow an exponential distribution.

#The test needs the hypothesized cdf as a function, so the first step
#is to build an exponential cdf with rate parameter lambda = 56.3

#Exponential cdf used as the hypothesized distribution in the KS-test.
#  x:    numeric vector of points at which the cdf is evaluated
#  rate: rate parameter lambda; defaults to 56.3, so calling
#        exp.cdf(x) with a single argument works as before
#Returns pexp(x, rate), i.e. P(X <= x), one value per element of x.
exp.cdf=function(x,rate=56.3){
  pexp(x,rate=rate)
}

#One-sample KS-test: the empirical cdf of w is compared with the
#hypothesized cdf supplied as a function.

ks.test(w, exp.cdf)



###################################################################
#Problem 3

#The two experimental samples compared throughout this problem
x1 = experimental.data$x1
x2 = experimental.data$x2


#########
#Part (a)

#The Mann-Whitney-Wilcoxon test responds mainly to a shift in
#location, so it will usually fail to detect a difference between
#two samples that have the same mean/median.  The Kolmogorov-Smirnov
#test compares the entire empirical cdfs, so it would be a better
#choice in this situation.


#########
#Part (b)

#Sample means first, then sample standard deviations
mean(x1)
mean(x2)
sd(x1)
sd(x2)


#########
#Part (c)

#One histogram per sample, to compare the shapes
hist(x1)
hist(x2)

#No, they certainly do not appear to be identically distributed.
#x1 looks approximately normal, whereas x2 looks uniformly
#distributed.


#########
#Part (d)

#Two-sample qq-plot.  qqplot() sorts both samples itself and, when
#the numbers of non-missing values differ, interpolates the longer
#sample, so the plot does not depend on the samples having equal
#length.  The dashed line is y = x: identically distributed samples
#should fall along that line, not merely along some straight line.
qqplot(x1,x2)
abline(0,1,lty=2)

#The curvature in this qq-plot indicates that the variables are
#not identically distributed.

#By contrast, consider some hypothetical examples in which both
#samples are drawn from the same distribution:

#Standard normal
u1=rnorm(1000)
u2=rnorm(1000)
qqplot(u1,u2)
abline(0,1,lty=2)

#Exponential with rate 1
u1=rexp(1000)
u2=rexp(1000)
qqplot(u1,u2)
abline(0,1,lty=2)

#Student t with 17 degrees of freedom
u1=rt(1000,df=17)
u2=rt(1000,df=17)
qqplot(u1,u2)
abline(0,1,lty=2)

#The above qq-plots are linear, because in each of those examples,
#the variables u1 and u2 are identically distributed.


#########
#Part (e)

#Mann-Whitney-Wilcoxon test on the two independent (unpaired) samples
wilcox.test(x1, x2, paired = FALSE)


#########
#Part (f)

#Two-sample Kolmogorov-Smirnov test
ks.test(x1, x2)


#########
#Part (g)

#Taken together, the results of parts (b) through (f) confirm the
#statements made in part (a).



##################################################################
#Problem 4

#The two series whose autocorrelation structure is examined
x = time.series$x
y = time.series$y

#NOTE(review): nothing visible in this section appears to call a
#forecast function (acf() is in the stats package) - confirm whether
#this is still needed.
library(forecast)

#########
#Part (a)

#Sample autocorrelation function of x
acf(x)

#The acf function cuts off after a time lag of tau = 2, which
#indicates a MA(2) model.


#########
#Part (b)

#Sample autocorrelation function of y
acf(y)

#Here the acf function attenuates gradually instead of cutting off,
#so an AR model is a better choice.



#################################################################
#Problem 7


library(freqdom)

#Sales series (time index is in days, see the time series plots)
x=sales$x


#Spectral density function, estimated on a grid of 1000 frequencies
#from 0 to pi.  The argument is spelled length.out in full rather
#than relying on partial matching of "length".
freq.range=seq(from=0,to=pi,length.out=1000)
x.spec=spectral.density(x,freq=freq.range)

#NOTE(review): x.spec$operators is presumably a complex-valued array
#(M below already takes abs()); for a single real series the
#imaginary part should be numerically negligible - confirm against
#the freqdom documentation.  Re() makes the choice of the real part
#explicit instead of leaving it to an implicit coercion inside plot().
plot(x.spec$freq,Re(as.vector(x.spec$operators)),type="l",
     xlab="Frequency",
     ylab="Spectral Density")

#Vertical markers from 0 up to the largest density value at the
#frequencies 2*pi/period for periods of 365, 30 and 7 days
#(annual, monthly and weekly cycles).
M=max(abs(x.spec$operators))
segments(x0=2*pi/c(365,30,7),y0=0,y1=M,col="blue")


#Weekly variation contributes the most to the variance in the 
#time series, followed by annual variation.  Based on the spectral
#density, there is no indication of monthly variation.

#The ranked order is:

#Highest:  Weekly
#Mid:      Annual
#Lowest:   Monthly



#Time Series Plots

#Full series.  seq_along(x) is used for the time index so that the
#index always has exactly one entry per observation.
plot(seq_along(x),x,type="l",
     xlab="t (in days)",
     ylab="x")


#First 365 days only.  The index is capped at length(x) so that a
#series shorter than 365 observations is not padded with NA values.
t=seq_len(min(365,length(x)))

plot(t,x[t],type="l",
     xlab="t (in days)",
     ylab="x")