###########################
#MATH 5305 F-Test Examples#
###########################

f.test=function(model0,model){
  e02=sum((model0$residuals)^2)   #Residual sum of squares for small model.
  e2=sum((model$residuals)^2)     #Residual sum of squares for big model.
  df0=model0$df.residual          #Residual degrees of freedom for small model.
  df=model$df.residual            #Residual degrees of freedom for big model.
  
  f.stat=((e02-e2)/(df0-df))/(e2/df)
  
  p.value=pf(f.stat,
             df1=df0-df,
             df2=df,
             lower.tail=FALSE)
  
  return(list(f.stat=f.stat,p.value=p.value))
}



#Importing variables in iris data set
#into our workspace.
attach(iris)    

#Fitting a model for predicting petal length
#based on the other three quantitative variables.

model=lm(Petal.Length~Petal.Width+Sepal.Length+Sepal.Width)
summary(model)


#Fitting an intercept only model.

int.model=lm(Petal.Length~1)
summary(int.model)


#Testing int.model vs model with an F-test.
#Note this returns the same F-statistic that
#is listed in the model summary.

f.test(int.model,model)

f.test(int.model,model)$f.stat

f.test(int.model,model)$p.value

#Based on this p-value, we can reject the null hypothesis
#that the intercept-only model is adequate (p-value = 6.98e-109).


#############################################
#Are Sepals Good Predictors of Petal Length?#
#############################################

#To test this, consider the null hypothesis that the
#coefficients on Sepal.Length and Sepal.Width are zero, resulting
#in a model with just an intercept and Petal.Width

model0=lm(Petal.Length~Petal.Width)
summary(model0)

#F-test
f.test(model0,model)

#Again, we can reject the null hypothesis that sepals are not 
#statistically significant predictors of petal length (p-value = 7.75e-27).


###############################
#A Model Based on Species Only#
###############################

#Boxplot Stratified by Species
plot(Species,Petal.Length)

species.model=lm(Petal.Length~Species)
summary(species.model)

#Interpretation.

#R selected setosa as the reference level, since
#it is the first level in the categorical variable Species.

levels(Species)

#The average petal length for setosa flowers is 1.46

#The average petal length for versicolor flowers exceeds
#that of setosa flowers by 2.798.

#The average petal length for virginica flowers exceeds
#that of setosa flowers by 4.09.



####################################
#A Model Using all of the Variables#
####################################

full.model=lm(Petal.Length~.,data=iris)
summary(full.model)

#Model with only quantitative predictors.
model=lm(Petal.Length~Petal.Width+Sepal.Length+Sepal.Width)
summary(model)

#Is species a statistically significant predictor?

f.test(model,full.model)     #Yes!



########################################
#Do Versicolor and Virginica Flowers   #
#Have the Same Petal Length on Average?#
########################################

Species
table(Species)

Species.recode=c("setosa","versicolor/virginica","versicolor/virginica")
bin.Species=as.factor(Species.recode[Species])

bin.Species
table(bin.Species)

head(iris)

bin.iris=cbind(iris[,1:4],bin.Species)
bin.iris


#Building a Model Based on bin.iris

bin.model=lm(Petal.Length~.,data=bin.iris)
summary(bin.model)

#Comparing with an F-test
f.test(bin.model,full.model)

#Therefore, we can reject the null hypothesis
#that versicolor and virginica flowers have the 
#same petal lengths on average.