########################### #MATH 5305 F-Test Examples# ########################### f.test=function(model0,model){ e02=sum((model0$residuals)^2) #Residual sum of squares for small model. e2=sum((model$residuals)^2) #Residual sum of squares for big model. df0=model0$df.residual #Residual degrees of freedom for small model. df=model$df.residual #Residual degrees of freedom for big model. f.stat=((e02-e2)/(df0-df))/(e2/df) p.value=pf(f.stat, df1=df0-df, df2=df, lower.tail=FALSE) return(list(f.stat=f.stat,p.value=p.value)) } #Importing variables in iris data set #into our workspace. attach(iris) #Fitting a model for predicting petal length #based on the other three quantitative variables. model=lm(Petal.Length~Petal.Width+Sepal.Length+Sepal.Width) summary(model) #Fitting an intercept only model. int.model=lm(Petal.Length~1) summary(int.model) #Testing int.model vs model with an F-test. #Note this returns the same F-statistic that #is listed in the model summary. f.test(int.model,model) f.test(int.model,model)$f.stat f.test(int.model,model)$p.value #Based on this p-value, we can reject the null hypothesis #that the intercept-only model is adequate (p-value = 6.98e-109). ############################################# #Are Sepals Good Predictors of Petal Length?# ############################################# #To test this, consider the null hypothesis that the #coefficients on Sepal.Length and Sepal.Width are zero, resulting #in a model with just an intercept and Petal.Width model0=lm(Petal.Length~Petal.Width) summary(model0) #F-test f.test(model0,model) #Again, we can reject the null hypothesis that sepals are not #statistically significant predictors of petal length (p-value = 7.75e-27). ############################### #A Model Based on Species Only# ############################### #Boxplot Stratified by Species plot(Species,Petal.Length) species.model=lm(Petal.Length~Species) summary(species.model) #Interpretation. #R selected setosa as the reference level, since #it is the first level in the categorical variable Species. levels(Species) #The average petal length for setosa flowers is 1.46 #The average petal length for versicolor flowers exceeds #that of setosa flowers by 2.798. #The average petal length for virginica flowers exceeds #that of setosa flowers by 4.09. #################################### #A Model Using all of the Variables# #################################### full.model=lm(Petal.Length~.,data=iris) summary(full.model) #Model with only quantitative predictors. model=lm(Petal.Length~Petal.Width+Sepal.Length+Sepal.Width) summary(model) #Is species a statistically significant predictor? f.test(model,full.model) #Yes! ######################################## #Do Versicolor and Virginica Flowers # #Have the Same Petal Length on Average?# ######################################## Species table(Species) Species.recode=c("setosa","versicolor/virginica","versicolor/virginica") bin.Species=as.factor(Species.recode[Species]) bin.Species table(bin.Species) head(iris) bin.iris=cbind(iris[,1:4],bin.Species) bin.iris #Building a Model Based on bin.iris bin.model=lm(Petal.Length~.,data=bin.iris) summary(bin.model) #Comparing with an F-test f.test(bin.model,full.model) #Therefore, we can reject the null hypothesis #that versicolor and virginica flowers have the #same petal lengths on average.