########################## #LOGISTIC REGRESSION IN R# ########################## ################ #Simple Example# ################ #Generating data X=runif(100000,0,100) g=-3+.06 * X Pi=(1/(1+exp(-g))) U=runif(100000) Y=(U.5)&(U[i]<.8)){Mother[i]="Bachelors"} if((U[i]>.8)&(U[i]<.95)){Mother[i]="HighSchool"} if(U[i]>0.95){Mother[i]="GradDegree"} } Mother=as.factor(Mother) Mother=relevel(Mother,4) tempmodel=lm(1:200~Rank+SAT+Mother) X=model.matrix(tempmodel) beta=c(-9.3,10,.00364,1.5,1.5,0) g=X%*%beta Pi=1/(1+exp(-g)) U2=runif(200,0,1) Retention=(U2=0.5)*1 table(pred.retention) confmatrix(Retention[-train],pred.retention) #ROC Curve install.packages("pROC") library(pROC) plot(roc(Retention[-train],Pihat)) #Hosmer-Lemeshow Goodness-of-fit Test install.packages("MKmisc") library(MKmisc) model=glm(Retention~., data=Retention.data.2, family=binomial) Pihat=predict(model, type="response") HLgof.test(fit=Pihat,obs=Retention) #Visualizing HLgof.test hlgof=function(pihat,Y,ngr){ sortframe=sort(pihat,index=TRUE) pihat=sortframe$x Y=Y[sortframe$ix] s=floor(length(pihat)/ngr) #groupsize pipred=1:ngr piobs=1:ngr Chat=0 for(i in 1:ngr){ index=(((i-1)*s+1):(i*s)) if(i==ngr){index=(((i-1)*s+1):(length(pihat)))} pipred[i]=mean(pihat[index]) nk=length(index) ok=sum(Y[index]) piobs[i]=ok/nk Chat=Chat+(ok-nk*pipred[i])^2/(nk*pipred[i]*(1-pipred[i]))} pvalue=1-pchisq(Chat,ngr-2) return(list(pipred=pipred,piobs=piobs,Chat=Chat,pvalue=pvalue))} hlgof.results=hlgof(Pihat,Retention,10) #Comparing Pihat and Piobs plot(hlgof.results$pipred,hlgof.results$piobs, xlim=c(0,1), ylim=c(0,1)) lines((1:100)/100,(1:100)/100,col="red") piobs=hlgof.results$piobs pipred=hlgof.results$pipred cbind(piobs,pipred) cbind(piobs,pipred)*20 sum((20*piobs-20*pipred)^2/(20*pipred*(1-pipred)))