#########################
#CHAPTER 8 EXAMPLES IN R#
#########################

#########
#k-means#
#########

#Generating some data
#Three groups of 100 points each, drawn from normal distributions with
#standard deviation 1 and means (x, y) = (5, 10), (15, 10) and (10, 20).
#The seed is fixed so the simulated data are reproducible.
set.seed(5364)
x1 = rnorm(100, 5, 1)
x2 = rnorm(100, 15, 1)
x3 = rnorm(100, 10, 1)
y1 = rnorm(100, 10, 1)
y2 = rnorm(100, 10, 1)
y3 = rnorm(100, 20, 1)
mydata = data.frame(x = c(x1, x2, x3), y = c(y1, y2, y3))
#asp=1 keeps the x and y axes on the same scale
plot(y ~ x, data = mydata, asp = 1)

#Finding clusters with kmeans
#centers=3 asks for three clusters; the cluster labels (1, 2, 3) are then
#used to index colvect so each cluster gets its own plotting colour.
kmeans.output = kmeans(mydata, centers = 3)
clusters = kmeans.output$cluster
colvect = c("red", "green", "blue")
plot(y ~ x, data = mydata, col = colvect[clusters], asp = 1)

#Centers of clusters
#Redraw the coloured scatter plot and overlay the fitted centers as
#filled black triangles (pch=24).
centers = kmeans.output$centers
plot(y ~ x, data = mydata, col = colvect[clusters], asp = 1)
points(centers, col = 'black', pch = 24, bg = 'black')

#Sums of squares for kmeans
#The last expression adds the within and between parts so the result can
#be compared against totss printed on the first line.
kmeans.output$totss
kmeans.output$withinss
kmeans.output$tot.withinss
kmeans.output$betweenss
kmeans.output$tot.withinss + kmeans.output$betweenss

#Bad choice of initial centers
#Rows of centers0 are the starting centers (9, 20), (10, 10) and (11, 20):
#two of them start next to the group around (10, 20) and only one is left
#for the two groups around (5, 10) and (15, 10).
centers0 = cbind(c(9, 10, 11), c(20, 10, 20))
kout = kmeans(mydata, centers = centers0)
plot(y ~ x, data = mydata, col = colvect[kout$cluster], asp = 1)
#The triangles mark the initial centers0, not the centers kmeans ended at
points(centers0, col = 'black', pch = 24, bg = 'black')
kout$tot.withinss

#NOTE(review): the repeat.kmeans definition below is cut off in the reviewed
#excerpt, so it is left exactly as found (still collapsed onto one comment line).
#Repeating k-means a large number of random times repeat.kmeans=function(data,centers,repetitions){ best.kmeans=NULL best.ssw=Inf for(i in 1:repetitions){ kmeans.temp=kmeans(x=data,centers=centers) if(kmeans.temp$tot.withinss