#This is a brief introduction to the statistical programming package R,
#presented mostly through examples.


#DOWNLOADING R AND R STUDIO

#R can be downloaded for free at www.r-project.org/.

#RStudio is a nice "front end" (IDE) for R and can be downloaded for free at
#https://posit.co/downloads/ (formerly http://rstudio.org/).


#SCRIPTS AND THE CONSOLE

#The two windows you will work with most in R are the script window, where
#programs are written/edited, and the console window, where commands are
#executed.

#This document is a script, a text file for keeping R code neat and 
#organized.

#Highlight the command below and press "ctrl + r".

2 + 2

#R copied this command into the console window and executed it.

#Commands can also be typed straight into the console.  Try typing

x = 7
x
y = 5
y
x + y

#The variables x and y now show up in the workspace window to the right.
#The same list of workspace variables can be printed with the command

ls()

#Scripts can be saved as files with the extension .R.  From the workspace
#window, all the data in your workspace can be saved as a .RData file.

#The Clear All command in the Workspace window deletes every object from
#your workspace.  A single variable, such as x, can be deleted with

remove(x)

#Go to "Edit", "Clear Console" (or "ctrl + l") to clear all the text from 
#the console.  This doesn't actually delete anything -- it just clears the
#text from the window.

#The comment character in R is "#".  Lines of code starting with this
#character are ignored by R.  This allows you to write comments in your
#scripts without generating errors when the entire script is executed.


##################
#BASIC ARITHMETIC#
##################

2 + 2
8 - 5
4 * 7
7 / 3
2^3
sqrt(16)
log(2.718)             #Natural log (base e)
log(1000, base = 10)   #Log base 10

#"=" assigns: the variable on the left takes the value of the right side.
x = 5
x

#The right side is evaluated first, so x may appear on both sides.
x = x + 20
x

#########
#VECTORS#
#########

#c() combines values into a vector; square brackets pick out elements
#(indexing starts at 1).
x = c(7, 9, 3, -8, 5)
x
x[1]
x[4]

length(x)
sum(x)

#Arithmetic on vectors is carried out element by element by default.
x = 1:10
x

x + 3
x - 4
x^2
1 / x

#rep repeats a value; seq builds an evenly spaced sequence.
x = rep(15, 4)
x

x = seq(from = 10, to = 40, by = 5)
x

#Two vectors of equal length are combined element by element.
x = 1:5
y = 6:10

x + y
x - y
x * y
x / y
x^y

#c() also concatenates whole vectors.
z = c(x, y)
z



#######
#PLOTS#
#######

#Parabola: points first, then the same data drawn as a line.
x = -5:5
y = x^2
plot(x, y)
plot(x, y, type = "l")

#lines() adds to the existing plot instead of starting a new one.
plot(x, y)
lines(x, y, col = "red")

#Sine curve on a fine grid from 0 to 2*pi.
x = seq(from = 0, to = 2 * pi, by = 0.01)
x
y = sin(x)
plot(x, y, type = "l")
plot(x, y, type = "l", ylim = c(-0.5, 0.5))   #ylim sets the visible y range



############
#STATISTICS#
############

x = 1:10

mean(x)
sd(x)                                  #Standard deviation
summary(x)

#Draw 1000 normal random variables with mean 500 and sd 100.
x = rnorm(1000, mean = 500, sd = 100)

length(x)
sum(x)
mean(x)
sd(x)
summary(x)

hist(x)                                #Frequency histogram
hist(x, breaks = 100)

#Relative frequency histogram with the normal(500, 100) density drawn on top.
hist(x, breaks = 100, freq = FALSE)
index = 200:800
lines(index, 1 / (sqrt(2 * pi) * 100) * exp(-(index - 500)^2 / (2 * 100^2)),
      col = "red")

#Kernel density estimate of the sample, again with the true density in red.
plot(density(x))
lines(index, 1 / (sqrt(2 * pi) * 100) * exp(-(index - 500)^2 / (2 * 100^2)),
      col = "red")


#Draw 1000 uniform random variables on the interval [70, 90].
x = runif(1000, min = 70, max = 90)
hist(x)

#Transform uniform(0, 1) draws and compare the histogram with the curve
#1/20 * exp(-t/20) drawn in red.
U = runif(1000)
x = -20 * log(1 - U)
hist(x, breaks = 50, freq = FALSE)
lines(1:150, 1 / 20 * exp(-(1:150) / 20), col = "red")



##########
#MATRICES#
##########


#Matrix multiplication in R is given by %*%.

#Define two vectors of the same length first: %*% stops with a
#"non-conformable arguments" error when two plain vectors differ in length.
x = 1:5
y = 6:10

x %*% y

#Vectors in R do not have a strict identity as a row-vector or
#column-vector.  R will interpret them to be whatever makes the expression
#sensible.  In the above, R treats x as a row-vector and y as a
#column-vector, so x%*%y is their inner product (returned as a 1x1 matrix),
#even though the product wouldn't make sense if they were both row-vectors
#or both column-vectors.


#HELP

help(rep)   #Shows the documentation page for the function rep.

#The args command tells you what the arguments (input parameters) are for
#a function, which can be helpful.

args(rep)

#Google.  In my experience, using google to get help on something is often
#more effective than the help menu itself.


#MATRICES

#The command cbind takes vectors and binds them together to form a matrix,
#where those vectors are the columns.

x = 1:3
y = 4:6
z = 7:9

#The columns of A are x, y and z (and are labelled with those names).
A = cbind(x, y, z)
A

#matrix(data, nrow, ncol) fills the matrix column by column by default.
B = matrix(c(10, 11, 12, 50, 51, 52), 3, 2)
B

#byrow=TRUE fills the matrix row by row instead.  Pass the logical constant
#TRUE rather than a character string.
B = matrix(c(10, 11, 12, 50, 51, 52), 3, 2, byrow = TRUE)
B

dim(B)      #Returns the dimensions of B.  It's a 3x2 matrix.
dim(B)[1]   #Number of rows in B.
dim(B)[2]   #Number of cols in B.

A = matrix(1:4, 2, 2, byrow = TRUE)
B = matrix(5:8, 2, 2, byrow = TRUE)
A
B
A + B


A * B       #Elementwise multiplication
A %*% B     #Matrix multiplication
A^2         #Square each element
A + 7       #Add 7 to each element of A
A %*% A     #Square A using matrix multiplication
t(A)        #Transpose of A
diag(A)     #Returns the diagonal of A as a vector.
diag(3)     #Creates the 3x3 identity matrix.
help(diag)  #Other uses for diag

A^(-1)      #Find the reciprocal of each entry in A.
solve(A)    #Matrix inverse of A

#The product below gives the identity matrix as expected, possibly with some
#round-off error, due to the numerical instability of finding matrix
#inverses.
solve(A) %*% A


#More on Solve.  Why is it called that?

#byrow=TRUE (the logical constant, not a string) fills the matrices row by
#row.
A = matrix(1:4, 2, 2, byrow = TRUE)
C = matrix(20:23, 2, 2, byrow = TRUE)
A
C

#Suppose we want to solve the equation AB=C for B.
#The solution is B=inverse(A)C, and can be found with Gaussian elimination.

#Try this command.

B = solve(A, C)
B

A %*% B

#The result is equal to C, as expected.

#Now, the inverse of A is the solution to the equation AB=I, where I is the
#2x2 identity matrix.

I = diag(2)

B = solve(A, I)
B

A %*% B       #Returns I as expected, with round-off error.

#We can leave out the second argument, and the default is the identity
#matrix.
B = solve(A)
B


#MATRIX ENTRIES

#Vector entries are selected with square brackets: a single position, a
#vector of positions, or a range.
x = (1:5)^2
x

x[3]
x[c(1, 2, 5)]
x[2:4]

#A 3x5 matrix filled row by row (byrow takes the logical TRUE, not a string).
A = matrix(1:15, 3, 5, byrow = TRUE)
A

A[3, 4]                    #Entry in row 3, column 4.
A[1, ]                     #All of row 1 (returned as a vector).
A[, 4]                     #All of column 4 (returned as a vector).
A[c(1, 3), c(1, 2, 4)]     #Rows 1 and 3, columns 1, 2 and 4.


#LOGICAL OPERATIONS

#Comparisons return TRUE or FALSE.
3 == 7
3 != 7
3 < 7
3 > 7
3 <= 7
3 >= 7

#Applied to a vector, a comparison is made element by element.
x = 1:10
x == 7
x <= 7

#A logical vector inside square brackets keeps only the TRUE positions.
x[x <= 7]

#Some more logical operations:
#"&" is AND
#"|" is OR
#"!" is NOT

TRUE & FALSE
1 & 0

TRUE | FALSE

!TRUE

#Truth tables: each row shows one combination of P and Q.
P = c(TRUE, TRUE, FALSE, FALSE)
Q = c(TRUE, FALSE, TRUE, FALSE)

cbind(P, Q, P & Q)

cbind(P, Q, P | Q)

cbind(P, !P)


#SOME PROGRAMMING TOOLS

#if runs its body only when the condition is TRUE.
if (5 > 1) {
  print("Yes, it is.")
}

if (5 < 1) {
  print("Yes, it is.")
}

#else supplies the body to run when the condition is FALSE.
if (5 > 1) {
  print("Yes, it is.")
} else {
  print("No, it is not")
}

if (5 < 1) {
  print("Yes, it is.")
} else {
  print("No, it is not")
}


#Start with a vector of ten 1s, then overwrite entry i with i^2 in a loop.
x = rep(1, 10)
x

for (i in seq_along(x)) {
  x[i] = i^2
}

x


#Functions

#mysum returns the sum of its two arguments x and y.
mysum = function(x, y) {
  total = x + y
  total
}

mysum(5, 3)


#The next function uses an R object called a list.  Here's an example to 
#show how lists work.

#A list can hold elements of different kinds: here a number, a vector and a
#matrix.  Each element is retrieved by name with "$".
L = list(x = 5, y = 1:4, z = diag(7))

L$x
L$y
L$z


#myops applies the four basic arithmetic operations to x and y and returns
#the results in a named list with elements sum, diff, prod and quot.
myops = function(x, y) {
  list(
    sum = x + y,
    diff = x - y,
    prod = x * y,
    quot = x / y
  )
}

#Individual results are pulled out of the returned list by name.
myops(5, 3)$sum
myops(5, 3)$diff
myops(5, 3)$prod
myops(5, 3)$quot


#STATISTICAL FUNCTIONS

#R is equipped with many statistical functions.

#Generate a sample of size 1000 from a normal distribution with mu=100 and
#sigma=15, then print it.
x = rnorm(1000, 100, 15)
x

mean(x)   #Sample mean.
sd(x)     #Sample standard deviation.

sum(x)    #Sum of the elements of x.
hist(x)   #Histogram of x.



#Generate a sample of size 1000 from a uniform distribution on the
#interval [10,20].
u = runif(1000, 10, 20)

min(u)    #Smallest component of u.
max(u)    #Largest component of u.

#After sorting, the first entry is the minimum and the last is the maximum.
sortedu = sort(u)
sortedu
sortedu[1]
sortedu[1000]

x = rnorm(1000, 100, 15)       #Random x values.
epsilon = rnorm(1000, 0, 5)    #Random "errors".
y = x + epsilon                #Random y values dependent on x.

plot(x, y)                     #Scatterplot of (x,y) pairs.


plot(-10:10, (-10:10)^2)       #Plot of the parabola y=x^2.
plot(-10:10, (-10:10)^2, type = "l")


plot(x, y)
lines(50:150, 50:150, col = "red")   #Adds the line y=x to the plot.