Chapter 18 Solutions to Exercises
18.0.2 Functions
Corresponding exercises: see Section 2.1.2
- Type
?round
orhelp(round)
into the console. Purpose: rounds the values in its first argument to the specified number of decimal places. Arguments: 1) x = number that should be rounded. 2) digits = how many decimal places (how many numbers after the comma should remain). Default: the second argument (digits) has default 0. - Searching in the help pages does not help much. Try googling something like “R binomial coefficient”.
choose(5,3)
18.0.4 R-Scripts
Corresponding exercises: see Section 3.0.1
#------------------------------- # Experimenting with logarithms #------------------------------ # the data my_data <- pi # different bases of the logarithm base_1 <- 10 base_2 <- exp(1) # print the logarithm of my_data for the different bases print(log(my_data, base=base_1)) print(log(my_data, base=base_2))
- Execute your script using the buttons “Run” and “Source”, or the corresponding shortcuts
Ctrl
+Enter
andCtrl
+Shift
+Enter
, respectively.
18.0.9 Factors
Corresponding exercises: see Section 4.4.1
months <- c("july", "december", "july", "july", "may", "december", "july", "september", "may", "october", "december", "december", "august", "may", "september", "december", "september") fMonths <- factor(months) print(fMonths) str(fMonths) # There are 6 levels: august december july may october september.
18.0.21 Plotting
Corresponding exercises: see Section 9.1.6
av.high <- c(32, 34, 34, 32, 32, 30, 32, 30, 30, 30, 32, 32) av.rain <- c(16, 31, 104, 131, 161, 155, 193, 225, 192, 199, 76, 27) par(las=1, mar=c(4,4,0.5,4)) # las changes the orientation of the labels # of the axis. The plot becomes more readable this way plot(1:12, av.high, ylim=c(0, max(av.high)), type='b', col='red', pch=19, xlab="Month", ylab="Average temperature high [°C]") scale <- max(av.rain) / max(av.high) lines(1:12, av.rain / scale, type='b', col='blue', pch=17) labels <- pretty(av.rain) axis(side=4, at=labels / scale, labels=labels) mtext("Average rainfall [mm]", side=4, line=3, las=3) legend('bottom', bty='n', col=c('red', 'blue'), lwd=1, pch=c(19, 17), legend=c("Average temperature", "Average rainfall"))
18.0.23 Probability distributions
Corresponding exercises: see Section 10.0.4
height <- seq(140, 200, length.out = 1000) females <- dnorm(height, 164.7, 7.1) males <- dnorm(height, 178.4, 7.6) plot(height, females, col = 'dodgerblue', type = "l", ylab = "density") lines(height, males, col = 'orange2') myHeight <- 175 abline(v = myHeight, col = 'dodgerblue', lty = 2) dnorm(myHeight, 164.7, 7.1) # for females
height <- seq(140, 200, length.out = 1000) females <- pnorm(height, 164.7, 7.1) males <- pnorm(height, 178.4, 7.6) plot(height, females, col = 'dodgerblue', type = "l", ylab = "cumulative density") lines(height, males, col = 'orange2') myHeight <- 175 abline(v = myHeight, col = 'dodgerblue', lty = 2) pnorm(myHeight, 164.7, 7.1) # for females
quantiles <- c(0.05, 0.32, 0.5, 0.68, 0.95) femalesQuantiles <- qnorm(quantiles, 164.7, 7.1) malesQuantiles <- qnorm(quantiles, 178.4, 7.6) # plot probability density function height <- seq(140, 200, length.out = 1000) females <- dnorm(height, 164.7, 7.1) males <- dnorm(height, 178.4, 7.6) plot(height, females, col = 'dodgerblue', type = "l", ylab = "density") lines(height, males, col = 'orange2') # add lines abline(v = femalesQuantiles, col = 'dodgerblue', lty = 4) abline(v = malesQuantiles, col = 'orange2', lty = 4)
lambda <- c(0.5, 1, 1.5) color <- c("orange", "purple", "cadetblue") par(mfrow=c(2,1), mar=c(4,4,1,0.1)) x <- seq(0, 5, length.out=100) # plot probability density function plot(0, type='n', xlim=range(x), ylim=c(0, 1.5), xlab="x", ylab="P(x)") for(l in 1:length(lambda)){ lines(x, dexp(x, rate=lambda[l]), col=color[l]) } legend('topright', bty='n', col=color, lwd=1, legend=lambda) # plot cumulative distribution function plot(0, type='n', xlim=range(x), ylim=c(0, 1), xlab="x", ylab="P(X<=x)") for(l in 1:length(lambda)){ lines(x, pexp(x, rate=lambda[l]), col=color[l]) } legend('bottomright', bty='n', col=color, lwd=1, legend=lambda)
nRep <- 10000 net <- numeric(nRep) for(r in 1:nRep){ # start of one game invest <- 1 expenses <- invest # simulate first round win <- runif(1) < 18/37 # simulate all subsequent rounds while(!win & invest <= 50){ invest <- 2 * invest expenses <- expenses + invest win <- runif(1) < 18/37 } if(win){ # we won -> invested money is doubled net[r] <- 2 * invest - expenses } else { # we lost -> no income, only expenses net[r] <- 0 - expenses } } sum(net>0)/length(net) mean(net) # The net benefit is very likely to be negative, since the probability of winning is < 0.5 and since we cut the investment at 100 CHF.
18.0.24 Empirical Distributions
Corresponding exercises: see Section 10.1.3
data("iris") # plot histograms par(mfrow = c(1,3)) hist(iris$Petal.Length[iris$Species == "setosa"], col = "dodgerblue", xlab = "petal length", main = "setosa") hist(iris$Petal.Length[iris$Species == "versicolor"], col = "orange2", xlab = "petal length", main = "versicolor") hist(iris$Petal.Length[iris$Species == "virginica"], col = "firebrick", xlab = "petal length", main = "virginica") # plot kernel par(mfrow = c(1,1)) plot(density(iris$Petal.Length), ylim = c(0, 2.6), main = "Distribution of petal length") lines(density(iris$Petal.Length[iris$Species == "setosa"]), col = "dodgerblue") lines(density(iris$Petal.Length[iris$Species == "versicolor"]), col = "orange2") lines(density(iris$Petal.Length[iris$Species == "virginica"]), col = "firebrick") legend("topright", legend = c("global", "setosa", "versicolor", "virginica"), col = c("black", "dodgerblue", "orange2", "firebrick"), lty = rep(1, 4))
18.0.25 Data Input and Export
Corresponding exercises: see Section 11.0.1
- Create a directory using any means your file system offers. Then switch there using
setwd()
or with RStudio. Check where you are withgetwd()
.
18.0.29 RStudio Workspace
Corresponding exercises: see Section 11.4.1
18.0.30 Installing packages
Corresponding exercises: see Section 12.0.1
install.packages("RColorBrewer") library(RColorBrewer) x <- seq(-4, 4, by = 0.1) numSds <- 10 cols <- brewer.pal(numSds, "Spectral") # generate empty plot plot(0, type = "n", xlim=range(x), ylim=c(0,4), xlab="x", ylab = "density") for (i in 1:numSds){ y <- dnorm(x, 0, i/numSds) lines(x, y, col = cols[i]) }
18.0.36 Loops are slow
Corresponding exercises: see Section 14.0.2
myExpSum <- function(x){ s <- 0 for(i in 1:length(x)){ s <- s + exp(x[i]) } return(s) } system.time(myExpSum(x)) # As you will notice, calculating the sum of exp(x) takes about twice as long as calculating the sum of x when using a for loop. # When using functions on vectors (first exercise), it took about 4 times as long. # This suggests, that the majority of the calculation time is now caused by the for loop, not the mathematical function used.
18.0.37 Apply
Corresponding exercises: see Section 14.1.6
means <- rowMeans(X) sds <- apply(X, 1, sd) X <- sweep(X, 1, means, "-") X <- sweep(X, 1, sds, "/") apply(X, 1, mean) apply(X, 1, sd) # Note that the means may be just very close to zero (e.g. 10^-15) but not actually zero. This is due to numerical issues when dealing with very small numbers and perfectly normal.
18.0.39 S3 classes
Corresponding exercises: see Section 15.5.1
plot.circle <- function(x){ grid.circle(r = x$radius) } # call plot plot(smallCircle) plot(largeCircle) # the circle is so large that it doesn't fit the window, so don't worry if you can't see it # This is nice, isn't it? Now we can plot circles with plot(). # No one needs to remember a complicated name for plotting a circle.
# generic function perimeter <- function(x){ UseMethod("perimeter", x) } # S3 function perimeter.circle <- function(x){ return(2 * pi * x$radius) } # We need to write the generic function perimeter() because this is not a built-in # generic function of R. # On the other hand, print() and plot() are built-in generic functions, so we didn't need # to write them again.
# constructor for class rectangle rectangle <- function(width, height){ if (width < 0) stop("Width must be positive!") if (height < 0) stop("Height must be positive!") r <- list(width = width, height = height) class(r) <- "rectangle" return(r) } # create some objects class rectangle smallRec <- rectangle(0.01, 0.05) largeRec <- rectangle(50, 100) # invalidRec <- rectangle(-1, 0.01) # S3 function for print print.rectangle <- function(x){ output <- paste0("Here's a rectangle with width = ", x$width, " and height = ", x$height, ".") print(output) } # call print print(smallRec) print(largeRec) # S3 function for plot plot.rectangle <- function(x){ grid.rect(x$width, x$height) } # call plot plot(smallRec) plot(largeRec) # the rectangle is so large that it doesn't fit the window, so don't worry if you can't see it # S3 function for perimeter (Note: we don't need to define the perimeter() generic function again!) perimeter.rectangle <- function(x){ return(2 * (x$width + x$height)) } # S3 function for area (Note: we don't need to define the area() generic function again!) area.rectangle <- function(x){ return(x$width * x$height) } # check if perimeter() and print() are working! perimeter(smallRec) area(smallRec) perimeter(largeRec) area(largeRec)
print()
is a generic function and will callUseMethod("print", x)
.UseMethod()
will then search for a print method for class “mammal”. However, we haven’t defined such a method. It will therefore call theprint.default()
method, which will do some default printing.print.mammal <- function(x, ...){ print(paste0("This is ", x$name, ". It's a ", x$color, " ", x$sex, " mammal.")) } print(bobby) # print() is a generic function and calls UseMethod("print", x). # UseMethod() searches for a print method for class "mammal". # As we've named our function print.mammal() - which is the correct syntax for S3 function - # R will call print.mammal()!
class(bobby) <- rev(class(bobby)) print(bobby) # still prints the function print.mammal(). # Although class "cow" comes before class "mammal" in the vector of class attributes, # class "cow" does not have a printing function yet. # Therefore, R goes to the second element in the vector - "mammal" - and calls the corresponding function print.mammal().