## R basics
# Each statement is on its own line so the examples can be run one by one
# (or the whole file can be sourced). A bare expression such as `x1` prints
# its value when run at the console.

x <- 5
y.1 <- 10 # dots are allowed inside variable names

## Data types
x1 <- "Hello everyone!" # character
x2 <- 5.4               # numeric (double)
x3 <- 20L               # integer (note the L suffix)
x4 <- 1 + 1i            # complex
x5 <- TRUE              # logical

# print content of a variable
x1
# or
print(x1)
# get class of the variable:
class(x1)

# Vectors: c() combines values of one type
x <- 5
y <- c(1, 1, 2, 3, 5, 8)
p <- c("one", "two", "three")
# T and F are predefined shorthands for TRUE and FALSE, but they are ordinary
# variables that can be reassigned, so the full names are used throughout.
r <- c(TRUE, FALSE, TRUE, FALSE)
r

# Sequences: the colon operator and seq() build the same values
z1 <- 1:10
z1 <- seq(1, 10, 1) # from, to, by
z1

z2 <- 5:-5
z2 <- seq(5, -5, -1)
z2
length(z2)

# Empty and zero-filled numeric vectors
a <- numeric(0)
length(a)
b <- numeric(5)
b

# Lists may mix types and may contain other lists
list1 <- list("a", FALSE, 4)
list2 <- list(1, list1) # nested list
list2

# Matrices
m1 <- matrix(1:8, nrow = 2, ncol = 4)
m1
m2 <- matrix(0, nrow = 2, ncol = 2)
m2

# Data frames: the column names are taken from the variable names
first <- 1:3
second <- c("a", "b", "cd")
d <- data.frame(first, second)
d

# Factors: categorical values with a fixed set of levels
char.vector <- c("male", "female", "female", "male")
f <- factor(char.vector)
f
levels(f)

## Operators
1 + 1
5 - 4
5 * 4
5 / 3   # real division
5 %/% 3 # integer division
5 %% 3  # modulo
5^4     # power

# Arithmetic on vectors works element by element
c(1, 1, 2) - c(3, 5, 8)
c(1, 1, 2) * 3
c(1, 1, 2) * c(3, 5, 8)

paste(c("You", "are", "welcome!"), collapse = " ")

# %*% is matrix multiplication; two equal-length vectors give their inner
# product as a 1 x 1 matrix
c(2, 3, 5) %*% c(1, 2, 3)
a <- matrix(1:4, 2, 2)
a
a %*% a

# Comparisons and logical operators are element-wise as well
y <- c(1, 1, 2, 3, 5, 8)
y > 1
!(y > 1)
y > 1 & y < 8   # AND
y == 2 | y == 5 # OR

y <- c(1, 1, 2, 3, 5, 8)
y %in% 2:5

## Control Structures
x <- 2
if (x > 4) {
  print("YES")
} else {
  print("NO")
}

for (i in c("a", "b", "c")) {
  print(i)
}

x <- 1
while (x < 5) {
  print(x)
  x <- x + 1
}

# repeat loops forever until break; this one prints 1, 2, 4, 5 (3 is skipped)
x <- 1
repeat {
  if (x == 3) {
    x <- x + 1
    next # continue
  }
  if (x > 5) {
    break
  }
  print(x)
  x <- x + 1
}

# Functions: the value of the last evaluated expression is returned ...
myFunc <- function(x, y = 3) {
  x + y
}
myFunc(2)
myFunc(2, 5)

# ... or the result can be returned explicitly with return()
myFunc2 <- function(x, y = 3) {
  return(x + y)
}
myFunc2(2)
myFunc2(x = 2, y = 5)

## Data indexing
y <- c(1, 1, 2, 3, 5, 8)
y[3]
y[c(FALSE, FALSE, TRUE, TRUE, FALSE, FALSE)]
y[2:4]
y[-c(2, 3)] # negative indices drop elements

y <- c(1, 1, 2, 3, 5, 8)
y[numeric(0)]
y[-numeric(0)] # -numeric(0) is still an empty index: selects nothing, not all

y <- c(1, 1, 2, 3, 5, 8)
y[y >= 2]     # returns values
which(y >= 2) # returns indices
which(c(TRUE, FALSE, TRUE))

d <- data.frame(first = 4:6, second = c("a", "b", "c"))
d
d[2, 2]
d[2, 1:2] # also: d[2, ]

d <- data.frame(first = 1:3, second = c("a", "b", "c"))
d$first       # column as a vector
d[[1]]        # the same column, selected by position
d[2, "first"] # single cell
d[1]          # one-column data frame

## File reading and writing
# Expects weather.csv in the current working directory.
weather <- read.csv("weather.csv")
weather[1:3, ]
# Since R 4.0.0 read.csv() keeps text columns as character by default.
# The examples below use weather$play as a factor (plot colours, levels(),
# class attribute for J48), so the conversion is requested explicitly.
# Expects weather.csv in the current working directory.
weather <- read.csv("weather.csv", stringsAsFactors = TRUE)
head(weather) # see also tail()
tail(weather)
summary(weather)
table(weather$outlook)
dim(weather)  # dimension
nrow(weather) # number of rows
ncol(weather) # number of columns

## Missing values
a <- c(1, 2, 3, NA, 5, NA)
is.na(a)

## Plots
plot(x = weather$temperature, y = weather$humidity)

# A factor passed as `col` is used through its integer codes, so every level
# of `play` gets its own colour; the legend uses the same codes.
plot(
  x = weather$temperature, y = weather$humidity,
  col = weather$play, pch = 20, cex = 2,
  xlab = "temperature", ylab = "humidity", main = "Weather data"
)
legend(
  "topright", levels(weather$play),
  col = seq_along(levels(weather$play)), pch = 20
)

# 1000 draws from a normal distribution (not seeded, so values differ per run)
random_values <- rnorm(1000, mean = 25, sd = 10)
head(random_values)
hist(random_values)
plot(1:20, random_values[1:20], type = "l")

## R example - once again
library(RWeka)
library(partykit) # presumably needed for plotting the fitted tree - confirm
weather <- read.csv("weather.csv", stringsAsFactors = TRUE)
model <- J48(play ~ ., data = weather)
plot(model)

## -------------------------------------------------
## EXTRA - not in slides

## indexing lists
l <- list(1:3, 30:35, 3:8)
l[1]   # a list of length one
l[[1]] # the element itself
class(l[1])
class(l[[1]])
l[2:3]
l[[3]][[1]]
l[[c(3, 1)]] # recursive indexing: same as l[[3]][[1]]

## repetition of values
rep(c(1, 2, 3), 5)
rep(c(1, 2, 3), c(3, 1, 3))
rep(c(1, 2, 3), each = 5)

## combine data frames:
df1 <- data.frame(a = 1:3, b = c("a", "b", "c"))
df2 <- data.frame(a = 4:5, b = c("d", "e"))
df3 <- data.frame(x = c(TRUE, TRUE, FALSE), y = 10:12)
rbind(df1, df2) ## by rows
cbind(df1, df3) ## by columns
cbind(df1, m = c("x", "y", "z")) ## append column

## data conversion
a <- 4:10
a
as.character(a)
f <- as.factor(a)
f
as.integer(f) ## returns integer values used for internal representation !!!
# Continuation of the data conversion example: `f` is the factor created
# from 4:10 in the preceding step.
unclass(f) ## same values
as.numeric(as.character(f)) ## this will return original values

## computing summary statistics
a <- 1:10
sum(a)
max(a)
min(a)
mean(a)
sd(a)
median(a)
quantile(a)

# 8 x 2 matrix filled column by column from 16 values
m <- matrix(rep(0:3, each = 4), 8, 2)
m
rowSums(m)
colSums(m)
rowMeans(m)
colMeans(m)

## LAPPLY, SAPPLY
lapply(1:4, sqrt) # always returns a list
sapply(1:4, sqrt) # simplifies the result to a vector where possible
sapply(1:4, function(x) {
  sqrt(x)
})
sapply(1:4, function(x) {
  -x
})

l <- list(1:3, 30:35, 3:8)
l
sapply(l, mean)

## MAPPLY
# Applies the function to the first elements of a and b, then the second, ...
a <- 2:4
b <- 3:5
a
b
mapply(function(x, y) {
  x^y
}, a, b)

## APPLY
m <- matrix(rep(0:3, each = 4), 8, 2)
m
apply(m, 1, sum) # margin 1: over rows
apply(m, 2, sum) # margin 2: over columns