# Worksheet script: reading data from files, databases and the web.
#
# The script is organised as notebook-style "cells" separated by blank lines.
# Cells containing `fail()` are placeholders to be replaced with an answer;
# each is followed by a `test_*()` call that checks that answer.
# NOTE(review): `fail()` and the `test_*()` functions are presumably defined in
# tests.R (sourced below) -- confirm.

### Run this cell before continuing.
library(tidyverse)
library(repr)
library(readxl)
source("tests.R")
source("cleanup.R")
options(repr.matrix.max.rows = 6)

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_1.1()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_1.2()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_1.3()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_1.4()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_1.5()

### Run this cell to learn more about the arguments used in read_csv
### Reading over the help file will assist with the next question.
?read_csv

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_2.1()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_2.2()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_2.3()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_3.1()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_3.2()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_3.3()

# happiness_report_path <- "..."
# ... <- ...(file = happiness_report_path)

# your code here
fail() # No Answer - remove if you provide an answer
print(happiness_report, n = 10) # the n = 10 argument tells R to print 10 lines

test_3.4()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_3.5()

# ... <- read_delim(file = "data/...", delim = "...")

# your code here
fail() # No Answer - remove if you provide an answer
happy_semi_df

test_3.6.1()

# ... <- ...(file = "...")

# your code here
fail() # No Answer - remove if you provide an answer
happy_semi_df2

test_3.6.2()

# ... <- ...(file = "...")

# your code here
fail() # No Answer - remove if you provide an answer
happy_tsv

test_3.6.3()

# ... <- ...(file = "data/happiness_report_metadata.csv", skip = ...)

# your code here
fail() # No Answer - remove if you provide an answer
happy_metadata

test_3.6.4()

# ... <- ...(file = "...",
#            col_names = c("country", "happiness_score", "GDP_per_capita",
#                          "life_expectancy", "freedom"))

# your code here
fail() # No Answer - remove if you provide an answer
happy_header

test_3.6.5()

# ... <- ...(path = "...")

# your code here
fail() # No Answer - remove if you provide an answer
happy_xlsx

test_3.7()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_3.8()

options(repr.plot.width = 8, repr.plot.height = 7)

# your code here
fail() # No Answer - remove if you provide an answer
header_plot

test_3.9()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_4.1()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_4.2()

# Run this cell before continuing.
library(DBI)
library(RSQLite)
library(dbplyr)

# Replace each ... below; the second one is the relative path to the database.
# ... <- dbConnect(RSQLite::SQLite(), '...')

# your code here
fail() # No Answer - remove if you provide an answer

test_4.3.1()

# Use this cell to figure out how to answer the question.
# Call the dbListTables function in this cell and take a look at the output.
# If you don't know what argument to give dbListTables, use ?dbListTables to
# find out!
# dbListTables(...) # replace ... with the right argument

# Once you've called this and seen the output, insert the output string in the
# cell below as denoted.

# ... <- '...'

# your code here
fail() # No Answer - remove if you provide an answer

test_4.3.2()

# flight_data <- ...

# your code here
fail() # No Answer - remove if you provide an answer

test_4.3.3()

# Run this cell before continuing.
head(flight_data)

# your code here
fail() # No Answer - remove if you provide an answer

test_4.4()

# Take a look at `delay_data` to make sure it has the two columns we expect.
# Run this cell before continuing.
head(delay_data)

# Run this cell before continuing.
count(delay_data)

# Run this cell before continuing.
ggplot(faithful, aes(x = waiting)) +
  geom_histogram(bins = 40) +
  xlab("Waiting Time (mins)") +
  ylab("Count") +
  theme(text = element_text(size = 20))

# Replace each ... with the correct item in the list above.
# NOTE(review): stat() is deprecated in recent ggplot2 releases in favour of
# after_stat(); confirm the installed ggplot2 version before changing the
# template below.
# ... <- ggplot(delay_data, aes(x = ...)) +
#   ...(aes(y = 100 * stat(count) / sum(stat(count))),
#       binwidth = .25,
#       fill = "lightblue",
#       color = ...) +
#   scale_x_continuous(limits = c(-2, 5)) +
#   ylab("% of Flights") +
#   xlab(...) +
#   theme(text = element_text(size=20))
# ...

# your code here
fail() # No Answer - remove if you provide an answer

test_4.5()

# your code here
fail() # No Answer - remove if you provide an answer

test_4.6()

# your code here
fail() # No Answer - remove if you provide an answer

test_4.7()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_4.8.1()

# If you don't know how to call collect or write_csv, use this cell to
# check the documentation by calling ?collect or ?write_csv

# If you input the wrong tbl in the collect() function below your worksheet
# will time out.
# Please make sure you check your answer to question 4.8.1 and input the
# correct tbl in the collect() function below.

# delay_dataframe <- collect(...)
# write_csv(..., ...)

# your code here
fail() # No Answer - remove if you provide an answer

test_4.8.2()

source("cleanup.R")

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_5.1.0()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_5.1.1()

# Run this cell
library(rvest)
url <- "https://en.wikipedia.org/wiki/Gross_world_product"

# your code here
fail() # No Answer - remove if you provide an answer
print(gwp)

test_5.2()

# Run this cell to create the first column for your data set.
year <- html_text(
  html_nodes(gwp, ".wikitable tbody:nth-child(1) td:nth-child(1)")
)
head(year)

# Run this cell.
# Use stringr library.
library(stringr)
# Replace " CE\n" with nothing.
year <- str_replace_all(string = year, pattern = " CE\n", replacement = "")
print(year)

# Run this cell to clean up the year data and convert it to a number.
# Use grepl to find the entries containing " BCE\n" and put a - at the
# beginning of them. The mask is computed once and reused on both sides of the
# assignment.
is_bce <- grepl(pattern = " BCE\n", x = year)
year[is_bce] <- paste0("-", year[is_bce])
# Replace all commas with nothing.
year <- str_replace_all(string = year, pattern = ",", replacement = "")
# Extract the minus symbol and the numbers.
year <- as.numeric(str_extract(string = year, pattern = "-?[0-9]+"))
print(year)

# ... <- ...(html_nodes(gwp, ...))

# your code here
fail() # No Answer - remove if you provide an answer
head(gwp_value)

test_5.4()

# Run this cell to clean up the gwp_value data and convert it to a number.
# Replace all commas with nothing.
gwp_value <- str_replace_all(string = gwp_value, pattern = ",", replacement = "")
# Extract the numbers and decimals.
gwp_value <- as.numeric(str_extract(string = gwp_value, pattern = "[0-9.]+"))
head(gwp_value)

# ... <- tibble(..., ...)

# your code here
fail() # No Answer - remove if you provide an answer
gwp

test_5.5()

# Signed square root of the year: sqrt(|year|), negated for negative years.
gwp <- mutate(gwp, sqrt_year = sign(year) * sqrt(abs(year)))
gwp

# ... <- ggplot(gwp, aes(x = ..., y = ...)) +
#   geom_line() +
#   scale_y_continuous(trans='log10') +
#   scale_x_continuous(breaks = c(-1000, -750, -500, -250, -77.7, 0, 38.7),
#                      labels = c("-1000000", "-562500", "-250000", "-62500",
#                                 "-5000", "0", "1500")) +
#   ylab("...") +
#   xlab("Year") +
#   theme(text = element_text(size=20))
options(repr.plot.width = 15, repr.plot.height = 7)

# your code here
fail() # No Answer - remove if you provide an answer
gwp_historical

test_5.6()

# Replace the fail() with your answer.

# your code here
fail() # No Answer - remove if you provide an answer

test_5.7()

source("cleanup.R")