install.packages("curl")

library("curl")

install.packages("httr")

library("httr")

install.packages("rvest")

library("rvest")

# Request the Wikipedia "COVID-19 testing by country" template page over
# HTTP and print the response object for inspection.
covid19_url <- "https://en.wikipedia.org/w/index.php?title=Template:COVID-19_testing_by_country"
response <- GET(covid19_url)
response

# Get the root HTML node of the Wikipedia page and print it.
# Note: read_html() is given the URL itself, so the page is downloaded
# again here rather than parsed from the earlier HTTP response.
covid19_root_node <- read_html(
  "https://en.wikipedia.org/w/index.php?title=Template:COVID-19_testing_by_country"
)
covid19_root_node

# Get the table node from the root HTML node.
# html_node() returns only the first element matching the "table" selector.
covid19_table_node <- html_node(covid19_root_node, "table")
covid19_table_node

# Read the table node, convert it into a data frame, and print the first
# rows of the data frame for review.
covid19_data_frame <- html_table(covid19_table_node)
head(covid19_data_frame)

# Print the summary of the raw (unprocessed) data frame.
summary(covid19_data_frame)

# Call `preprocess_covid_data_frame` on the raw table and assign the result
# to a new data frame, then print it.
# NOTE(review): `preprocess_covid_data_frame` is not defined in the visible
# part of this script; it must be defined or sourced before this line runs.
wiki_covid19_data_frame <- preprocess_covid_data_frame(covid19_data_frame)
wiki_covid19_data_frame

# Print the summary of the processed data frame.
summary(wiki_covid19_data_frame)

# Export the processed data frame to "covid.csv" (relative path, so it is
# written to the current working directory), without row names.
write.csv(wiki_covid19_data_frame, file = "covid.csv", row.names = FALSE)

# Get the working directory.
wd <- getwd()

# Build the full path of the exported file.
file_path <- file.path(wd, "covid.csv")

# Print the file path and check that the exported file exists.
print(file_path)
file.exists(file_path)

# Optional: download a sample CSV file instead of using the exported one.
# Left disabled; uncomment both statements to use the sample data.
# covid_csv_file <- download.file("https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-RP0101EN-Coursera/v2/dataset/covid.csv", destfile = "covid.csv")
# covid_data_frame_csv <- read.csv("covid.csv", header = TRUE, sep = ",")

# Read covid_data_frame_csv from the CSV file in the working directory.
covid_data_frame_csv <- read.csv("covid.csv", header = TRUE, sep = ",")

# Get the 5th to 10th rows, keeping only the "country" and "confirmed"
# columns.
covid_data_frame_csv[5:10, c("country", "confirmed")]

# Get the total confirmed cases worldwide.
# The column is selected by position (4th column).
# NOTE(review): presumably this is the "confirmed" column; confirm against
# the CSV header.
confirmed_cases <- covid_data_frame_csv[, 4]
confirmed_cases
total_confirmed_cases <- sum(confirmed_cases)
total_confirmed_cases

# Get the total tested cases worldwide.
# The column is selected by position (3rd column).
# NOTE(review): presumably this is the tested-count column; confirm against
# the CSV header.
tested_cases <- covid_data_frame_csv[, 3]
tested_cases
total_tested_cases <- sum(tested_cases)
total_tested_cases

# Get the positive ratio (confirmed / tested).
positive_ratio <- total_confirmed_cases / total_tested_cases
positive_ratio

# Get the country column (selected by position: 1st column).
country_column <- covid_data_frame_csv[, 1]
country_column

# Check its class. read.csv() yields a factor before R 4.0 and a character
# vector from R 4.0 on, where stringsAsFactors defaults to FALSE.
class(country_column)

# Convert the country column into character so that it can be sorted as
# plain text. The result is assigned back; otherwise the conversion is
# discarded and the sorts below still run on the unconverted column.
country_column <- as.character(country_column)
country_column

# Sort the countries A to Z.
sort(country_column)

# Sort the countries Z to A.
Country_ZtoA <- sort(country_column, decreasing = TRUE)
Country_ZtoA

# Print the sorted Z-to-A list.
print(Country_ZtoA)

# Use the regular expression `United.+` to find matching country names.
# regexpr() locates the first match in each element, and regmatches()
# extracts the matched text for the elements that matched.
# (The variable name's spelling is kept as-is so existing references work.)
matches <- regexpr("United.+", covid_data_frame_csv[, "country"])
countires_start_with_United <- regmatches(
  covid_data_frame_csv[, "country"],
  matches
)
countires_start_with_United

# Print the matched country names.
print(countires_start_with_United)

# Select one row of the processed data frame (row 1) with the country,
# confirmed count, and confirmed-to-population ratio columns.
ratio_columns <- c("country", "confirmed", "confirmed.population.ratio")
first_country <- wiki_covid19_data_frame[1, ratio_columns]
first_country

# Select a second row (row 20) with the same columns.
second_country <- wiki_covid19_data_frame[20, ratio_columns]
second_country

# Compare the ratios of the two selected rows. The values are read from the
# data instead of hard-coded literals, so the printed message follows the
# data. The country names in the messages also come from the selected rows.
# NOTE(review): assumes `confirmed.population.ratio` is numeric after
# preprocessing; confirm.
first_ratio <- first_country[["confirmed.population.ratio"]]
second_ratio <- second_country[["confirmed.population.ratio"]]

if (is.na(first_ratio) || is.na(second_ratio)) {
  # A missing ratio (or a row index past the end of the data frame) would
  # otherwise make the `if` condition NA and abort the script.
  print("Cannot compare: confirmed.population.ratio is missing for a selected row")
} else if (first_ratio > second_ratio) {
  print(paste(
    first_country[["country"]],
    "has larger ratio of confirmed cases to population"
  ))
} else {
  print(paste(
    second_country[["country"]],
    "has larger ratio of confirmed cases to population"
  ))
}

# Get a subset of countries by `confirmed.population.ratio`:
# with threshold "lessRisk" keep rows whose ratio is below 0.01,
# otherwise keep rows whose ratio is above 0.01.
threshold <- "lessRisk"
if (threshold == "lessRisk") {
  subset(wiki_covid19_data_frame, confirmed.population.ratio < 0.01)
} else {
  subset(wiki_covid19_data_frame, confirmed.population.ratio > 0.01)
}

Tasnim-Jahan / Analysis-of-Global-COVID-19-Pandemic-Data

Call the get_wiki_covid19_page function and print the response

Get the root html node from the http response in task 1

Get the table node from the root html node

Read the table node and convert it into a data frame, and print the data frame for review

Print the summary of the data frame

call `preprocess_covid_data_frame` function and assign it to a new data frame

Print the summary of the processed data frame again

Export the data frame to a csv file

Get working directory

Get exported

File path

Download a sample csv file

Read covid_data_frame_csv from the csv file

Get the 5th to 10th rows, with two "country" "confirmed" columns

Get the total confirmed cases worldwide

Get the total tested cases worldwide

Get the positive ratio (confirmed / tested)

Get the `country` column

Check its class ( should be Factor)

Convert the country column into character so that you can easily sort them

Sort the countries AtoZ

Sort the countries ZtoA

Print the sorted ZtoA list

Use a regular expression `United.+` to find matches

Print the matched country names

Select a subset (should be only one row) of data frame based on a selected country name and columns

Select a subset (should be only one row) of data frame based on a selected country name and columns

Get a subset of any countries with `confirmed.population.ratio` less than the threshold

About

Languages

Call the get_wiki_covid19_page function and print the response

Get the root html node from the http response in task 1

Get the table node from the root html node

Read the table node and convert it into a data frame, and print the data frame for review

Print the summary of the data frame

call preprocess_covid_data_frame function and assign it to a new data frame

Print the summary of the processed data frame again

Export the data frame to a csv file

Get working directory

Get exported

File path

Download a sample csv file

Read covid_data_frame_csv from the csv file

Get the 5th to 10th rows, with two "country" "confirmed" columns

Get the total confirmed cases worldwide

Get the total tested cases worldwide

Get the positive ratio (confirmed / tested)

Get the country column

Check its class ( should be Factor)

Convert the country column into character so that you can easily sort them

Sort the countries AtoZ

Sort the countries ZtoA

Print the sorted ZtoA list

Use a regular expression United.+ to find matches

Print the matched country names

Select a subset (should be only one row) of data frame based on a selected country name and columns

Select a subset (should be only one row) of data frame based on a selected country name and columns

Get a subset of any countries with confirmed.population.ratio less than the threshold

About

Languages

call `preprocess_covid_data_frame` function and assign it to a new data frame

Get the `country` column

Use a regular expression `United.+` to find matches

Get a subset of any countries with `confirmed.population.ratio` less than the threshold