This package curates (downloads, cleans, consolidates, smooths) data from Johns Hopkins and Our World in Data for analysing the international outbreak of COVID-19.
It includes several visualizations of the COVID-19 international outbreak.
- COVID19DataProcessor generates curated series
- Visualizations by Yanchang Zhao are included in the ReportGenerator R6 object
- More visualizations are included in the ReportGeneratorEnhanced R6 object
- ReportGeneratorDataComparison visualizations compare all countries, counting epidemic day 0 as the day when confirmed cases > n (e.g. n = 100).
Release | Usage | Development |
---|---|---|
Install the R package using the following commands on the R console:
# install.packages("devtools")
devtools::install_github("rOpenStats/COVID19analytics", build_opts = NULL)
First, set the environment variables with your preferred
configuration in ~/.Renviron.
COVID19analytics_data_dir is
mandatory, while COVID19analytics_credits can be configured if you want
to publish your own research, as a space-separated list of aliases. Mention
previous authors where appropriate.
COVID19analytics_data_dir = "~/.R/COVID19analytics"
# If you want to generate your own reports
COVID19analytics_credits = "@alias1 @alias2 @aliasn"
library(COVID19analytics)
#> Warning: replacing previous import 'ggplot2::Layout' by 'lgr::Layout' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'readr::col_factor' by 'scales::col_factor'
#> when loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::equals' by 'testthat::equals' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::not' by 'testthat::not' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::is_less_than' by
#> 'testthat::is_less_than' when loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::matches' by 'testthat::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'testthat::matches' by 'tidyr::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when
#> loading 'COVID19analytics'
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(knitr)
library(lgr)
log.dir <- file.path(getEnv("data_dir"), "logs")
dir.create(log.dir, recursive = TRUE, showWarnings = FALSE)
log.file <- file.path(log.dir, "covid19analytics.log")
lgr::get_logger("root")$add_appender(AppenderFile$new(log.file))
lgr::threshold("info", lgr::get_logger("root"))
lgr::threshold("info", lgr::get_logger("COVID19ARCurator"))
data.processor <- COVID19DataProcessor$new(provider = "JohnsHopkingsUniversity", missing.values = "imputation")
# dummy <- data.processor$preprocess() is equivalent to setupData() + transform(), i.e. the preprocessing made by the data provider
dummy <- data.processor$setupData()
#> INFO [10:14:12.965] {stage: processor-setup}
#> INFO [10:14:13.025] Checking required downloaded {downloaded.max.date: 2020-07-17, daily.update.time: 21:00:00, current.datetime: 2020-07-19 1.., download.flag: TRUE}
#> INFO [10:14:14.114] Checking required downloaded {downloaded.max.date: 2020-07-17, daily.update.time: 21:00:00, current.datetime: 2020-07-19 1.., download.flag: TRUE}
#> INFO [10:14:14.886] Checking required downloaded {downloaded.max.date: 2020-07-17, daily.update.time: 21:00:00, current.datetime: 2020-07-19 1.., download.flag: TRUE}
#> INFO [10:14:15.981] {stage: data loaded}
#> INFO [10:14:15.984] {stage: data-setup}
dummy <- data.processor$transform()
#> INFO [10:14:15.988] Executing transform
#> INFO [10:14:15.990] Executing consolidate
#> INFO [10:14:18.362] {stage: consolidated}
#> INFO [10:14:18.364] Executing standarize
#> INFO [10:14:18.924] gathering DataModel
#> INFO [10:14:18.926] {stage: datamodel-setup}
# Curate is the processing performed by the missing-values method
dummy <- data.processor$curate()
#> INFO [10:14:18.930] {stage: loading-aggregated-data-model}
#> Warning in countrycode(x, origin = "country.name", destination = "continent"): Some values were not matched unambiguously: MS Zaandam
#> INFO [10:14:20.710] {stage: calculating-rates}
#> INFO [10:14:20.908] {stage: making-data-comparison}
#> INFO [10:14:26.456] {stage: applying-missing-values-method}
#> INFO [10:14:26.459] {stage: Starting first imputation}
#> INFO [10:14:26.465] {stage: calculating-rates}
#> INFO [10:14:26.680] {stage: making-data-comparison-2}
#> INFO [10:14:32.335] {stage: calculating-top-countries}
#> INFO [10:14:32.354] {stage: curated}
current.date <- max(data.processor$getData()$date)
rg <- ReportGeneratorEnhanced$new(data.processor)
rc <- ReportGeneratorDataComparison$new(data.processor = data.processor)
top.countries <- data.processor$top.countries
international.countries <- unique(c(data.processor$top.countries,
"China", "Japan", "Singapore", "Korea, South"))
latam.countries <- sort(c("Mexico",
data.processor$countries$getCountries(division = "sub.continent", name = "Caribbean"),
data.processor$countries$getCountries(division = "sub.continent", name = "Central America"),
data.processor$countries$getCountries(division = "sub.continent", name = "South America")))
# Top 10 daily cases confirmed increment
kable((data.processor$getData() %>%
filter(date == current.date) %>%
select(country, date, rate.inc.daily, confirmed.inc, confirmed, deaths, deaths.inc) %>%
arrange(desc(confirmed.inc)) %>%
filter(confirmed >=10))[1:10,])
country | date | rate.inc.daily | confirmed.inc | confirmed | deaths | deaths.inc |
---|---|---|---|---|---|---|
US | 2020-07-18 | 0.0175 | 63698 | 3711413 | 140119 | 853 |
India | 2020-07-18 | 0.0372 | 38697 | 1077781 | 26816 | 543 |
Brazil | 2020-07-18 | 0.0139 | 28532 | 2074860 | 78772 | 921 |
South Africa | 2020-07-18 | 0.0394 | 13285 | 350879 | 4948 | 144 |
Kyrgyzstan | 2020-07-18 | 0.8782 | 11505 | 24606 | 900 | 727 |
Colombia | 2020-07-18 | 0.0470 | 8560 | 190700 | 6516 | 228 |
Mexico | 2020-07-18 | 0.0230 | 7615 | 338913 | 38888 | 578 |
Russia | 2020-07-18 | 0.0082 | 6214 | 764215 | 12228 | 122 |
Peru | 2020-07-18 | 0.0115 | 3963 | 349500 | 12998 | 199 |
Argentina | 2020-07-18 | 0.0270 | 3223 | 122524 | 2220 | 42 |
# Top 10 daily deaths increment
kable((data.processor$getData() %>%
filter(date == current.date) %>%
select(country, date, rate.inc.daily, confirmed.inc, confirmed, deaths, deaths.inc) %>%
arrange(desc(deaths.inc)))[1:10,])
country | date | rate.inc.daily | confirmed.inc | confirmed | deaths | deaths.inc |
---|---|---|---|---|---|---|
Brazil | 2020-07-18 | 0.0139 | 28532 | 2074860 | 78772 | 921 |
US | 2020-07-18 | 0.0175 | 63698 | 3711413 | 140119 | 853 |
Kyrgyzstan | 2020-07-18 | 0.8782 | 11505 | 24606 | 900 | 727 |
Mexico | 2020-07-18 | 0.0230 | 7615 | 338913 | 38888 | 578 |
India | 2020-07-18 | 0.0372 | 38697 | 1077781 | 26816 | 543 |
Colombia | 2020-07-18 | 0.0470 | 8560 | 190700 | 6516 | 228 |
Peru | 2020-07-18 | 0.0115 | 3963 | 349500 | 12998 | 199 |
Iran | 2020-07-18 | 0.0080 | 2166 | 271606 | 13979 | 188 |
South Africa | 2020-07-18 | 0.0394 | 13285 | 350879 | 4948 | 144 |
Russia | 2020-07-18 | 0.0082 | 6214 | 764215 | 12228 | 122 |
rg$ggplotTopCountriesStackedBarDailyInc(included.countries = latam.countries, countries.text = "Latam countries")
#> Warning: Removed 144 rows containing missing values (position_stack).
rc$ggplotComparisonExponentialGrowth(included.countries = latam.countries, countries.text = "Latam countries",
field = "confirmed", y.label = "Confirmed", min.cases = 100)
rc$ggplotComparisonExponentialGrowth(included.countries = latam.countries, countries.text = "Latam countries",
field = "remaining.confirmed", y.label = "Active cases", min.cases = 100)
rg$ggplotCountriesLines(included.countries = latam.countries, countries.text = "Latam countries",
field = "confirmed.inc", log.scale = TRUE)
#> Warning: Removed 126 row(s) containing missing values (geom_path).
rg$ggplotCountriesLines(included.countries = latam.countries, countries.text = "Latam countries",
field = "rate.inc.daily", log.scale = TRUE)
#> Warning: Removed 126 row(s) containing missing values (geom_path).
rc$ggplotComparisonExponentialGrowth(included.countries = latam.countries, field = "deaths", y.label = "Deaths", min.cases = 1)
rg$ggplotCrossSection(included.countries = latam.countries,
field.x = "confirmed",
field.y = "fatality.rate.max",
plot.description = "Cross section Confirmed vs Death rate min",
log.scale.x = TRUE,
log.scale.y = FALSE)
#> Warning: Removed 126 row(s) containing missing values (geom_path).
rg$ggplotTopCountriesStackedBarDailyInc(top.countries)
#> Warning: Removed 67 rows containing missing values (position_stack).
rc$ggplotComparisonExponentialGrowth(included.countries = international.countries,
field = "confirmed", y.label = "Confirmed", min.cases = 100)
#> Warning: Removed 2 row(s) containing missing values (geom_path).
rc$ggplotComparisonExponentialGrowth(included.countries = international.countries,
field = "remaining.confirmed", y.label = "Active cases", min.cases = 100)
#> Warning: Removed 2 row(s) containing missing values (geom_path).
rc$ggplotComparisonExponentialGrowth(included.countries = international.countries, field = "deaths",
y.label = "Deaths", min.cases = 1)
#> Warning: Removed 2 row(s) containing missing values (geom_path).
rg$ggplotCrossSection(included.countries = international.countries,
field.x = "confirmed",
field.y = "fatality.rate.max",
plot.description = "Cross section Confirmed vs Death rate min",
log.scale.x = TRUE,
log.scale.y = FALSE)
#> Warning: Removed 90 row(s) containing missing values (geom_path).
rg$ggplotCountriesLines(field = "confirmed.inc", log.scale = TRUE)
#> Warning: Removed 66 row(s) containing missing values (geom_path).
rg$ggplotCountriesLines(field = "rate.inc.daily", log.scale = TRUE)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: Removed 66 row(s) containing missing values (geom_path).
rg$ggplotTopCountriesPie()
rg$ggplotTopCountriesBarPlots()
rg$ggplotCountriesBarGraphs(selected.country = "Argentina")
- Johns Hopkins University. Retrieved from: ‘https://github.com/CSSEGISandData/COVID-19/’ [Online Resource]
- OurWorldInData.org. Retrieved from: ‘https://ourworldindata.org/coronavirus’ [Online Resource]
Yanchang Zhao, COVID-19 Data Analysis with Tidyverse and Ggplot2 - China. RDataMining.com, 2020.
URL: http://www.rdatamining.com/docs/Coronavirus-data-analysis-china.pdf.