Data Storytelling code
This is code accompanying an article about Data Storytelling in the German IT magazine iX 1/2022. The figures in this article were created as follows.
Note: Because the data is partly from a German source, the data description is in German as well.
library(readr)
library(tidyverse)
library(stringr)
library(lubridate)
Set colorblind-friendly palette
cbp2 <- c("#000000", "#E69F00", "#56B4E9", "#009E73",
"#999999", "#0072B2", "#D55E00", "#CC79A7")
ggplot <- function(...) ggplot2::ggplot(...) +
scale_color_manual(values = cbp2) +
scale_fill_manual(values = cbp2) + # note: needs to be overridden when using continuous color scales
theme_bw()
library(plotrix)
sliceValues <- rep(10, 8) # each slice value=10 for proportionate slices
(
p <- pie3D(sliceValues,
explode=0,
theta = 1.2,
col = cbp2,
labels = cbp2,
labelcex = 0.9,
shade = 0.6,
main = "Colorblind\nfriendly palette")
)
## [1] 0.3926991 1.1780972 1.9634954 2.7488936 3.5342917 4.3196899 5.1050881
## [8] 5.8904862
Data
RKI
Because the original data file is in Excel (.xlsx
) format, I exported worksheets with data of interest as .csv
. Alternatively, you could use the xlsx package to directly read in .xlsx
files.
You can directly download the csv files I used from Github.
file_fz <- "fallzahlen.csv"
file_sti <- "siebentageinzidenz.csv"
data_fz <- read_csv2(paste0("/Users/shiringlander/Documents/Github/data_storytelling_ix2021/data/", file_fz)) %>%
gather("jahr_woche", "fallzahl", -Altersgruppe)
data_fz_sti <- read_csv2(paste0("/Users/shiringlander/Documents/Github/data_storytelling_ix2021/data/", file_sti)) %>%
gather("jahr_woche", "siebentageinzidenz", -Altersgruppe) %>%
full_join(data_fz, by = c("Altersgruppe", "jahr_woche")) %>%
mutate(Jahr = as.numeric(str_extract(jahr_woche, "^.*(?=(_))")),
Woche = as.numeric(str_remove(jahr_woche, "^(.*?_)")),
fallzahl = as.numeric(fallzahl)) %>%
mutate(Datum = paste0(Jahr, "-W", Woche, "-1"),
Datum = make_datetime(year = Jahr) + weeks(Woche)) %>%
select(-jahr_woche) %>%
select(Datum, Jahr, Woche, Altersgruppe, everything()) %>%
gather("x", "y", siebentageinzidenz:fallzahl)
data_fz_sti %>%
head()
## # A tibble: 6 × 6
## Datum Jahr Woche Altersgruppe x y
## <dttm> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2020-03-11 00:00:00 2020 10 Gesamt siebentageinzidenz 1.08
## 2 2020-03-11 00:00:00 2020 10 90 siebentageinzidenz 0.12
## 3 2020-03-11 00:00:00 2020 10 85 - 89 siebentageinzidenz 0.38
## 4 2020-03-11 00:00:00 2020 10 80 - 84 siebentageinzidenz 0.36
## 5 2020-03-11 00:00:00 2020 10 75 - 79 siebentageinzidenz 0.62
## 6 2020-03-11 00:00:00 2020 10 70 - 74 siebentageinzidenz 0.3
Stand: 08.09.2021 07:09:21
Die dem RKI übermittelten COVID-19-Fälle nach Meldewoche und Geschlecht sowie Anteile mit für COVID-19 relevanten Symptomen, Anteile Hospitalisierter und Verstorbener für die Meldewochen KW 10 – 53/2020 und KW 01 - 35/2021
Altersmedian/-mittelwert für Hospitalisierte, Patienten auf Intensivstation und Verstorbene von an das RKI übermittelten COVID-19-Fällen für die Meldewochen KW 10 – 53/2020 und KW 01 - 35/2021
file_ka <- "klinische_aspekte.csv"
file_am <- "altersmedian.csv"
data_ka <- read_csv2(paste0("/Users/shiringlander/Documents/Github/data_storytelling_ix2021/data/", file_ka))
data_am <- read_delim(paste0("/Users/shiringlander/Documents/Github/data_storytelling_ix2021/data/", file_am), delim = ";", locale = locale(decimal_mark = ".")) %>%
select(-'Fälle gesamt') %>%
full_join(data_ka, by = c("Meldejahr", "Meldewoche" = "MW")) %>%
mutate(Datum = paste0(Meldejahr, "-W", Meldewoche, "-1"),
Datum = make_datetime(year = Meldejahr) + weeks(Meldewoche)) %>%
select(-'Anteil keine, bzw. keine für COVID-19 bedeutsamen Symptome',
-'Anteil der Hospitalisierten bei Fällen mit Angabe zur Hospitalisation') %>%
select(Datum, Meldejahr, Meldewoche, everything()) %>%
mutate(Männer = as.numeric(gsub("[^0-9.]", "", Männer)),
Frauen = as.numeric(gsub("[^0-9.]", "", Frauen)),
`Anteil Verstorben` = gsub(",", ".", `Anteil Verstorben`),
`Anteil Verstorben` = as.numeric(gsub("[^0-9.]", "", `Anteil Verstorben`))) %>%
gather("x", "y", Alle_Altersmedian:'Anteil Verstorben') %>%
rename(Jahr = Meldejahr, Woche = Meldewoche)
data_am %>%
head()
## # A tibble: 6 × 5
## Datum Jahr Woche x y
## <dttm> <dbl> <dbl> <chr> <dbl>
## 1 2020-03-11 00:00:00 2020 10 Alle_Altersmedian 45
## 2 2020-03-18 00:00:00 2020 11 Alle_Altersmedian 47
## 3 2020-03-25 00:00:00 2020 12 Alle_Altersmedian 47
## 4 2020-04-01 00:00:00 2020 13 Alle_Altersmedian 49
## 5 2020-04-08 00:00:00 2020 14 Alle_Altersmedian 52
## 6 2020-04-15 00:00:00 2020 15 Alle_Altersmedian 52
Vaccination data
file_vacc <- "vaccination_ger.csv"
data_vacc <- read_delim(paste0("/Users/shiringlander/Documents/Github/data_storytelling_ix2021/data/", file_vacc), delim = ",", locale = locale(decimal_mark = ".")) %>%
gather("x", "y", total_vaccinations:daily_vaccinations_per_million) %>%
select(-location, -iso_code)
Combination
data_gesamt <- data_fz_sti %>%
filter(Altersgruppe == "Gesamt") %>%
select(-Altersgruppe) %>%
unique() %>%
bind_rows(data_am) %>%
select(-Jahr,-Woche) %>%
rename(date = "Datum") %>%
bind_rows(data_vacc)
Plots
data_gesamt %>%
select(x) %>%
filter(!duplicated(x))
## # A tibble: 30 × 1
## x
## <chr>
## 1 siebentageinzidenz
## 2 fallzahl
## 3 Alle_Altersmedian
## 4 Hosp_Altersmedian
## 5 ITS_Altersmedian
## 6 Verstorben_Altersmedian
## 7 Alle_MW_Alter
## 8 Hosp_MW_Alter
## 9 ITS_MW_Alter
## 10 Verst_MW_Alter
## # … with 20 more rows
data_gesamt_perc_vacc <- data_gesamt %>%
filter(grepl("people_vaccinated$", x) | grepl("people_fully_vaccinated$", x)) %>%
mutate(y_perc = y / 83020000 * 100)
first_vac <- data_gesamt_perc_vacc %>%
slice(1) %>%
mutate(label = "Erste Impfungen")
bigger_ten <- data_gesamt_perc_vacc %>%
group_by(x) %>%
filter(y_perc > 10) %>%
slice(1) %>%
mutate(label = "> 10%")
bigger_25 <- data_gesamt_perc_vacc %>%
group_by(x) %>%
filter(y_perc > 25) %>%
slice(1) %>%
mutate(label = "> 25%")
bigger_50 <- data_gesamt_perc_vacc %>%
group_by(x) %>%
filter(y_perc > 50) %>%
slice(1) %>%
mutate(label = "> 50%")
last_entry <- data_gesamt_perc_vacc %>%
group_by(x) %>%
top_n(1) %>%
mutate(label = paste0("letzter Eintrag ", round(y_perc, digits = 2), "%"))
vacc_data <- first_vac %>%
bind_rows(bigger_ten) %>%
bind_rows(bigger_25) %>%
bind_rows(bigger_50) %>%
bind_rows(last_entry) %>%
mutate(x = ifelse(x == "people_vaccinated", "Prozent Geimpft", "Prozent Zweitimpfung"))
# Altersmedian & MW Alter
data_gesamt_prep <- data_gesamt %>%
filter(grepl("median", x) | grepl("MW", x)) %>%
mutate(stat = ifelse(grepl("median", x), "Median", "Mittelwert"),
x = gsub("_", " ", x),
x = gsub("Altersmedian|MW", "", x),
x = gsub("Alter", "", x),
x = gsub(" ", "", x),
x = gsub("Verst$", "Verstorben", x),
x = gsub("Verstorben", "Gestorben", x))
data_gesamt <- data_gesamt %>%
mutate(date = as.Date(date, "%Y-%m-%d", tz = "CEST"))
data_gesamt_prep <- data_gesamt_prep %>%
mutate(date = as.Date(date, "%Y-%m-%d", tz = "CEST"))
vacc_data <- vacc_data %>%
mutate(date = as.Date(date, "%Y-%m-%d", tz = "CEST"))
ggplot() +
geom_vline(data = vacc_data,
aes(xintercept = date, color = x)) +
geom_line(data = data_gesamt_prep,
aes(x = date, y = y, color = x, linetype = stat)) +
scale_x_date(date_breaks = "1 month",
date_minor_breaks = "1 week") +
ggrepel::geom_label_repel(data = vacc_data,
aes(x = date, y = 90, label = label, color = x),
arrow = arrow(length = unit(0.02, "npc"))) +
theme(legend.position="top") +
labs(x = "Datum",
y = "Alter in Jahren",
linetype = "Statistik",
color = "Einteilung der\nErkrankten/Geimpften",
title = "Unterscheiden sich das Durchschnittsalter aller an Covid-19 erkrankter von Hospitalisierten und Verstorbenen?\nUnd welchen Einfluss hat das Impfgeschehen darauf?",
#subtitle = "",
caption = "Datenquellen (Stand: 08.09.2021 07:09:21):
Robert-Koch-Institut: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Klinische_Aspekte.html &
https://ourworldindata.org/: https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv")
ggplot() +
geom_vline(data = vacc_data %>% rename(xx = x), aes(xintercept = date, color = xx)) +
geom_line(data = data_gesamt %>% filter(grepl("Fälle gesamt", x) | grepl("Anteil Verstorben", x)),
aes(x = date, y = y, color = x)) +
facet_wrap(x ~ ., scales = "free", nrow = 2,
strip.position = "left",
labeller = as_labeller(c('Fälle gesamt' = "Fälle gesamt", 'Anteil Verstorben' = "Anteil Verstorbener") ) ) +
scale_x_date(date_breaks = "1 month",
date_minor_breaks = "1 week") +
ggrepel::geom_label_repel(data = vacc_data %>% rename(xx = x),
aes(x = date, y = 7, label = label, color = xx),
arrow = arrow(length = unit(0.02, "npc"))) +
theme(legend.position="top") +
labs(x = "Datum",
y = "",
color = "",
title = "Hat das Impfgeschehen Einfluss auf die Fallzahlen?",
#subtitle = "",
caption = "Datenquellen (Stand: 08.09.2021 07:09:21):
Robert-Koch-Institut: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Klinische_Aspekte.html &
https://ourworldindata.org/: https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv")
ggplot() +
geom_vline(data = vacc_data %>% rename(xx = x), aes(xintercept = date, color = xx)) +
geom_line(data = data_gesamt %>% filter(grepl("Frauen", x)),
aes(x = date, y = y, color = x)) +
scale_x_date(date_breaks = "1 month",
date_minor_breaks = "1 week") +
ggrepel::geom_label_repel(data = vacc_data %>% rename(xx = x),
aes(x = date, y = 56, label = label, color = xx),
arrow = arrow(length = unit(0.02, "npc"))) +
theme(legend.position="top") +
labs(x = "Datum",
y = "Prozentanteil weiblicher Infektionsfälle",
color = "",
title = "Hat das Impfgeschehen Einfluss auf den Anteil von infizierten Männern und Frauen?",
#subtitle = "",
caption = "Datenquellen (Stand: 08.09.2021 07:09:21):
Robert-Koch-Institut: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Klinische_Aspekte.html &
https://ourworldindata.org/: https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv")
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.1.1 (2021-08-10)
## os macOS Big Sur 10.16
## system x86_64, darwin17.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz Europe/Berlin
## date 2022-01-10
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.1.0)
## backports 1.2.1 2020-12-09 [1] CRAN (R 4.1.0)
## bit 4.0.4 2020-08-04 [1] CRAN (R 4.1.0)
## bit64 4.0.5 2020-08-30 [1] CRAN (R 4.1.0)
## blogdown 1.5 2021-09-02 [1] CRAN (R 4.1.0)
## bookdown 0.24 2021-09-02 [1] CRAN (R 4.1.0)
## broom 0.7.9 2021-07-27 [1] CRAN (R 4.1.0)
## bslib 0.3.0 2021-09-02 [1] CRAN (R 4.1.0)
## cachem 1.0.6 2021-08-19 [1] CRAN (R 4.1.0)
## callr 3.7.0 2021-04-20 [1] CRAN (R 4.1.0)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.1.0)
## cli 3.0.1 2021-07-17 [1] CRAN (R 4.1.0)
## colorspace 2.0-2 2021-06-24 [1] CRAN (R 4.1.0)
## crayon 1.4.1 2021-02-08 [1] CRAN (R 4.1.0)
## DBI 1.1.1 2021-01-15 [1] CRAN (R 4.1.0)
## dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.1.0)
## desc 1.3.0 2021-03-05 [1] CRAN (R 4.1.0)
## devtools 2.4.2 2021-06-07 [1] CRAN (R 4.1.0)
## digest 0.6.28 2021-09-23 [1] CRAN (R 4.1.1)
## dplyr * 1.0.7 2021-06-18 [1] CRAN (R 4.1.0)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.0)
## evaluate 0.14 2019-05-28 [1] CRAN (R 4.1.0)
## fansi 0.5.0 2021-05-25 [1] CRAN (R 4.1.0)
## farver 2.1.0 2021-02-28 [1] CRAN (R 4.1.0)
## fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.0)
## forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.1.0)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.1.0)
## generics 0.1.0 2020-10-31 [1] CRAN (R 4.1.0)
## ggplot2 * 3.3.5 2021-06-25 [1] CRAN (R 4.1.0)
## ggrepel 0.9.1 2021-01-15 [1] CRAN (R 4.1.0)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.1.0)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 4.1.0)
## haven 2.4.3 2021-08-04 [1] CRAN (R 4.1.0)
## highr 0.9 2021-04-16 [1] CRAN (R 4.1.0)
## hms 1.1.0 2021-05-17 [1] CRAN (R 4.1.0)
## htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.1.0)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.1.0)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.1.0)
## jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.1.0)
## knitr 1.34 2021-09-09 [1] CRAN (R 4.1.0)
## labeling 0.4.2 2020-10-20 [1] CRAN (R 4.1.0)
## lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.1.1)
## lubridate * 1.7.10 2021-02-26 [1] CRAN (R 4.1.0)
## magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.1.0)
## memoise 2.0.0 2021-01-26 [1] CRAN (R 4.1.0)
## modelr 0.1.8 2020-05-19 [1] CRAN (R 4.1.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.0)
## pillar 1.6.2 2021-07-29 [1] CRAN (R 4.1.0)
## pkgbuild 1.2.0 2020-12-15 [1] CRAN (R 4.1.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.0)
## pkgload 1.2.2 2021-09-11 [1] CRAN (R 4.1.0)
## plotrix * 3.8-2 2021-09-08 [1] CRAN (R 4.1.0)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.1.0)
## processx 3.5.2 2021-04-30 [1] CRAN (R 4.1.0)
## ps 1.6.0 2021-02-28 [1] CRAN (R 4.1.0)
## purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.1.0)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.0)
## Rcpp 1.0.7 2021-07-07 [1] CRAN (R 4.1.0)
## readr * 2.0.1 2021-08-10 [1] CRAN (R 4.1.0)
## readxl 1.3.1 2019-03-13 [1] CRAN (R 4.1.0)
## remotes 2.4.0 2021-06-02 [1] CRAN (R 4.1.0)
## reprex 2.0.1 2021-08-05 [1] CRAN (R 4.1.0)
## rlang 0.4.11 2021-04-30 [1] CRAN (R 4.1.0)
## rmarkdown 2.11 2021-09-14 [1] CRAN (R 4.1.0)
## rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.1.0)
## rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.1.0)
## rvest 1.0.1 2021-07-26 [1] CRAN (R 4.1.0)
## sass 0.4.0 2021-05-12 [1] CRAN (R 4.1.0)
## scales 1.1.1 2020-05-11 [1] CRAN (R 4.1.0)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.1.0)
## stringi 1.7.4 2021-08-25 [1] CRAN (R 4.1.0)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.1.0)
## testthat 3.0.4 2021-07-01 [1] CRAN (R 4.1.0)
## tibble * 3.1.4 2021-08-25 [1] CRAN (R 4.1.0)
## tidyr * 1.1.3 2021-03-03 [1] CRAN (R 4.1.0)
## tidyselect 1.1.1 2021-04-30 [1] CRAN (R 4.1.0)
## tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.1.0)
## tzdb 0.1.2 2021-07-20 [1] CRAN (R 4.1.0)
## usethis 2.0.1 2021-02-10 [1] CRAN (R 4.1.0)
## utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.0)
## vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.1.0)
## vroom 1.5.5 2021-09-14 [1] CRAN (R 4.1.0)
## withr 2.4.2 2021-04-18 [1] CRAN (R 4.1.0)
## xfun 0.26 2021-09-14 [1] CRAN (R 4.1.0)
## xml2 1.3.2 2020-04-23 [1] CRAN (R 4.1.0)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.1.0)
##
## [1] /Library/Frameworks/R.framework/Versions/4.1/Resources/library