This is code accompanying an article about Data Storytelling in the German IT magazine iX 1/2022. The figures in this article were created as follows.

Note: Because the data is partly from a German source, the data description is in German as well.

iX 1/2022



library(readr)
library(tidyverse)
library(stringr)
library(lubridate)

Set colorblind-friendly palette

cbp2 <- c("#000000", "#E69F00", "#56B4E9", "#009E73",
          "#999999", "#0072B2", "#D55E00", "#CC79A7")

ggplot <- function(...) ggplot2::ggplot(...) + 
  scale_color_manual(values = cbp2) +
  scale_fill_manual(values = cbp2) + # note: needs to be overridden when using continuous color scales
  theme_bw()
library(plotrix)
sliceValues <- rep(10, 8) # each slice value=10 for proportionate slices
(
  p <- pie3D(sliceValues, 
      explode=0, 
      theta = 1.2, 
      col = cbp2, 
      labels = cbp2, 
      labelcex = 0.9,
      shade = 0.6,
      main = "Colorblind\nfriendly palette")
)

## [1] 0.3926991 1.1780972 1.9634954 2.7488936 3.5342917 4.3196899 5.1050881
## [8] 5.8904862

Data

RKI

Because the original data file is in Excel (.xlsx) format, I exported worksheets with data of interest as .csv. Alternatively, you could use the xlsx package to directly read in .xlsx files.

You can directly download the csv files I used from Github.

file_fz <- "fallzahlen.csv"
file_sti <- "siebentageinzidenz.csv"

data_fz <- read_csv2(paste0("/Users/shiringlander/Documents/Github/data_storytelling_ix2021/data/", file_fz)) %>%
  gather("jahr_woche", "fallzahl", -Altersgruppe)

data_fz_sti <- read_csv2(paste0("/Users/shiringlander/Documents/Github/data_storytelling_ix2021/data/", file_sti)) %>%
  gather("jahr_woche", "siebentageinzidenz", -Altersgruppe) %>%
  full_join(data_fz, by = c("Altersgruppe", "jahr_woche")) %>%
  mutate(Jahr = as.numeric(str_extract(jahr_woche, "^.*(?=(_))")),
         Woche = as.numeric(str_remove(jahr_woche, "^(.*?_)")),
         fallzahl = as.numeric(fallzahl)) %>%
  mutate(Datum = paste0(Jahr, "-W", Woche, "-1"),
         Datum = make_datetime(year = Jahr) + weeks(Woche)) %>%
  select(-jahr_woche) %>%
  select(Datum, Jahr, Woche, Altersgruppe, everything()) %>%
  gather("x", "y", siebentageinzidenz:fallzahl)

data_fz_sti %>%
  head()
## # A tibble: 6 × 6
##   Datum                Jahr Woche Altersgruppe x                      y
##   <dttm>              <dbl> <dbl> <chr>        <chr>              <dbl>
## 1 2020-03-11 00:00:00  2020    10 Gesamt       siebentageinzidenz  1.08
## 2 2020-03-11 00:00:00  2020    10 90           siebentageinzidenz  0.12
## 3 2020-03-11 00:00:00  2020    10 85 - 89      siebentageinzidenz  0.38
## 4 2020-03-11 00:00:00  2020    10 80 - 84      siebentageinzidenz  0.36
## 5 2020-03-11 00:00:00  2020    10 75 - 79      siebentageinzidenz  0.62
## 6 2020-03-11 00:00:00  2020    10 70 - 74      siebentageinzidenz  0.3

Stand: 08.09.2021 07:09:21

Die dem RKI übermittelten COVID-19-Fälle nach Meldewoche und Geschlecht sowie Anteile mit für COVID-19 relevanten Symptomen, Anteile Hospitalisierter und Verstorbener für die Meldewochen KW 10 – 53/2020 und KW 01 - 35/2021

Altersmedian/-mittelwert für Hospitalisierte, Patienten auf Intensivstation und Verstorbene von an das RKI übermittelten COVID-19-Fällen für die Meldewochen KW 10 – 53/2020 und KW 01 - 35/2021

file_ka <- "klinische_aspekte.csv"
file_am <- "altersmedian.csv"

data_ka <- read_csv2(paste0("/Users/shiringlander/Documents/Github/data_storytelling_ix2021/data/", file_ka))
data_am <- read_delim(paste0("/Users/shiringlander/Documents/Github/data_storytelling_ix2021/data/", file_am), delim = ";", locale = locale(decimal_mark = ".")) %>%
  select(-'Fälle gesamt') %>%
  full_join(data_ka, by = c("Meldejahr", "Meldewoche" = "MW")) %>%
  mutate(Datum = paste0(Meldejahr, "-W", Meldewoche, "-1"),
         Datum = make_datetime(year = Meldejahr) + weeks(Meldewoche)) %>%
  select(-'Anteil keine, bzw. keine für COVID-19 bedeutsamen Symptome',
         -'Anteil der Hospitalisierten bei Fällen mit Angabe zur Hospitalisation') %>%
  select(Datum, Meldejahr, Meldewoche, everything()) %>%
  mutate(Männer = as.numeric(gsub("[^0-9.]", "",  Männer)),
         Frauen = as.numeric(gsub("[^0-9.]", "",  Frauen)),
         `Anteil Verstorben` = gsub(",", ".", `Anteil Verstorben`),
         `Anteil Verstorben` = as.numeric(gsub("[^0-9.]", "",  `Anteil Verstorben`))) %>%
  gather("x", "y", Alle_Altersmedian:'Anteil Verstorben') %>%
  rename(Jahr = Meldejahr, Woche = Meldewoche)

data_am %>%
  head()
## # A tibble: 6 × 5
##   Datum                Jahr Woche x                     y
##   <dttm>              <dbl> <dbl> <chr>             <dbl>
## 1 2020-03-11 00:00:00  2020    10 Alle_Altersmedian    45
## 2 2020-03-18 00:00:00  2020    11 Alle_Altersmedian    47
## 3 2020-03-25 00:00:00  2020    12 Alle_Altersmedian    47
## 4 2020-04-01 00:00:00  2020    13 Alle_Altersmedian    49
## 5 2020-04-08 00:00:00  2020    14 Alle_Altersmedian    52
## 6 2020-04-15 00:00:00  2020    15 Alle_Altersmedian    52

Vaccination data

https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv

file_vacc <- "vaccination_ger.csv"
data_vacc <- read_delim(paste0("/Users/shiringlander/Documents/Github/data_storytelling_ix2021/data/", file_vacc), delim = ",", locale = locale(decimal_mark = ".")) %>%
  gather("x", "y", total_vaccinations:daily_vaccinations_per_million) %>%
  select(-location, -iso_code)

Combination

data_gesamt <- data_fz_sti %>%
  filter(Altersgruppe == "Gesamt") %>%
  select(-Altersgruppe) %>%
  unique() %>%
  bind_rows(data_am) %>%
  select(-Jahr,-Woche) %>%
  rename(date = "Datum") %>%
  bind_rows(data_vacc)

Plots

data_gesamt %>%
  select(x) %>%
  filter(!duplicated(x))
## # A tibble: 30 × 1
##    x                      
##    <chr>                  
##  1 siebentageinzidenz     
##  2 fallzahl               
##  3 Alle_Altersmedian      
##  4 Hosp_Altersmedian      
##  5 ITS_Altersmedian       
##  6 Verstorben_Altersmedian
##  7 Alle_MW_Alter          
##  8 Hosp_MW_Alter          
##  9 ITS_MW_Alter           
## 10 Verst_MW_Alter         
## # … with 20 more rows
data_gesamt_perc_vacc <- data_gesamt %>%
  filter(grepl("people_vaccinated$", x) | grepl("people_fully_vaccinated$", x)) %>%
  mutate(y_perc = y / 83020000 * 100)
first_vac <- data_gesamt_perc_vacc %>%
  slice(1) %>%
  mutate(label = "Erste Impfungen")

bigger_ten <- data_gesamt_perc_vacc %>%
  group_by(x) %>%
  filter(y_perc > 10) %>%
  slice(1) %>%
  mutate(label = "> 10%")

bigger_25 <- data_gesamt_perc_vacc %>%
  group_by(x) %>%
  filter(y_perc > 25) %>%
  slice(1) %>%
  mutate(label = "> 25%")

bigger_50 <- data_gesamt_perc_vacc %>%
  group_by(x) %>%
  filter(y_perc > 50) %>%
  slice(1) %>%
  mutate(label = "> 50%")

last_entry <- data_gesamt_perc_vacc %>%
  group_by(x) %>%
  top_n(1) %>%
  mutate(label = paste0("letzter Eintrag ", round(y_perc, digits = 2), "%"))

vacc_data <- first_vac %>%
  bind_rows(bigger_ten) %>%
  bind_rows(bigger_25) %>%
  bind_rows(bigger_50) %>%
  bind_rows(last_entry) %>%
  mutate(x = ifelse(x == "people_vaccinated", "Prozent Geimpft", "Prozent Zweitimpfung"))
# Altersmedian & MW Alter
data_gesamt_prep <- data_gesamt %>%
  filter(grepl("median", x) | grepl("MW", x)) %>%
  mutate(stat = ifelse(grepl("median", x), "Median", "Mittelwert"),
         x = gsub("_", " ", x),
         x = gsub("Altersmedian|MW", "", x),
         x = gsub("Alter", "", x),
         x = gsub(" ", "", x),
         x = gsub("Verst$", "Verstorben", x),
         x = gsub("Verstorben", "Gestorben", x))
data_gesamt <- data_gesamt %>%
  mutate(date = as.Date(date, "%Y-%m-%d", tz = "CEST"))

data_gesamt_prep <- data_gesamt_prep %>%
  mutate(date = as.Date(date, "%Y-%m-%d", tz = "CEST"))

vacc_data <- vacc_data %>%
  mutate(date = as.Date(date, "%Y-%m-%d", tz = "CEST"))
ggplot() +
  geom_vline(data = vacc_data,
             aes(xintercept = date, color = x)) +
  geom_line(data = data_gesamt_prep,
            aes(x = date, y = y, color = x, linetype = stat)) +
  scale_x_date(date_breaks = "1 month",
               date_minor_breaks = "1 week") +
  ggrepel::geom_label_repel(data = vacc_data,
             aes(x = date, y = 90, label = label, color = x),
             arrow = arrow(length = unit(0.02, "npc"))) +
  theme(legend.position="top") +
  labs(x = "Datum",
       y = "Alter in Jahren",
       linetype = "Statistik",
       color = "Einteilung der\nErkrankten/Geimpften",
        title = "Unterscheiden sich das Durchschnittsalter aller an Covid-19 erkrankter von Hospitalisierten und Verstorbenen?\nUnd welchen Einfluss hat das Impfgeschehen darauf?", 
        #subtitle = "",
        caption = "Datenquellen (Stand: 08.09.2021 07:09:21):
       Robert-Koch-Institut: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Klinische_Aspekte.html &
       https://ourworldindata.org/: https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv")

ggplot() +
  geom_vline(data = vacc_data %>% rename(xx = x), aes(xintercept = date, color = xx)) +
  geom_line(data = data_gesamt %>% filter(grepl("Fälle gesamt", x) | grepl("Anteil Verstorben", x)),
    aes(x = date, y = y, color = x)) +
  facet_wrap(x ~ ., scales = "free", nrow = 2, 
                strip.position = "left", 
                labeller = as_labeller(c('Fälle gesamt' = "Fälle gesamt", 'Anteil Verstorben' = "Anteil Verstorbener") ) ) +
  scale_x_date(date_breaks = "1 month",
               date_minor_breaks = "1 week") +
  ggrepel::geom_label_repel(data = vacc_data %>% rename(xx = x),
             aes(x = date, y = 7, label = label, color = xx),
             arrow = arrow(length = unit(0.02, "npc"))) +
  theme(legend.position="top") +
  labs(x = "Datum",
       y = "",
       color = "",
        title = "Hat das Impfgeschehen Einfluss auf die Fallzahlen?", 
        #subtitle = "",
        caption = "Datenquellen (Stand: 08.09.2021 07:09:21):
       Robert-Koch-Institut: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Klinische_Aspekte.html &
       https://ourworldindata.org/: https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv")

ggplot() +
  geom_vline(data = vacc_data %>% rename(xx = x), aes(xintercept = date, color = xx)) +
  geom_line(data = data_gesamt %>% filter(grepl("Frauen", x)),
    aes(x = date, y = y, color = x)) +
  scale_x_date(date_breaks = "1 month",
               date_minor_breaks = "1 week") +
  ggrepel::geom_label_repel(data = vacc_data %>% rename(xx = x),
             aes(x = date, y = 56, label = label, color = xx),
             arrow = arrow(length = unit(0.02, "npc"))) +
  theme(legend.position="top") +
  labs(x = "Datum",
       y = "Prozentanteil weiblicher Infektionsfälle",
       color = "",
        title = "Hat das Impfgeschehen Einfluss auf den Anteil von infizierten Männern und Frauen?", 
        #subtitle = "",
        caption = "Datenquellen (Stand: 08.09.2021 07:09:21):
       Robert-Koch-Institut: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Klinische_Aspekte.html &
       https://ourworldindata.org/: https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv")


devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 4.1.1 (2021-08-10)
##  os       macOS Big Sur 10.16         
##  system   x86_64, darwin17.0          
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       Europe/Berlin               
##  date     2022-01-10                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package     * version date       lib source        
##  assertthat    0.2.1   2019-03-21 [1] CRAN (R 4.1.0)
##  backports     1.2.1   2020-12-09 [1] CRAN (R 4.1.0)
##  bit           4.0.4   2020-08-04 [1] CRAN (R 4.1.0)
##  bit64         4.0.5   2020-08-30 [1] CRAN (R 4.1.0)
##  blogdown      1.5     2021-09-02 [1] CRAN (R 4.1.0)
##  bookdown      0.24    2021-09-02 [1] CRAN (R 4.1.0)
##  broom         0.7.9   2021-07-27 [1] CRAN (R 4.1.0)
##  bslib         0.3.0   2021-09-02 [1] CRAN (R 4.1.0)
##  cachem        1.0.6   2021-08-19 [1] CRAN (R 4.1.0)
##  callr         3.7.0   2021-04-20 [1] CRAN (R 4.1.0)
##  cellranger    1.1.0   2016-07-27 [1] CRAN (R 4.1.0)
##  cli           3.0.1   2021-07-17 [1] CRAN (R 4.1.0)
##  colorspace    2.0-2   2021-06-24 [1] CRAN (R 4.1.0)
##  crayon        1.4.1   2021-02-08 [1] CRAN (R 4.1.0)
##  DBI           1.1.1   2021-01-15 [1] CRAN (R 4.1.0)
##  dbplyr        2.1.1   2021-04-06 [1] CRAN (R 4.1.0)
##  desc          1.3.0   2021-03-05 [1] CRAN (R 4.1.0)
##  devtools      2.4.2   2021-06-07 [1] CRAN (R 4.1.0)
##  digest        0.6.28  2021-09-23 [1] CRAN (R 4.1.1)
##  dplyr       * 1.0.7   2021-06-18 [1] CRAN (R 4.1.0)
##  ellipsis      0.3.2   2021-04-29 [1] CRAN (R 4.1.0)
##  evaluate      0.14    2019-05-28 [1] CRAN (R 4.1.0)
##  fansi         0.5.0   2021-05-25 [1] CRAN (R 4.1.0)
##  farver        2.1.0   2021-02-28 [1] CRAN (R 4.1.0)
##  fastmap       1.1.0   2021-01-25 [1] CRAN (R 4.1.0)
##  forcats     * 0.5.1   2021-01-27 [1] CRAN (R 4.1.0)
##  fs            1.5.0   2020-07-31 [1] CRAN (R 4.1.0)
##  generics      0.1.0   2020-10-31 [1] CRAN (R 4.1.0)
##  ggplot2     * 3.3.5   2021-06-25 [1] CRAN (R 4.1.0)
##  ggrepel       0.9.1   2021-01-15 [1] CRAN (R 4.1.0)
##  glue          1.4.2   2020-08-27 [1] CRAN (R 4.1.0)
##  gtable        0.3.0   2019-03-25 [1] CRAN (R 4.1.0)
##  haven         2.4.3   2021-08-04 [1] CRAN (R 4.1.0)
##  highr         0.9     2021-04-16 [1] CRAN (R 4.1.0)
##  hms           1.1.0   2021-05-17 [1] CRAN (R 4.1.0)
##  htmltools     0.5.2   2021-08-25 [1] CRAN (R 4.1.0)
##  httr          1.4.2   2020-07-20 [1] CRAN (R 4.1.0)
##  jquerylib     0.1.4   2021-04-26 [1] CRAN (R 4.1.0)
##  jsonlite      1.7.2   2020-12-09 [1] CRAN (R 4.1.0)
##  knitr         1.34    2021-09-09 [1] CRAN (R 4.1.0)
##  labeling      0.4.2   2020-10-20 [1] CRAN (R 4.1.0)
##  lifecycle     1.0.1   2021-09-24 [1] CRAN (R 4.1.1)
##  lubridate   * 1.7.10  2021-02-26 [1] CRAN (R 4.1.0)
##  magrittr      2.0.1   2020-11-17 [1] CRAN (R 4.1.0)
##  memoise       2.0.0   2021-01-26 [1] CRAN (R 4.1.0)
##  modelr        0.1.8   2020-05-19 [1] CRAN (R 4.1.0)
##  munsell       0.5.0   2018-06-12 [1] CRAN (R 4.1.0)
##  pillar        1.6.2   2021-07-29 [1] CRAN (R 4.1.0)
##  pkgbuild      1.2.0   2020-12-15 [1] CRAN (R 4.1.0)
##  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.1.0)
##  pkgload       1.2.2   2021-09-11 [1] CRAN (R 4.1.0)
##  plotrix     * 3.8-2   2021-09-08 [1] CRAN (R 4.1.0)
##  prettyunits   1.1.1   2020-01-24 [1] CRAN (R 4.1.0)
##  processx      3.5.2   2021-04-30 [1] CRAN (R 4.1.0)
##  ps            1.6.0   2021-02-28 [1] CRAN (R 4.1.0)
##  purrr       * 0.3.4   2020-04-17 [1] CRAN (R 4.1.0)
##  R6            2.5.1   2021-08-19 [1] CRAN (R 4.1.0)
##  Rcpp          1.0.7   2021-07-07 [1] CRAN (R 4.1.0)
##  readr       * 2.0.1   2021-08-10 [1] CRAN (R 4.1.0)
##  readxl        1.3.1   2019-03-13 [1] CRAN (R 4.1.0)
##  remotes       2.4.0   2021-06-02 [1] CRAN (R 4.1.0)
##  reprex        2.0.1   2021-08-05 [1] CRAN (R 4.1.0)
##  rlang         0.4.11  2021-04-30 [1] CRAN (R 4.1.0)
##  rmarkdown     2.11    2021-09-14 [1] CRAN (R 4.1.0)
##  rprojroot     2.0.2   2020-11-15 [1] CRAN (R 4.1.0)
##  rstudioapi    0.13    2020-11-12 [1] CRAN (R 4.1.0)
##  rvest         1.0.1   2021-07-26 [1] CRAN (R 4.1.0)
##  sass          0.4.0   2021-05-12 [1] CRAN (R 4.1.0)
##  scales        1.1.1   2020-05-11 [1] CRAN (R 4.1.0)
##  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 4.1.0)
##  stringi       1.7.4   2021-08-25 [1] CRAN (R 4.1.0)
##  stringr     * 1.4.0   2019-02-10 [1] CRAN (R 4.1.0)
##  testthat      3.0.4   2021-07-01 [1] CRAN (R 4.1.0)
##  tibble      * 3.1.4   2021-08-25 [1] CRAN (R 4.1.0)
##  tidyr       * 1.1.3   2021-03-03 [1] CRAN (R 4.1.0)
##  tidyselect    1.1.1   2021-04-30 [1] CRAN (R 4.1.0)
##  tidyverse   * 1.3.1   2021-04-15 [1] CRAN (R 4.1.0)
##  tzdb          0.1.2   2021-07-20 [1] CRAN (R 4.1.0)
##  usethis       2.0.1   2021-02-10 [1] CRAN (R 4.1.0)
##  utf8          1.2.2   2021-07-24 [1] CRAN (R 4.1.0)
##  vctrs         0.3.8   2021-04-29 [1] CRAN (R 4.1.0)
##  vroom         1.5.5   2021-09-14 [1] CRAN (R 4.1.0)
##  withr         2.4.2   2021-04-18 [1] CRAN (R 4.1.0)
##  xfun          0.26    2021-09-14 [1] CRAN (R 4.1.0)
##  xml2          1.3.2   2020-04-23 [1] CRAN (R 4.1.0)
##  yaml          2.2.1   2020-02-01 [1] CRAN (R 4.1.0)
## 
## [1] /Library/Frameworks/R.framework/Versions/4.1/Resources/library