## ----setup2, message = FALSE, warning = FALSE, results = 'hide'---------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(baselinenowcast)
library(ggplot2)
library(dplyr)
library(tidyr)

## -----------------------------------------------------------------------------
nowcast_date <- "2021-08-01"
eval_date <- "2021-10-01"

target_data <- filter(
  germany_covid19_hosp,
  location == "DE", age_group == "00+",
  report_date <= eval_date,
  reference_date <= nowcast_date
)

## -----------------------------------------------------------------------------
latest_data <- target_data |>
  group_by(reference_date) |>
  summarise(final_count = sum(count))

## -----------------------------------------------------------------------------
observed_data <- filter(
  target_data,
  report_date <= nowcast_date
)
head(observed_data)

## -----------------------------------------------------------------------------
initial_reports <- observed_data |>
  group_by(reference_date) |>
  summarise(initial_count = sum(count))

## -----------------------------------------------------------------------------
max_delay <- 30

## -----------------------------------------------------------------------------
rep_tri_full <- as_reporting_triangle(observed_data)

## -----------------------------------------------------------------------------
rep_tri_full

## -----------------------------------------------------------------------------
rep_tri <- truncate_to_delay(rep_tri_full, max_delay = max_delay)
rep_tri

## -----------------------------------------------------------------------------
scale_factor <- 3
prop_delay <- 0.5
tv <- allocate_reference_times(
  reporting_triangle = rep_tri,
  scale_factor = scale_factor,
  prop_delay = prop_delay
)
n_history_delay <- tv$n_history_delay
n_retrospective_nowcasts <- tv$n_retrospective_nowcasts

## -----------------------------------------------------------------------------
delay_pmf <- estimate_delay(
  reporting_triangle = rep_tri,
  n = n_history_delay
)

## -----------------------------------------------------------------------------
delay_df <- data.frame(
  delay = 0:(length(delay_pmf) - 1),
  pmf = delay_pmf
)

delay_cdf_plot <- ggplot(delay_df) +
  geom_line(aes(x = delay, y = cumsum(pmf))) +
  xlab("Delay") +
  ylab("Cumulative proportion reported") +
  ggtitle("Empirical point estimate of cumulative proportion reported by delay") + # nolint
  theme_bw()

delay_pmf_plot <- ggplot(delay_df) +
  geom_line(aes(x = delay, y = pmf)) +
  xlab("Delay") +
  ylab("Proportion reported") +
  ggtitle("Empirical point estimate of proportion reported by delay") +
  theme_bw()

## -----------------------------------------------------------------------------
delay_cdf_plot
delay_pmf_plot

## -----------------------------------------------------------------------------
point_nowcast_matrix <- apply_delay(
  reporting_triangle = rep_tri,
  delay_pmf = delay_pmf
)

## -----------------------------------------------------------------------------
initial_reports_labeled <- initial_reports |>
  mutate(type = "Initial real-time") |>
  rename(count = initial_count)
point_nowcast_df <- latest_data |>
  rename(count = final_count) |>
  mutate(nowcast = rowSums(point_nowcast_matrix)) |>
  pivot_longer(
    cols = c(count, nowcast),
    names_to = "type",
    values_to = "count"
  ) |>
  mutate(type = case_when(
    type == "count" ~ "Final observed data",
    type == "nowcast" ~ "Point nowcast",
    TRUE ~ type
  )) |>
  bind_rows(
    initial_reports_labeled
  )

# Create plot with data type as a variable
plot_pt_nowcast <- ggplot(point_nowcast_df, aes(
  x = reference_date,
  y = count,
  color = type
)) +
  geom_line() +
  scale_color_manual(values = c(
    "Initial reports" = "darkred",
    "Final observed data" = "black",
    "Point nowcast" = "darkblue"
  )) +
  theme_bw() +
  xlab("Reference date") +
  ylab("Confirmed admissions") +
  scale_y_continuous(trans = "log10") +
  ggtitle("Comparing initial reports, nowcasted, and later observed cases") +
  theme(legend.position = "bottom") +
  labs(color = "Type")

## -----------------------------------------------------------------------------
plot_pt_nowcast

## -----------------------------------------------------------------------------
trunc_rep_tri_list <- truncate_to_rows(
  rep_tri,
  n = n_retrospective_nowcasts
)

## -----------------------------------------------------------------------------
retro_rep_tri_list <- apply_reporting_structures(trunc_rep_tri_list)

## -----------------------------------------------------------------------------
retro_pt_nowcast_mat_list <- estimate_and_apply_delays(
  retro_reporting_triangles = retro_rep_tri_list,
  n = n_history_delay
)

## -----------------------------------------------------------------------------
disp_params <- estimate_uncertainty(
  point_nowcast_matrices = retro_pt_nowcast_mat_list,
  truncated_reporting_triangles = trunc_rep_tri_list,
  retro_reporting_triangles = retro_rep_tri_list,
  n = n_retrospective_nowcasts
)

## -----------------------------------------------------------------------------
nowcast_draws_df <- sample_nowcasts(
  point_nowcast_matrix,
  rep_tri,
  uncertainty_params = disp_params,
  draws = 100
)

head(nowcast_draws_df)

## -----------------------------------------------------------------------------
obs_with_nowcast_draws_df <- nowcast_draws_df |>
  left_join(latest_data, by = "reference_date") |>
  left_join(initial_reports, by = "reference_date")
head(obs_with_nowcast_draws_df)

## -----------------------------------------------------------------------------
combined_data <- obs_with_nowcast_draws_df |>
  select(reference_date, initial_count, final_count) |>
  distinct() |>
  pivot_longer(
    cols = c(initial_count, final_count),
    names_to = "type",
    values_to = "count"
  ) |>
  mutate(type = case_when(
    type == "initial_count" ~ "Initial reports",
    type == "final_count" ~ "Final observed data"
  ))

## -----------------------------------------------------------------------------
combined_data <- obs_with_nowcast_draws_df |>
  select(reference_date, initial_count, final_count) |>
  distinct() |>
  pivot_longer(
    cols = c(initial_count, final_count),
    names_to = "type",
    values_to = "count"
  ) |>
  mutate(type = case_when(
    type == "initial_count" ~ "Initial reports",
    type == "final_count" ~ "Final observed data"
  ))

# Plot with draws for nowcast only
plot_prob_nowcast <- ggplot() +
  # Add nowcast draws as thin gray lines
  geom_line(
    data = obs_with_nowcast_draws_df,
    aes(
      x = reference_date, y = pred_count, group = draw,
      color = "Nowcast draw", linewidth = "Nowcast draw"
    )
  ) +
  # Add observed data and final data once
  geom_line(
    data = combined_data,
    aes(
      x = reference_date,
      y = count,
      color = type,
      linewidth = type
    )
  ) +
  theme_bw() +
  scale_color_manual(
    values = c(
      "Nowcast draw" = "gray",
      "Initial reports" = "darkred",
      "Final observed data" = "black"
    ),
    name = ""
  ) +
  scale_linewidth_manual(
    values = c(
      "Nowcast draw" = 0.2,
      "Initial reports" = 1,
      "Final observed data" = 1
    ),
    name = ""
  ) +
  scale_y_continuous(trans = "log10") +
  xlab("Reference date") +
  ylab("Hospital admissions") +
  theme(legend.position = "bottom") +
  ggtitle("Comparison of admissions as of the nowcast date, later observed counts, \n and probabilistic nowcasted counts") # nolint

## -----------------------------------------------------------------------------
plot_prob_nowcast

