library(downloadthis)
library(dplyr)
library(ggplot2)
library(tidyr)
c("M1", "M2", "W1", "W2") events =
purrr::map_dfr(
tbl_predictions =
events,# read in the predictions for each event
~readr::read_csv(
::here("analyses", paste0("2024-02-16_Pem-", .x, ".csv")),
herecol_types = readr::cols(
Competitor = readr::col_character(),
.default = readr::col_double()
),|>
) rename(crew_name = Competitor) |>
# pivot to long format for easier manipulation
pivot_longer(
cols = !c(crew_name),
names_to = "round",
values_to = "prob"
),.id = "event"
|>
) mutate(round = as.integer(round)) |>
# remove mistakes in output and byes
filter(round < 4, prob < 1)
tbl_predictions |>
tbl_cond_probs = # calculate the conditional probability
# p(win round A | compete in round A) = p(win round A) / p(compete in round A)
# NB: p(compete in round A) = 1 for the first round
group_by(crew_name) |>
arrange(round, .by_group = TRUE) |>
mutate(
# probability 0 breaks things, make it up to
# Monte Carlo error
prob = pmax(prob, 1/1000),
cond_prob = prob / c(lead(prob)[-n()], 1),
|>
) ungroup() |>
assertr::assert(assertr::not_na, cond_prob)
|>
tbl_cond_probs DT::datatable()
# results contain the round each crew lost, or 0 if they won the event
readr::read_csv(
tbl_results =::here("data/results/pem_regatta.csv"),
herecol_types = readr::cols(
crew_name = readr::col_character(),
round = readr::col_integer(),
)|>
) rename(last_round = round)
|>
tbl_results DT::datatable()
left_join(
tbl_probs_outcomes =
tbl_cond_probs,
tbl_results,by = "crew_name"
|>
) filter(
>= last_round - 1,
round # exclude match-ups due to LMBC scratching
!= "Girton W1",
crew_name !(crew_name == "Caius W1" & round == 3),
|>
) # set outcome to 1 if the crew progressed past the round,
# 0 if they didn't
mutate(
outcome = (round >= last_round),
brier = (cond_prob - outcome)^2
)
Should be a true and false pair for each race, hence overall 50% of outcomes are true:
|>
tbl_probs_outcomes summarise(
mean(outcome),
.by = event
)
## # A tibble: 4 × 2
## event `mean(outcome)`
## <chr> <dbl>
## 1 4 0.5
## 2 1 0.5
## 3 3 0.5
## 4 2 0.5
|>
tbl_probs_outcomes # bin into five equal size bins based on the conditional probability
# then check the calibration of the bins
mutate(bin = ntile(cond_prob, 5)) |>
summarise(
.by = bin,
x = sum(outcome),
n = n(),
mean_pred = mean(cond_prob),
|>
) # exact 80% confidence intervals for binomial proportions
mutate(
::binom.confint(
binom
x, n,conf.level = 0.8,
methods = "exact"
)|>
) # plot with binomial confidence intervals
ggplot(aes(x = mean_pred, y = mean)) +
geom_point() +
geom_errorbar(
aes(ymin = lower,
ymax = upper),
width = 0
+
) geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
scale_x_continuous(labels = scales::percent, limits = c(0, 1)) +
scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
labs(
x = "Predicted probability",
y = "Observed probability",
title = "Calibration of predicted probabilities",
subtitle = "Points are the mean observed probability in each bin, with 80% confidence intervals"
+
) theme_minimal()
Mean brier score is 0.1357385 with a standard deviation of 0.1683743.
|>
tbl_probs_outcomes ggplot(aes(brier)) +
geom_histogram(bins = 20) +
labs(
x = "Brier score",
y = "Count",
title = "Distribution of Brier scores"
+
) theme_minimal()
The table below shows the most surprising results, based on the brier score.
|>
tbl_probs_outcomes # calculate the log odds of the conditional probability
arrange(desc(brier)) |>
DT::datatable()
Top 10 are a bit above the rest on how surprising. These matchups were (with winner bolded, grouped by event).
Why did we underestimate Catz? Lets compare their race results to Clare and Emma.
readRDS(here::here("outputs/fit_archive/fit_2024-02-16_1331.rds"))
fit_prev = tar_read(all_data)
tbl_data = fit_prev |>
tbl_adjustment = tidybayes::spread_draws(`r_event:div`[event_div,], `b_releveleventrefEQnewnham_head(.+)`, regex = T) |>
mutate("b_releveleventrefEQnewnham_headnewnham_head" = 0) |>
pivot_longer(
starts_with("b_releveleventrefEQnewnham_head"),
names_prefix = "b_releveleventrefEQnewnham_head",
values_to = "event_adjust",
names_to = "event_adjust_event"
|>
) tidyr::extract(event_div, c("event", "div"), "^(.+)_([0-9]+)$") |>
filter(event == event_adjust_event) |>
replace_na(list(event_adjust = 0)) |>
group_by(event, div) |>
summarise(
adjust_div = mean(`r_event:div`),
adjust_all = mean(`r_event:div` + event_adjust)
)
## `summarise()` has grouped output by 'event'. You can override using the
## `.groups` argument.
|>
tbl_data add_crew_name() |>
filter(crew_name %in% c("St. Catherine's W1", "Clare W1", "Emmanuel W1")) |>
filter(censor == "none") |>
inner_join(tbl_adjustment, by = c("event", "div")) |>
mutate(div = as.integer(div)) |>
transmute(
event,
div,
crew_name,raw_time = time,
raw_split = 500 / speed,
adjust_div_time = distance / exp(log(speed) - adjust_div),
adjust_all_split = 500 / exp(log(speed) - adjust_all),
|>
) arrange(event, adjust_all_split) |>
mutate(across(c(raw_time, raw_split, adjust_div_time, adjust_all_split), display_duration)) |>
DT::datatable()
So Catz did pretty poorly in Head to Head. They were faster at Fairbairns, but we downweight that, and Newnham Head, but were in a fast division.