Skip to content

Instantly share code, notes, and snippets.

@mhoehle
Last active July 4, 2023 08:48
Show Gist options
  • Select an option

  • Save mhoehle/11391bd19be82307547d5121ee70664d to your computer and use it in GitHub Desktop.

Select an option

Save mhoehle/11391bd19be82307547d5121ee70664d to your computer and use it in GitHub Desktop.
Statistical modelling of the 3x3 WR (single)
Date WR Cuber Nationality Event Date_numeric
2023-06-11 3.13 Max Park United States Pride in Long Beach 2023 19519
2018-11-24 3.47 Yusheng Du (杜宇生) China Wuhu Open 2018 17859
2018-05-06 4.22 Feliks Zemdegs Australia Cube for Cambodia 2018 17657
2018-01-27 4.59 Feliks Zemdegs Australia Hobart Summer 2018 17558
2017-10-28 4.59 SeungBeom Cho (조승범) Republic of Korea ChicaGhosts 2017 17467
2017-09-02 4.69 Patrick Ponce United States Rally In The Valley 2017 17411
2016-12-11 4.73 Feliks Zemdegs Australia POPS Open 2016 17146
2016-11-05 4.74 Mats Valk Netherlands Jawa Timur Open 2016 17110
2015-11-21 4.9 Lucas Etter United States River Hill Fall 2015 16760
2015-04-25 5.25 Collin Burns United States Doylestown Spring 2015 16550
2013-03-02 5.55 Mats Valk Netherlands Zonhoven Open 2013 15766
2011-06-25 5.66 Feliks Zemdegs Australia Melbourne Winter Open 2011 15150
2011-06-25 6.18 Feliks Zemdegs Australia Melbourne Winter Open 2011 15150
2011-05-07 6.24 Feliks Zemdegs Australia Kubaroo Open 2011 15101
2011-05-07 6.65 Feliks Zemdegs Australia Kubaroo Open 2011 15101
2011-01-29 6.65 Feliks Zemdegs Australia Melbourne Summer Open 2011 15003
2010-11-13 6.77 Feliks Zemdegs Australia Melbourne Cube Day 2010 14926
2010-11-13 7.03 Feliks Zemdegs Australia Melbourne Cube Day 2010 14926
2008-07-12 7.08 Erik Akkersdijk Netherlands Czech Open 2008 14072
2008-05-05 8.72 Yu Nakajima (中島悠) Japan Kashiwa Open 2008 14004
2008-05-05 8.72 Yu Nakajima (中島悠) Japan Kashiwa Open 2008 14004
2008-02-23 9.18 Edouard Chambon France Murcia Open 2008 13932
2007-11-24 9.55 Ron van Bruchem Netherlands Dutch Championship 2007 13841
2007-10-13 9.77 Erik Akkersdijk Netherlands Dutch Open 2007 13799
2007-05-05 9.86 Thibaut Jacquinot France Spanish Open 2007 13638
2007-02-24 10.36 Edouard Chambon France Belgian Open 2007 13568
2006-08-04 10.48 Toby Mao (毛台立) United States US Nationals 2006 13364
2006-01-14 11.13 Leyan Lo United States Caltech Winter competition 2006 13162
2005-10-16 11.75 Jean Pons France Dutch Open 2005 13072
# We use statistical modelling to describe the development of
# the 3x3 world record (single) over time.
# Data from the WCA database are used.
#
# This post was inspired by a recent analysis made
# in a Youtube video by Jperm - see https://youtu.be/B9MKizs9PUw?t=451
#
# Author: Michael Höhle (https://math-inf.uni-greifswald.de/en/michael-hoehle/)
# Date: 2023-07-04
library(tidyverse)
# The propagate package will be used later in the analysis
# install.packages("propagate")
# Data used by JPerm
wr <- tibble(
date = ymd( c("2018-11-24", "2018-05-06", "2018-01-27", "2017-10-28", "2017-09-02", "2016-12-11", "2016-11-05", "2015-11-21", "2015-04-25")),
wr = c(3.47, 4.22, 4.59, 4.59, 4.69, 4.73, 4.74, 4.90, 5.25)
)
# Write to file in order to import into Google Spreadsheets
# see also https://docs.google.com/spreadsheets/d/1X516G6a3Om4GxqfuFLKMryUVRx4gUKrBfYMApZ_iqug/edit#gid=827209089
readr::write_csv(wr, file="wr3x3.csv")
# Show the data and an initial fit
ggplot(wr, aes(x=date, y=wr)) + geom_line() + geom_point() +
scale_y_continuous(limits=c(0,NA)) +
geom_smooth(method = "glm", formula = y~x,
method.args = list(family = gaussian(link = 'log')))
#Fit linear model using least squares
m <- lm(log(wr) ~ 1 + I(date - min(date)), data=wr)
coef(m)
min_date <- min(wr$date)
# \hat{\sigma}
summary(m)$sigma
sqrt(sum(m$residuals^2)/(nrow(wr)-2))
# Date when the median hits a certain value
date_predict <- function(wr_time) {
as.Date(min_date + (log(wr_time) - coef(m)[1]) / coef(m)[2], origin=ymd("1970-01-01"))
}
# Get some predictions from the log-linear model
date_predict(3.47)
date_predict(3.13)
# Read the 2005-2023 WCA data available from https://www.worldcubeassociation.org/results/records?show=history
# wr3x3 <- readxl::read_xlsx("wr3x3.xlsx", sheet="WR3x3") %>%
# mutate(Date = mdy(Date), WR = as.numeric(WR)) %>%
# filter(year(Date) >= 2005) %>%
# mutate(Date_numeric = as.numeric(Date)) %>%
# select(-`...3`)
# # Export as CSV
# readr::write_csv(wr3x3, file="wr3x3_all.csv")
# Load WCA data
wr3x3 <- readr::read_csv("wr3x3_all.csv")
# Initial descriptive plot
ggplot(wr3x3, aes(x=Date, y=WR)) + geom_step() +
geom_point() +
scale_y_continuous(limit=c(0,NA), breaks=seq(0,23,by=2)) +
theme_minimal()
# Fit the model to these data
m_lm <- lm( log(WR) ~ 1 + I(Date - min(Date)), data=wr3x3)
coef(m_lm)
# Log-linear model fit to the data
ggplot(wr3x3, aes(x=Date, y=WR)) + geom_step() +
geom_point() +
scale_y_continuous(limit=c(0,NA), breaks=seq(0,23,by=2)) +
theme_minimal() +
geom_smooth(method = "glm", formula = y~x,
method.args = list(family = gaussian(link = 'log')))
# Gompertz model start values
Asym <- 4.5; b2 <- 2.3; b3 <- 0.7
# Fit Gompertz model
m_gomp <- nls(WR ~ SSgompertz(as.numeric(Date), Asym, b2, b3), data = wr3x3)
m_gomp
## Make a tibble with the predictions
wr3x3_pred <- bind_rows(
wr3x3 %>% select(Date, WR),
tibble(Date = seq(ymd("2023-01-01"),ymd("2030-01-01"), by="3 month"), WR=NA)
) %>% arrange(Date) %>%
mutate(#gompertz = predict(m_gomp, newdata=., interval = "prediction"), #Note: the interval argument is ignored for nls. :-() We need to add that manually
lm = exp(predict(m_lm, newdata=., interval = "prediction")),
future = Date > Sys.Date())
#head(wr3x3_pred)
# Use simulation based PIs from the propagate package for the gompertz model.
m_gomp_pred <- propagate::predictNLS(m_gomp, newdata=wr3x3_pred %>% select(Date), interval = "prediction")
pi <- m_gomp_pred$prop %>% map_df(function(x) x$sim[c("Median","2.5%","97.5%")]) %>%
setNames(c('fit', 'lwr', 'upr')) %>%
as.matrix()
wr3x3_pred$gompertz <- pi
wr3x3_pred <- wr3x3_pred %>% unnest(c(lm,gompertz))
#View(wr3x3_pred)
# Convert to long format
wr3x3_long <- wr3x3_pred %>%
pivot_longer(-c(Date, future), names_to="Model", values_to="WR") %>%
mutate(fit = WR[,1], pi_lower = WR[,2], pi_upper = WR[,3]) %>%
select(-WR)
wr3x3_long
# wr3x3_long %>% filter(Model == "gompertz")
# names(wr3x3_long)
make_poly <- function(model_str) {
p1 <- wr3x3_long %>% filter(Model == model_str) %>% select(Date, future, Model, pi_lower) %>% rename(fit = pi_lower)
p2 <- wr3x3_long %>% filter(Model == model_str) %>% arrange(desc(Date)) %>% select(Date, future, Model, pi_upper) %>% rename(fit = pi_upper)
return(bind_rows(p1, p2))
}
lm_poly <- make_poly("lm")
gompertz_poly <- make_poly("gompertz")
polys <- bind_rows(lm_poly, gompertz_poly)
# Show the plot
ggplot(wr3x3_long %>% filter(Model=="WR"), aes(x=Date, y=fit, color=Model)) + geom_step() +
geom_polygon(data=polys, aes(fill=Model), alpha=0.1, color=NA) +
geom_point() +
geom_line(data = wr3x3_long %>% filter(Model !="WR"), aes(y = fit, linetype=future)) +
scale_y_continuous(limit=c(0,NA), breaks=seq(0,23,by=2)) +
theme_minimal() +
ylab("WR 3x3 (seconds)") +
guides(linetype = FALSE) +
theme(legend.position="bottom") +
scale_fill_discrete(limits = c('lm', 'gompertz')) +
scale_color_discrete(limits = c('lm', 'gompertz'))
# Store image to disk.
ggsave(file="wr3x3.png", dpi=300, width=8, height=4, bg="white")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment