Last active
July 4, 2023 08:48
-
-
Save mhoehle/11391bd19be82307547d5121ee70664d to your computer and use it in GitHub Desktop.
Statistical modelling of the 3x3 WR (single)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Date | WR | Cuber | Nationality | Event | Date_numeric | |
|---|---|---|---|---|---|---|
| 2023-06-11 | 3.13 | Max Park | United States | Pride in Long Beach 2023 | 19519 | |
| 2018-11-24 | 3.47 | Yusheng Du (杜宇生) | China | Wuhu Open 2018 | 17859 | |
| 2018-05-06 | 4.22 | Feliks Zemdegs | Australia | Cube for Cambodia 2018 | 17657 | |
| 2018-01-27 | 4.59 | Feliks Zemdegs | Australia | Hobart Summer 2018 | 17558 | |
| 2017-10-28 | 4.59 | SeungBeom Cho (조승범) | Republic of Korea | ChicaGhosts 2017 | 17467 | |
| 2017-09-02 | 4.69 | Patrick Ponce | United States | Rally In The Valley 2017 | 17411 | |
| 2016-12-11 | 4.73 | Feliks Zemdegs | Australia | POPS Open 2016 | 17146 | |
| 2016-11-05 | 4.74 | Mats Valk | Netherlands | Jawa Timur Open 2016 | 17110 | |
| 2015-11-21 | 4.9 | Lucas Etter | United States | River Hill Fall 2015 | 16760 | |
| 2015-04-25 | 5.25 | Collin Burns | United States | Doylestown Spring 2015 | 16550 | |
| 2013-03-02 | 5.55 | Mats Valk | Netherlands | Zonhoven Open 2013 | 15766 | |
| 2011-06-25 | 5.66 | Feliks Zemdegs | Australia | Melbourne Winter Open 2011 | 15150 | |
| 2011-06-25 | 6.18 | Feliks Zemdegs | Australia | Melbourne Winter Open 2011 | 15150 | |
| 2011-05-07 | 6.24 | Feliks Zemdegs | Australia | Kubaroo Open 2011 | 15101 | |
| 2011-05-07 | 6.65 | Feliks Zemdegs | Australia | Kubaroo Open 2011 | 15101 | |
| 2011-01-29 | 6.65 | Feliks Zemdegs | Australia | Melbourne Summer Open 2011 | 15003 | |
| 2010-11-13 | 6.77 | Feliks Zemdegs | Australia | Melbourne Cube Day 2010 | 14926 | |
| 2010-11-13 | 7.03 | Feliks Zemdegs | Australia | Melbourne Cube Day 2010 | 14926 | |
| 2008-07-12 | 7.08 | Erik Akkersdijk | Netherlands | Czech Open 2008 | 14072 | |
| 2008-05-05 | 8.72 | Yu Nakajima (中島悠) | Japan | Kashiwa Open 2008 | 14004 | |
| 2008-05-05 | 8.72 | Yu Nakajima (中島悠) | Japan | Kashiwa Open 2008 | 14004 | |
| 2008-02-23 | 9.18 | Edouard Chambon | France | Murcia Open 2008 | 13932 | |
| 2007-11-24 | 9.55 | Ron van Bruchem | Netherlands | Dutch Championship 2007 | 13841 | |
| 2007-10-13 | 9.77 | Erik Akkersdijk | Netherlands | Dutch Open 2007 | 13799 | |
| 2007-05-05 | 9.86 | Thibaut Jacquinot | France | Spanish Open 2007 | 13638 | |
| 2007-02-24 | 10.36 | Edouard Chambon | France | Belgian Open 2007 | 13568 | |
| 2006-08-04 | 10.48 | Toby Mao (毛台立) | United States | US Nationals 2006 | 13364 | |
| 2006-01-14 | 11.13 | Leyan Lo | United States | Caltech Winter competition 2006 | 13162 | |
| 2005-10-16 | 11.75 | Jean Pons | France | Dutch Open 2005 | 13072 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # We use statistical modelling to describe the development of | |
| # the 3x3 world record (single) over time. | |
| # Data from the WCA database are used. | |
| # | |
| # This post was inspired by a recent analysis made | |
| # in a Youtube video by Jperm - see https://youtu.be/B9MKizs9PUw?t=451 | |
| # | |
| # Author: Michael Höhle (https://math-inf.uni-greifswald.de/en/michael-hoehle/) | |
| # Date: 2023-07-04 | |
| library(tidyverse) | |
| # The propagate package will be used later in the analysis | |
| # install.packages("propagate") | |
| # Data used by JPerm | |
| wr <- tibble( | |
| date = ymd( c("2018-11-24", "2018-05-06", "2018-01-27", "2017-10-28", "2017-09-02", "2016-12-11", "2016-11-05", "2015-11-21", "2015-04-25")), | |
| wr = c(3.47, 4.22, 4.59, 4.59, 4.69, 4.73, 4.74, 4.90, 5.25) | |
| ) | |
| # Write to file in order to import into Google Spreadsheets | |
| # see also https://docs.google.com/spreadsheets/d/1X516G6a3Om4GxqfuFLKMryUVRx4gUKrBfYMApZ_iqug/edit#gid=827209089 | |
| readr::write_csv(wr, file="wr3x3.csv") | |
| # Show the data and an initial fit | |
| ggplot(wr, aes(x=date, y=wr)) + geom_line() + geom_point() + | |
| scale_y_continuous(limits=c(0,NA)) + | |
| geom_smooth(method = "glm", formula = y~x, | |
| method.args = list(family = gaussian(link = 'log'))) | |
| #Fit linear model using least squares | |
| m <- lm(log(wr) ~ 1 + I(date - min(date)), data=wr) | |
| coef(m) | |
| min_date <- min(wr$date) | |
| # \hat{\sigma} | |
| summary(m)$sigma | |
| sqrt(sum(m$residuals^2)/(nrow(wr)-2)) | |
| # Date when the median hits a certain value | |
| date_predict <- function(wr_time) { | |
| as.Date(min_date + (log(wr_time) - coef(m)[1]) / coef(m)[2], origin=ymd("1970-01-01")) | |
| } | |
| # Get some predictions from the log-linear model | |
| date_predict(3.47) | |
| date_predict(3.13) | |
| # Read the 2005-2023 WCA data available from https://www.worldcubeassociation.org/results/records?show=history | |
| # wr3x3 <- readxl::read_xlsx("wr3x3.xlsx", sheet="WR3x3") %>% | |
| # mutate(Date = mdy(Date), WR = as.numeric(WR)) %>% | |
| # filter(year(Date) >= 2005) %>% | |
| # mutate(Date_numeric = as.numeric(Date)) %>% | |
| # select(-`...3`) | |
| # # Export as CSV | |
| # readr::write_csv(wr3x3, file="wr3x3_all.csv") | |
| # Load WCA data | |
| wr3x3 <- readr::read_csv("wr3x3_all.csv") | |
| # Initial descriptive plot | |
| ggplot(wr3x3, aes(x=Date, y=WR)) + geom_step() + | |
| geom_point() + | |
| scale_y_continuous(limit=c(0,NA), breaks=seq(0,23,by=2)) + | |
| theme_minimal() | |
| # Fit the model to these data | |
| m_lm <- lm( log(WR) ~ 1 + I(Date - min(Date)), data=wr3x3) | |
| coef(m_lm) | |
| # Log-linear model fit to the data | |
| ggplot(wr3x3, aes(x=Date, y=WR)) + geom_step() + | |
| geom_point() + | |
| scale_y_continuous(limit=c(0,NA), breaks=seq(0,23,by=2)) + | |
| theme_minimal() + | |
| geom_smooth(method = "glm", formula = y~x, | |
| method.args = list(family = gaussian(link = 'log'))) | |
| # Gompertz model start values | |
| Asym <- 4.5; b2 <- 2.3; b3 <- 0.7 | |
| # Fit Gompertz model | |
| m_gomp <- nls(WR ~ SSgompertz(as.numeric(Date), Asym, b2, b3), data = wr3x3) | |
| m_gomp | |
| ## Make a tibble with the predictions | |
| wr3x3_pred <- bind_rows( | |
| wr3x3 %>% select(Date, WR), | |
| tibble(Date = seq(ymd("2023-01-01"),ymd("2030-01-01"), by="3 month"), WR=NA) | |
| ) %>% arrange(Date) %>% | |
| mutate(#gompertz = predict(m_gomp, newdata=., interval = "prediction"), #Note: the interval argument is ignored for nls. :-() We need to add that manually | |
| lm = exp(predict(m_lm, newdata=., interval = "prediction")), | |
| future = Date > Sys.Date()) | |
| #head(wr3x3_pred) | |
| # Use simulation based PIs from the propagate package for the gompertz model. | |
| m_gomp_pred <- propagate::predictNLS(m_gomp, newdata=wr3x3_pred %>% select(Date), interval = "prediction") | |
| pi <- m_gomp_pred$prop %>% map_df(function(x) x$sim[c("Median","2.5%","97.5%")]) %>% | |
| setNames(c('fit', 'lwr', 'upr')) %>% | |
| as.matrix() | |
| wr3x3_pred$gompertz <- pi | |
| wr3x3_pred <- wr3x3_pred %>% unnest(c(lm,gompertz)) | |
| #View(wr3x3_pred) | |
| # Convert to long format | |
| wr3x3_long <- wr3x3_pred %>% | |
| pivot_longer(-c(Date, future), names_to="Model", values_to="WR") %>% | |
| mutate(fit = WR[,1], pi_lower = WR[,2], pi_upper = WR[,3]) %>% | |
| select(-WR) | |
| wr3x3_long | |
| # wr3x3_long %>% filter(Model == "gompertz") | |
| # names(wr3x3_long) | |
| make_poly <- function(model_str) { | |
| p1 <- wr3x3_long %>% filter(Model == model_str) %>% select(Date, future, Model, pi_lower) %>% rename(fit = pi_lower) | |
| p2 <- wr3x3_long %>% filter(Model == model_str) %>% arrange(desc(Date)) %>% select(Date, future, Model, pi_upper) %>% rename(fit = pi_upper) | |
| return(bind_rows(p1, p2)) | |
| } | |
| lm_poly <- make_poly("lm") | |
| gompertz_poly <- make_poly("gompertz") | |
| polys <- bind_rows(lm_poly, gompertz_poly) | |
| # Show the plot | |
| ggplot(wr3x3_long %>% filter(Model=="WR"), aes(x=Date, y=fit, color=Model)) + geom_step() + | |
| geom_polygon(data=polys, aes(fill=Model), alpha=0.1, color=NA) + | |
| geom_point() + | |
| geom_line(data = wr3x3_long %>% filter(Model !="WR"), aes(y = fit, linetype=future)) + | |
| scale_y_continuous(limit=c(0,NA), breaks=seq(0,23,by=2)) + | |
| theme_minimal() + | |
| ylab("WR 3x3 (seconds)") + | |
| guides(linetype = FALSE) + | |
| theme(legend.position="bottom") + | |
| scale_fill_discrete(limits = c('lm', 'gompertz')) + | |
| scale_color_discrete(limits = c('lm', 'gompertz')) | |
| # Store image to disk. | |
| ggsave(file="wr3x3.png", dpi=300, width=8, height=4, bg="white") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment