Skip to content

Instantly share code, notes, and snippets.

@MichaelChirico
Created October 28, 2025 16:18
Show Gist options
  • Select an option

  • Save MichaelChirico/6132deadc98882303efbbdc8dc18fcd3 to your computer and use it in GitHub Desktop.

Select an option

Save MichaelChirico/6132deadc98882303efbbdc8dc18fcd3 to your computer and use it in GitHub Desktop.
Get NFL game durations
library(rvest)
library(xml2)
# Base address that every scraped page URL in this script is built from.
PFR_URL = "https://www.pro-football-reference.com"
# Read an HTML page with read_html(), retrying with exponential backoff.
#
# Arguments:
#   url        Address handed to read_html().
#   sleep      Seconds to wait before the first retry; doubled after every
#              failed attempt.
#   max_tries  Maximum number of attempts. Once reached, the last error is
#              re-raised instead of retrying forever. Pass Inf to retry
#              without limit.
#
# Returns the value of the first read_html() call that succeeds.
# Raises an error (carrying the last failure's message) once max_tries
# attempts have all failed.
read_with_backoff = function(url, sleep = 0.1, max_tries = 10L) {
  stopifnot(
    is.numeric(sleep), length(sleep) == 1L, !is.na(sleep), sleep >= 0,
    is.numeric(max_tries), length(max_tries) == 1L, !is.na(max_tries),
    max_tries >= 1
  )
  attempt = 1L
  # Iterate rather than recurse so that a long run of failures does not nest
  # one tryCatch() frame per retry.
  repeat {
    result = tryCatch(read_html(url), error = identity)
    if (!inherits(result, "error")) {
      return(result)
    }
    if (attempt >= max_tries) {
      stop(
        sprintf(
          "Giving up on %s after %d attempts: %s",
          url, attempt, conditionMessage(result)
        ),
        call. = FALSE
      )
    }
    # Announce the delay before waiting, so the logged figure is the time
    # actually slept ahead of the next attempt.
    message(sprintf("Failed, retrying in %.2fs", sleep))
    Sys.sleep(sleep)
    sleep = 2 * sleep
    attempt = attempt + 1L
  }
}
# Scrape one row per game for the 2024 and 2025 seasons into `game_data`.
#
# Columns (all character):
#   summary     text of the first <h1> on the box score page
#   start_time  text of the element that contains the <strong> label
#               'Start Time'
#   duration    text of the element that contains the <strong> label
#               'Time of Game'

# Empty template; keeps the result a 3-column data frame even if no box score
# links are found.
game_data = data.frame(
  summary = character(),
  start_time = character(),
  duration = character()
)

# Rows are collected in a list and bound once at the end, instead of calling
# rbind() on a growing data frame for every game.
game_rows = list()
for (year in 2024:2025) {
  # The season index links to each box score through an anchor whose text is
  # exactly 'boxscore'. It is fetched with the same retrying reader as the
  # box score pages, so a transient failure here does not abort the run.
  games = file.path(PFR_URL, 'years', year, 'games.htm') |>
    read_with_backoff() |>
    xml_find_all("//a[text() = 'boxscore']") |>
    xml_attr("href")
  for (game in games) {
    # hrefs are appended directly to the site root.
    boxscore = read_with_backoff(paste0(PFR_URL, game))
    game_rows[[length(game_rows) + 1L]] = data.frame(
      summary = xml_text(xml_find_first(boxscore, "//h1")),
      start_time = xml_text(xml_find_first(boxscore, "//strong[text() = 'Start Time']/parent::*")),
      duration = xml_text(xml_find_first(boxscore, "//strong[text() = 'Time of Game']/parent::*"))
    )
    # Fixed pause between box score requests.
    Sys.sleep(1)
  }
}
game_data = do.call(rbind, c(list(game_data), game_rows))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment