Last active
June 13, 2025 14:31
-
-
Save francisbarton/5d9d177978a2279cf225abdc772adef9 to your computer and use it in GitHub Desktop.
Pull England 2019 IMD data from ONS API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://gist.github.com/francisbarton/5d9d177978a2279cf225abdc772adef9

# Base URL of the ArcGIS REST endpoint, and the name of the feature service
# that holds the IMD 2019 lookup table.
ons_api_base <- "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest"
imd_dataset <- "Index_of_Multiple_Deprivation_Dec_2019_Lookup_in_England_2022"

# Base request: the query endpoint of layer 0 of the feature service, asking
# for JSON output. The ID query below adds its own parameters on top of this.
req <- httr2::request(ons_api_base) |>
  httr2::req_url_path_append(
    "services",
    imd_dataset,
    "FeatureServer/0/query"
  ) |>
  httr2::req_url_query(f = "json")

# just get IDs only from API initially (no maxRecordCount for ID queries)
# The parameter is spelled `returnIdsOnly`, as in the ArcGIS REST API
# reference, rather than relying on case-insensitive matching by the server.
ids <- req |>
  httr2::req_url_query(
    returnIdsOnly = TRUE,
    where = "1=1" # get all rows (no filter)
  ) |>
  httr2::req_perform() |>
  httr2::resp_body_json() |>
  purrr::pluck("objectIds")

# check equal to number of LSOAs (2011) in England
assertthat::assert_that(length(ids) == 32844L)
# Split a vector (atomic or list) into consecutive batches.
#
# x:    the vector to split; element order is preserved.
# size: maximum number of elements per batch. Must be a single finite number
#       >= 1; a fractional value is truncated towards zero.
#
# Returns an unnamed list of batches. Every batch has `size` elements except
# possibly the last, which holds the remainder. An empty `x` gives an empty
# list.
batch_it <- function(x, size) {
  stopifnot(
    is.numeric(size),
    length(size) == 1L,
    is.finite(size),
    size >= 1
  )
  # Truncate so that a fractional size cannot leave trailing elements without
  # a batch label (split() silently drops elements whose label is NA).
  size <- trunc(size)
  # Element i belongs to batch (i - 1) %/% size + 1. seq_along() keeps this
  # safe for zero-length input, unlike 1:n.
  batch_index <- (seq_along(x) - 1L) %/% size + 1L
  unname(split(x, batch_index))
}
# Chunk the object IDs into a list of batches of 100 IDs each
ids_batched <- batch_it(x = ids, size = 100L)
# use batched IDs to retrieve table data
#
# req:     httr2 request to extend with the batch-specific query parameters.
# ids_vec: the object IDs belonging to one batch.
#
# Returns whatever httr2::req_perform() returns for the extended request.
retrieve_data <- function(req, ids_vec) {
  # The IDs are sent as a single comma-separated string
  id_string <- stringr::str_flatten(ids_vec, collapse = ",")

  batch_req <- httr2::req_url_query(
    req,
    objectIds = id_string,
    outFields = "*", # returns all columns
    returnGeometry = FALSE # superfluous tbf
  )
  # Retrying shouldn't be needed; allow up to 3 tries anyway
  batch_req <- httr2::req_retry(batch_req, max_tries = 3)

  httr2::req_perform(batch_req)
}
# Wrap the fetcher so that a batch which errors yields NULL rather than
# stopping the whole run
poss_retrieve_data <- purrr::possibly(retrieve_data, otherwise = NULL)

# Fetch every batch in turn, then discard the NULLs left by failed batches
resps <- purrr::compact(
  purrr::map(ids_batched, \(batch) poss_retrieve_data(req, batch))
)

# A shortfall here means at least one batch errored and was dropped
assertthat::assert_that(length(resps) == length(ids_batched))
# pull actual data out from API JSON response
#
# resp: an httr2 response for one batch query.
#
# Returns a data frame with columns lsoa11cd, lsoa11nm and imd_rank (the
# latter renamed from imd19). Errors if the response has an HTTP error status.
pull_table_data <- function(resp) {
  body <- httr2::resp_body_json(httr2::resp_check_status(resp))

  # Each element of "features" carries its row values under "attributes"
  attribute_rows <- purrr::map(purrr::pluck(body, "features"), "attributes")

  # Stack the rows into one table, then tidy the column names
  batch_tbl <- janitor::clean_names(dplyr::bind_rows(attribute_rows))

  dplyr::select(batch_tbl, c("lsoa11cd", "lsoa11nm", imd_rank = "imd19"))
}
# A response that cannot be parsed yields NULL instead of raising an error
poss_pull_table_data <- purrr::possibly(pull_table_data, otherwise = NULL)

# Parse each batch response and stack the per-batch tables into one
imd2019 <- purrr::list_rbind(purrr::map(resps, poss_pull_table_data))

# One row per LSOA is expected; any other count means rows were lost or
# duplicated somewhere above
assertthat::assert_that(nrow(imd2019) == 32844L)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment