-
-
Save aronlindberg/2a9e9802579b2d239655 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Digest the Link header in a paginated result.
#'
#' Converts the Link header from a monolithic string to a usable data.frame.
#'
#' The GitHub API automatically paginates when the number of requested items
#' exceeds the number of items per page. When this occurs, the result returned
#' by the server will include a Link header that provides the URLs for other
#' pages of results, such as the next page and the last page. These assorted
#' URLs are catenated in a single string and this function converts that
#' information into a data.frame that is useful for traversing the pages.
#'
#' @param x Output of a function that gets potentially paginated results, e.g.,
#'   \code{get.*.repositories()}
#'
#' @return A data.frame, one row per URL = page. Maximum number of rows is four:
#'   one each for "next", "last", "first", and "prev" page, indicated by the
#'   \code{rel} variable. The \code{per_page} variable will be constant across
#'   all rows and gives the number of items per page. If the header contains no
#'   links at all, a 4 x 3 all-zero placeholder data.frame is returned so
#'   callers can uniformly test \code{page != 0} without special-casing NULL.
#'
#' @references
#' \url{https://developer.github.com/guides/traversing-with-pagination/}
#' \url{https://developer.github.com/v3/#pagination}
#'
#' @examples
#' repos <- get.organization.repositories(org = "STAT545-UBC", per_page = 1)
#' digest_header_links(repos)
digest_header_links <- function(x) {
  link_header <- x$headers$link
  if (is.null(link_header)) {
    # No Link header at all: return an all-zero placeholder (not NULL) so
    # downstream code can check `page != 0` uniformly.
    links <- as.data.frame(matrix(0, ncol = 3, nrow = 4))
    names(links) <- c("rel", "per_page", "page")
    return(links)
  }
  link_header %>%
    str_split(", ") %>%                          # split into next/last/first/prev entries
    unlist() %>%
    str_split_fixed("; ", 2) %>%                 # separate URL from the relation
    as.data.frame(stringsAsFactors = FALSE) %>%  # character matrix -> data.frame, no factors
    setNames(c("URL", "rel")) %>%                # sane names
    # mutate_() is deprecated in dplyr; use mutate() with plain expressions.
    dplyr::mutate(
      rel = str_match(rel, "next|last|first|prev")[, 1],
      per_page = as.integer(str_match(URL, "per_page=([0-9]+)")[, 2]),
      # Match page= whether it is the first ("?page=") or a later ("&page=")
      # query parameter; the original "&page=" pattern missed the former.
      page = as.integer(str_match(URL, "[?&]page=([0-9]+)")[, 2]),
      URL = str_replace_all(URL, "<|>", "")
    )
}
# Fetch the changed files of pull request `i` in django/django, following
# pagination when the result spans multiple pages.
#
# Bug fixed: the original `for` loop fetched each page but discarded every
# result (a `for` loop's value is NULL), so any multi-page PR returned
# nothing. Pages are now collected with lapply() and returned as a list;
# single-page PRs return the first response unchanged.
#
# @param i Pull request number.
# @return For a single-page PR, the raw API response; for a multi-page PR,
#   a list with one API response per page (NULL if any page fetch fails).
pull <- function(i) {
  files <- get.pull.request.files(owner = "django", repo = "django", id = i,
                                  ctx = get.github.context(), per_page = 100)
  links <- digest_header_links(files)
  # Row 2 of the digested links is taken to be the "last" relation; its
  # `page` field is 0 when there is only one page.
  # NOTE(review): the row order of digest_header_links() output is assumed
  # stable across responses — TODO confirm.
  number_of_pages <- links[2, ]$page
  if (number_of_pages != 0) {
    try_default(
      lapply(seq_len(number_of_pages), function(n) {
        get.pull.request.files(owner = "django", repo = "django", id = i,
                               ctx = get.github.context(), per_page = 100,
                               page = n)
      }),
      default = NULL
    )
  } else {
    files
  }
}
# NOTE(review): this redefinition silently shadows the multi-page pull()
# defined above, so only the first page (up to 100 files) is ever fetched
# by the script below. Keep exactly one of the two definitions.
#
# Fetch the changed files of pull request `i` in django/django
# (single request, up to 100 files).
pull <- function(i){
  get.pull.request.files(owner = "django", repo = "django", id = i, ctx = get.github.context(), per_page=100)
}
# Read the pull-request ids from the first column of django_c1.csv (no
# header row) and fetch the changed files for each one.
# Renamed `list` -> `pr_ids`: the original name shadowed base::list().
pr_ids <- read.csv("django_c1.csv", header = FALSE)
pull_lists <- lapply(pr_ids$V1, pull)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment