-
-
Save aronlindberg/2a9e9802579b2d239655 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Digest the Link header in a paginated result.
#'
#' Converts the Link header from a monolithic string to a usable data.frame.
#'
#' The GitHub API automatically paginates when the number of requested items
#' exceeds the number of items per page. When this occurs, the result returned
#' by the server will include a Link header that provides the URLs for other
#' pages of results, such as the next page and the last page. These assorted
#' URLs are catenated in a single string and this function converts that
#' information into a data.frame that is useful for traversing the pages.
#'
#' @param x Output of a function that gets potentially paginated results, e.g.,
#'   \code{get.*.repositories()}
#'
#' @return A data.frame, one row per URL = page. Maximum number of rows is four:
#'   one each for "next", "last", "first", and "prev" page, indicated by the
#'   \code{rel} variable. The \code{per_page} variable will be constant across
#'   all rows and gives the number of items per page. If the header contains no
#'   links at all, a 4 x 3 all-zero placeholder data.frame is returned so
#'   callers can uniformly test \code{page != 0} without special-casing NULL.
#'
#' @references
#' \url{https://developer.github.com/guides/traversing-with-pagination/}
#' \url{https://developer.github.com/v3/#pagination}
#'
#' @examples
#' repos <- get.organization.repositories(org = "STAT545-UBC", per_page = 1)
#' digest_header_links(repos)
digest_header_links <- function(x) {
  link_header <- x$headers$link
  if (is.null(link_header)) {
    # No Link header at all: return an all-zero placeholder (not NULL) so
    # downstream code can check `page != 0` uniformly.
    links <- as.data.frame(matrix(0, ncol = 3, nrow = 4))
    names(links) <- c("rel", "per_page", "page")
    return(links)
  }
  link_header %>%
    str_split(", ") %>%                          # split into next/last/first/prev entries
    unlist() %>%
    str_split_fixed("; ", 2) %>%                 # separate URL from the relation
    as.data.frame(stringsAsFactors = FALSE) %>%  # character matrix -> data.frame, no factors
    setNames(c("URL", "rel")) %>%                # sane names
    # mutate_() is deprecated in dplyr; use mutate() with plain expressions.
    dplyr::mutate(
      rel = str_match(rel, "next|last|first|prev")[, 1],
      per_page = as.integer(str_match(URL, "per_page=([0-9]+)")[, 2]),
      # Match page= whether it is the first ("?page=") or a later ("&page=")
      # query parameter; the original "&page=" pattern missed the former.
      page = as.integer(str_match(URL, "[?&]page=([0-9]+)")[, 2]),
      URL = str_replace_all(URL, "<|>", "")
    )
}
# Fetch the changed files of pull request `i` in django/django, following
# pagination when the result spans multiple pages.
#
# Bug fixed: the original `for` loop fetched each page but discarded every
# result (a `for` loop's value is NULL), so any multi-page PR returned
# nothing. Pages are now collected with lapply() and returned as a list;
# single-page PRs return the first response unchanged.
#
# @param i Pull request number.
# @return For a single-page PR, the raw API response; for a multi-page PR,
#   a list with one API response per page (NULL if any page fetch fails).
pull <- function(i) {
  files <- get.pull.request.files(owner = "django", repo = "django", id = i,
                                  ctx = get.github.context(), per_page = 100)
  links <- digest_header_links(files)
  # Row 2 of the digested links is taken to be the "last" relation; its
  # `page` field is 0 when there is only one page.
  # NOTE(review): the row order of digest_header_links() output is assumed
  # stable across responses — TODO confirm.
  number_of_pages <- links[2, ]$page
  if (number_of_pages != 0) {
    try_default(
      lapply(seq_len(number_of_pages), function(n) {
        get.pull.request.files(owner = "django", repo = "django", id = i,
                               ctx = get.github.context(), per_page = 100,
                               page = n)
      }),
      default = NULL
    )
  } else {
    files
  }
}
# NOTE(review): this redefinition silently shadows the multi-page pull()
# defined above, so only the first page (up to 100 files) is ever fetched
# by the script below. Keep exactly one of the two definitions.
#
# Fetch the changed files of pull request `i` in django/django
# (single request, up to 100 files).
pull <- function(i){
  get.pull.request.files(owner = "django", repo = "django", id = i, ctx = get.github.context(), per_page=100)
}
# Read the pull-request ids from the first column of django_c1.csv (no
# header row) and fetch the changed files for each one.
# Renamed `list` -> `pr_ids`: the original name shadowed base::list().
pr_ids <- read.csv("django_c1.csv", header = FALSE)
pull_lists <- lapply(pr_ids$V1, pull)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment