Skip to content

Instantly share code, notes, and snippets.

@Lextuga007
Created March 13, 2025 16:54
Show Gist options
  • Select an option

  • Save Lextuga007/9e24862df3d22bff80c656b2d6e5ea2f to your computer and use it in GitHub Desktop.

Select an option

Save Lextuga007/9e24862df3d22bff80c656b2d6e5ea2f to your computer and use it in GitHub Desktop.
#' Check a data frame for integer64 columns
#'
#' Finds the columns of `df` that inherit from class `integer64`, warns when
#' any are found, and optionally keeps only those columns and/or converts the
#' numeric columns with `as.numeric()`.
#'
#' @param df Data frame to check.
#' @param select_int Logical. `TRUE` returns just the columns that are
#'   `integer64`; if no such column exists all columns are kept. The default,
#'   `FALSE`, returns all columns.
#' @param tidy Logical. `TRUE` (the default) passes every column for which
#'   `is.numeric()` is `TRUE` through `as.numeric()`. `FALSE` returns the
#'   columns with their original types.
#'
#' @returns The (optionally column-subsetted and converted) data frame. A
#'   warning naming the `integer64` columns is signalled when any are found.
#' @export
#'
#' @examples
#' \dontrun{
#' df <- tibble::tribble(
#'   ~colmA, ~colmB,
#'   100, bit64::as.integer64(1)
#' )
#' check_int64(df)
#' }
check_int64 <- function(df,
                        select_int = FALSE,
                        tidy = TRUE) {
  # inherits() gives exactly one TRUE/FALSE per column, so columns carrying
  # several classes (e.g. POSIXct) cannot upset the detection.
  is_int64 <- vapply(df, inherits, logical(1), what = "integer64")
  int64_cols <- names(df)[is_int64]
  found_int64 <- length(int64_cols) > 0

  if (found_int64) {
    warning(
      "integer64 column(s) found: ",
      paste0(int64_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Only narrow the data when there is something to select; otherwise the
  # input is passed on with all of its columns.
  if (found_int64 && select_int) {
    df <- df |>
      dplyr::select(dplyr::any_of(int64_cols))
  }

  if (tidy) {
    # NOTE(review): doubles cannot represent every 64-bit integer exactly, so
    # very large integer64 values may lose precision here - confirm this is
    # acceptable for the data being checked.
    df <- df |>
      dplyr::mutate(dplyr::across(dplyr::where(is.numeric), as.numeric))
  }

  df
}
@Lextuga007
Copy link
Author

Lextuga007 commented Mar 13, 2025

Tests (note that some of these relate to messages that were removed when I took out the loggit reference):

# Fixture with two integer64 columns alongside a double and a character column.
df_int64 <- tibble::tibble(
  integer_data = 100,
  integer64_data = bit64::as.integer64(1),
  character_data = "Text",
  second_int64 = bit64::as.integer64(1)
)

# Fixture without any integer64 column: two doubles and a character column.
df_doubles <- tibble::tibble(
  colmA = 100,
  colmB = 200,
  colmC = "Text"
)

# check_int64() -----------------------------------------------------------

# Although regexp checks are noted as fragile, matching on the text is a
# specific requirement for this warning message
test_that("check_int64 - expect messages", {
  # Each entry holds the extra arguments for one call; the empty list is the
  # call that relies on the defaults.
  arg_sets <- list(
    list(), # default
    list(select_int = TRUE),
    list(select_int = FALSE),
    list(tidy = FALSE),
    list(tidy = TRUE),
    list(tidy = TRUE, select_int = TRUE),
    list(tidy = TRUE, select_int = FALSE),
    list(tidy = FALSE, select_int = TRUE),
    list(tidy = FALSE, select_int = FALSE)
  )

  # Data containing integer64 columns must produce a warning that mentions
  # "integer64" whatever the argument combination.
  for (extra_args in arg_sets) {
    testthat::expect_warning(
      do.call(check_int64, c(list(df_int64), extra_args)),
      "integer64"
    )
  }
})

test_that("check_int64 - don't expect messages", {
  # No integer64 tests
  # Each entry holds the extra arguments for one call; the empty list is the
  # call that relies on the defaults.
  arg_sets <- list(
    list(),
    list(select_int = TRUE),
    list(select_int = FALSE),
    list(tidy = FALSE),
    list(tidy = TRUE),
    list(tidy = TRUE, select_int = TRUE),
    list(tidy = TRUE, select_int = FALSE),
    list(tidy = FALSE, select_int = TRUE),
    list(tidy = FALSE, select_int = FALSE)
  )

  for (extra_args in arg_sets) {
    call_args <- c(list(df_doubles), extra_args)
    testthat::expect_no_message(do.call(check_int64, call_args))
    # The companion test matches on warnings, so a stray warning must be
    # ruled out here as well; expect_no_message() alone would not see it.
    testthat::expect_no_warning(do.call(check_int64, call_args))
  }
})

test_that("check_int64 - returns data", {
  # Each entry holds the extra arguments for one call; the empty list is the
  # call that relies on the defaults.
  arg_sets <- list(
    list(),
    list(tidy = TRUE),
    list(tidy = FALSE),
    list(select_int = TRUE),
    list(select_int = FALSE),
    list(tidy = TRUE, select_int = TRUE),
    list(tidy = TRUE, select_int = FALSE),
    list(tidy = FALSE, select_int = TRUE),
    list(tidy = FALSE, select_int = FALSE)
  )

  # Data without integer64 columns.
  for (extra_args in arg_sets) {
    testthat::expect_s3_class(
      do.call(check_int64, c(list(df_doubles), extra_args)),
      "data.frame"
    )
  }

  # Data with integer64 columns; warnings are silenced because only the
  # class of the returned object is under test here.
  for (extra_args in arg_sets) {
    testthat::expect_s3_class(
      suppressWarnings(do.call(check_int64, c(list(df_int64), extra_args))),
      "data.frame"
    )
  }
})

test_that("check_int64 - check `tidy` argument", {
  # Run the four tidy/select_int combinations up front, warnings silenced.
  results <- list(
    tidy = suppressWarnings(check_int64(df_int64, tidy = TRUE)),
    tidy_select = suppressWarnings(
      check_int64(df_int64, tidy = TRUE, select_int = TRUE)
    ),
    orig = suppressWarnings(check_int64(df_int64, tidy = FALSE)),
    orig_select = suppressWarnings(
      check_int64(df_int64, tidy = FALSE, select_int = TRUE)
    )
  )

  # Class expected for the `integer64_data` column in each result:
  # tidy = TRUE converts it, tidy = FALSE leaves it as integer64.
  expected_class <- c(
    tidy = "numeric",
    tidy_select = "numeric",
    orig = "integer64",
    orig_select = "integer64"
  )

  for (case in names(expected_class)) {
    testthat::expect_equal(
      janitor::describe_class(results[[case]]$integer64_data),
      expected_class[[case]]
    )
  }
})

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment