Last active
March 12, 2025 15:25
-
-
Save martinctc/cd37684a680830b05dd1b113044f322f to your computer and use it in GitHub Desktop.
[Apply Noise to Specified Columns in a Data Frame] #R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' @title Apply Noise to Specified Columns in a Data Frame
#'
#' @description
#' Multiplies the selected columns of a data frame by random factors. The data
#' is split by `group_var`; within each group one factor per row is drawn from
#' a normal distribution (`mean = 5`, `sd = 2`) and linearly rescaled to the
#' range `scale_from` to `scale_to`. With the default range every value is
#' therefore multiplied by a factor between 1 and 2.
#'
#' @details
#' The factors are deterministic. When `seed` is `NULL` (the default) each
#' group is seeded with its own number of rows, so two groups with the same
#' number of rows receive exactly the same sequence of factors. Supply `seed`
#' to give every group its own reproducible sequence instead.
#'
#' The state of the caller's random number generator is saved before any
#' seeding and restored when the function exits.
#'
#' Rows are returned group by group, in the order produced by
#' `dplyr::group_split()`. A data frame with zero rows is returned unchanged
#' (as a tibble).
#'
#' @param df Data frame whose columns should be perturbed.
#' @param group_var String specifying the grouping variable.
#' @param cols Vector of column names to apply the noise to.
#' @param scale_from Numeric value specifying the lower bound of the range the
#'   multiplicative factors are rescaled to.
#' @param scale_to Numeric value specifying the upper bound of the range the
#'   multiplicative factors are rescaled to.
#' @param seed Optional single number. If supplied, the i-th group is seeded
#'   with `seed + i`, so groups of equal size no longer share their factors.
#'   If `NULL`, each group is seeded with its row count.
#'
#' @return A tibble with the same columns as `df`, with the columns named in
#'   `cols` multiplied by the generated factors.
#'
#' @examples
#' \dontrun{
#' library(tidyverse)
#' df <- tibble(
#'   PersonId = rep(LETTERS[1:10], each = 10),
#'   MetricDate = rep(
#'     seq.Date(from = as.Date("2023-01-01"), by = "day", length.out = 10),
#'     10
#'   ),
#'   Emails_sent = rnorm(100, mean = 30, sd = 5) %>% round() %>% abs()
#' )
#'
#' apply_noise(df, group_var = "PersonId", cols = c("Emails_sent"))
#'
#' # Different (but still reproducible) noise for every person
#' apply_noise(df, group_var = "PersonId", cols = "Emails_sent", seed = 123)
#' }
apply_noise <- function(df,
                        group_var = "PersonId",
                        cols,
                        scale_from = 1,
                        scale_to = 2,
                        seed = NULL) {
  stopifnot(
    is.data.frame(df),
    is.character(group_var), length(group_var) == 1L,
    is.null(seed) || (is.numeric(seed) && length(seed) == 1L && !is.na(seed))
  )

  # Nothing to perturb. Returning early also keeps the columns, which
  # row-binding an empty list of groups would drop.
  if (nrow(df) == 0L) {
    return(dplyr::as_tibble(df))
  }

  # Resolve the target columns once, up front, so a bad `cols` fails before
  # any random numbers are drawn.
  target_cols <- names(dplyr::select(dplyr::ungroup(df), dplyr::all_of(cols)))

  # set.seed() below would otherwise leak into the caller's session, so
  # remember the global RNG state and put it back on exit.
  global_env <- globalenv()
  had_rng_state <- exists(".Random.seed", envir = global_env, inherits = FALSE)
  if (had_rng_state) {
    old_rng_state <- get(".Random.seed", envir = global_env, inherits = FALSE)
  }
  on.exit(
    if (had_rng_state) {
      assign(".Random.seed", old_rng_state, envir = global_env)
    } else if (exists(".Random.seed", envir = global_env, inherits = FALSE)) {
      rm(".Random.seed", envir = global_env)
    },
    add = TRUE
  )

  groups <- dplyr::group_split(df, !!rlang::sym(group_var))

  noisy_groups <- lapply(seq_along(groups), function(i) {
    group_df <- groups[[i]]
    total_rows <- nrow(group_df)

    # Legacy behaviour seeds with the group size; an explicit `seed` is
    # offset by the group index so that each group gets its own stream.
    set.seed(if (is.null(seed)) total_rows else seed + i)

    multipliers <- scales::rescale(
      stats::rnorm(total_rows, mean = 5, sd = 2),
      to = c(scale_from, scale_to)
    )

    # The factors stay in a local vector rather than a helper column, so an
    # input column that happens to be called `normdist` is left untouched.
    for (col in target_cols) {
      group_df[[col]] <- group_df[[col]] * multipliers
    }
    group_df
  })

  dplyr::bind_rows(noisy_groups)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment