Created
November 22, 2024 18:45
-
-
Save darenr/89ae99212bef8d2692e7c0885aa52dda to your computer and use it in GitHub Desktop.
Generate synthetic time series sales data for time series modeling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
def generate_weekly_sales_data(
    customer_id, start_date="2024-01-01", days=60, weekly_pattern=None, noise_level=10, random_seed=None
):
    """
    Generate synthetic daily sales data with a weekly seasonal pattern for a given customer.

    Each date receives the ``weekly_pattern`` value that belongs to its actual
    weekday (Monday is index 0, Sunday is index 6), plus Gaussian noise.

    Parameters:
        customer_id (str): Identifier for the customer; repeated on every row.
        start_date (str): Start date for the time series (YYYY-MM-DD).
        days (int): Number of consecutive days to generate.
        weekly_pattern (list): List of 7 sales values for Monday to Sunday.
            Defaults to [20, 25, 30, 40, 50, 70, 80].
        noise_level (float): Standard deviation of the random noise.
        random_seed (int): Seed for random number generation. None gives
            non-reproducible noise.

    Returns:
        pd.DataFrame: DataFrame with columns "customer_id", "ds" (date) and
        "y" (sales value), one row per day.

    Raises:
        ValueError: If weekly_pattern does not contain exactly 7 values.
    """
    if weekly_pattern is None:
        weekly_pattern = [20, 25, 30, 40, 50, 70, 80]  # Default pattern for Monday to Sunday
    if len(weekly_pattern) != 7:
        raise ValueError("weekly_pattern must contain exactly 7 values (one for each day of the week).")

    # Use a private generator rather than np.random.seed() so that calling this
    # function never reseeds NumPy's global RNG. RandomState is the same
    # generator type that backs np.random.seed()/np.random.normal(), so a given
    # seed still produces the same noise values.
    rng = np.random.RandomState(random_seed)

    # Generate date range
    date_range = pd.date_range(start=start_date, periods=days, freq="D")

    # Look up each date's value by its real weekday (Monday=0 ... Sunday=6).
    # Tiling the pattern from index 0 would only be correct when start_date
    # happens to be a Monday.
    weekday_index = date_range.dayofweek.to_numpy()
    trend = np.asarray(weekly_pattern)[weekday_index]

    # Add random noise
    noise = rng.normal(0, noise_level, days)

    # Combine trend and noise
    values = trend + noise

    # Create DataFrame
    df = pd.DataFrame(
        {
            "customer_id": customer_id,
            "ds": date_range,  # Date column
            "y": values,  # Sales values
        }
    )
    return df
if __name__ == "__main__":
    # Demo: build one reproducible synthetic series per customer, then show
    # each series in its own figure.
    customer_a_data = generate_weekly_sales_data(
        customer_id="Customer_A",
        start_date="2024-01-01",
        days=60,
        weekly_pattern=[15, 20, 25, 35, 45, 65, 75],  # Custom weekly pattern for Customer A
        noise_level=8,
        random_seed=1,
    )
    customer_b_data = generate_weekly_sales_data(
        customer_id="Customer_B",
        start_date="2024-01-01",
        days=60,
        weekly_pattern=[10, 15, 20, 30, 40, 60, 70],  # Custom weekly pattern for Customer B
        noise_level=12,
        random_seed=2,
    )

    # (series, legend label, figure title) for each figure, in display order.
    figures = (
        (customer_a_data, "Customer A Sales Data", "Synthetic Weekly Sales Data for Customer A"),
        (customer_b_data, "Customer B Sales Data", "Synthetic Weekly Sales Data for Customer B"),
    )
    for series, legend_label, figure_title in figures:
        plt.figure(figsize=(12, 6))
        plt.plot(series["ds"], series["y"], label=legend_label)
        plt.title(figure_title)
        plt.xlabel("Date")
        plt.ylabel("Sales")
        plt.grid()
        plt.legend()
        plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
NIT: Can we change the seed to something friendlier?