Skip to content

Instantly share code, notes, and snippets.

@darenr
Created November 22, 2024 18:45
Show Gist options
  • Select an option

  • Save darenr/89ae99212bef8d2692e7c0885aa52dda to your computer and use it in GitHub Desktop.

Select an option

Save darenr/89ae99212bef8d2692e7c0885aa52dda to your computer and use it in GitHub Desktop.
Generate synthetic time series sales data for time series modeling
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def generate_weekly_sales_data(
    customer_id, start_date="2024-01-01", days=60, weekly_pattern=None, noise_level=10, random_seed=None
):
    """
    Generate synthetic daily sales data that follows a repeating weekly pattern.

    Each date receives the pattern value for its own weekday (index 0 is
    Monday, index 6 is Sunday) plus Gaussian noise, so the pattern stays
    aligned with the calendar even when ``start_date`` is not a Monday.

    Parameters:
        customer_id (str): Identifier for the customer; repeated on every row.
        start_date (str): Start date for the time series (YYYY-MM-DD).
        days (int): Number of consecutive days to generate.
        weekly_pattern (list): List of 7 sales values for Monday to Sunday.
            Defaults to [20, 25, 30, 40, 50, 70, 80].
        noise_level (float): Standard deviation of the random noise.
        random_seed (int): Seed for random number generation. ``None`` gives
            a different series on every call.

    Returns:
        pd.DataFrame: One row per day with columns ``customer_id``,
        ``ds`` (date) and ``y`` (sales value).

    Raises:
        ValueError: If ``weekly_pattern`` does not contain exactly 7 values.
    """
    if weekly_pattern is None:
        weekly_pattern = [20, 25, 30, 40, 50, 70, 80]  # Default pattern for Monday to Sunday
    if len(weekly_pattern) != 7:
        raise ValueError("weekly_pattern must contain exactly 7 values (one for each day of the week).")

    # Use a private generator instead of np.random.seed(): it yields the same
    # draws for a given seed but leaves NumPy's global random state untouched.
    rng = np.random.RandomState(random_seed)

    # Generate date range
    date_range = pd.date_range(start=start_date, periods=days, freq="D")

    # Look up each date's value by its weekday (0 = Monday ... 6 = Sunday) so
    # the pattern matches the calendar regardless of the start date.
    weekday_values = np.asarray(weekly_pattern)
    trend = weekday_values[date_range.dayofweek.to_numpy()]

    # Add random noise
    noise = rng.normal(0, noise_level, days)

    # Combine trend and noise into the final frame
    return pd.DataFrame(
        {
            "customer_id": customer_id,
            "ds": date_range,  # Date column
            "y": trend + noise,  # Sales values
        }
    )
if __name__ == "__main__":
    # Demo: build one 60-day series per customer, each with its own weekly
    # pattern, noise level and seed.
    customer_a_data = generate_weekly_sales_data(
        customer_id="Customer_A",
        start_date="2024-01-01",
        days=60,
        weekly_pattern=[15, 20, 25, 35, 45, 65, 75],  # Weekly pattern used for Customer A
        noise_level=8,
        random_seed=1,
    )
    customer_b_data = generate_weekly_sales_data(
        customer_id="Customer_B",
        start_date="2024-01-01",
        days=60,
        weekly_pattern=[10, 15, 20, 30, 40, 60, 70],  # Weekly pattern used for Customer B
        noise_level=12,
        random_seed=2,
    )

    # Draw one figure per customer, in order: Customer A first, then Customer B.
    charts = (
        (customer_a_data, "Customer A Sales Data", "Synthetic Weekly Sales Data for Customer A"),
        (customer_b_data, "Customer B Sales Data", "Synthetic Weekly Sales Data for Customer B"),
    )
    for sales, legend_label, chart_title in charts:
        plt.figure(figsize=(12, 6))
        plt.plot(sales["ds"], sales["y"], label=legend_label)
        plt.title(chart_title)
        plt.xlabel("Date")
        plt.ylabel("Sales")
        plt.grid()
        plt.legend()
        plt.show()
@ahosler
Copy link

ahosler commented Nov 26, 2024

NIT: Can we change the seed to something friendlier?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment