Skip to content

Instantly share code, notes, and snippets.

@ebobby
Last active April 22, 2020 05:47
Show Gist options
  • Select an option

  • Save ebobby/cff9fa8a0cd5beaaaa2d216522e09e39 to your computer and use it in GitHub Desktop.

Select an option

Save ebobby/cff9fa8a0cd5beaaaa2d216522e09e39 to your computer and use it in GitHub Desktop.
Generador de base de datos de una empresa retail
require 'securerandom'
require 'gsl'
# Number of product names requested from Faker when seeding (duplicates are dropped).
PRODUCT_COUNT = 80
# Bounds for the randomly generated product prices.
MIN_PRICE_RANGE = 100
MAX_PRICE_RANGE = 20_000
# Tolerance around a sale's target amount: a basket within this distance of the
# target is considered complete.
SALE_ERROR_RANGE = 5_000
# Target average amount of a single sale; also used to derive how many sales a
# day's revenue is split into.
AVERAGE_SALE_PER_CUSTOMER = 15_000
# Employees per inhabitant, used to size each branch's staff from its city population.
EMPLOYEES_PER_POPULATION_RATIO = 0.000013
# Population per city; the keys double as branch names.
CITIES_POPULATION = {
  'CDMX' => 8_855_000,
  'GDL' => 1_460_148,
  'MTY' => 1_109_171,
  'HMO' => 812_229
}.freeze
# Candidate per-inhabitant multipliers; one is drawn at random per city and
# multiplied by the population to obtain a yearly sales total.
# NOTE: the misspelling ("MULITPLIER") is kept because other code references this name.
SALES_PER_POPULATION_MULITPLIER = (37..71).to_a.freeze
# 2018 US Electronic Store SALES DISTRIBUTION https://www.census.gov/retail/index.html
# Share of yearly sales per calendar month (January first); the shares add up to 1.
SALES_PER_MONTH_DISTRIBUTION = [
  0.07863728596347584,
  0.06450784948916023,
  0.08096630000118662,
  0.07248196931401517,
  0.07732337759424727,
  0.07083038458343716,
  0.08134710998777781,
  0.08593608866422224,
  0.07660816631661387,
  0.07840644097160419,
  0.1048497146179678,
  0.1281053124962918
].freeze
# Bell-shaped random numbers backed by a single GSL generator that is
# allocated once, when the class body is evaluated.
class Gaussian
  @rng = GSL::Rng.alloc

  class << self
    # Draws one Gaussian sample, shifts it by 1.0, halves it and clamps the
    # result into [0.0, 1.0].
    # NOTE(review): the 0.2 argument is presumably the standard deviation of a
    # zero-mean distribution -- confirm against the rb-gsl documentation.
    def rnd
      raw_sample = @rng.gaussian(0.2)
      centered = (raw_sample + 1.0) / 2.0
      centered.clamp(0.0, 1.0)
    end

    # Maps one rnd draw onto the span between min and max: the scaled draw is
    # rounded up and offset by min, so the result lies in min..max when
    # min <= max.
    def random_int(min, max)
      span = max - min
      min + (rnd * span).ceil
    end
  end
end
# Builds a random distribution of +n+ non-negative weights that add up to 1.
#
# Each weight starts as a random integer in 0..255 and is then divided by the
# sum of all draws.
#
# @param n [Integer] number of weights to generate
# @return [Array<Float>] +n+ fractions summing to 1 (empty when n is 0)
def generate_random_distribution(n)
  draws = n.times.map { SecureRandom.random_number(256).to_f }
  total = draws.sum

  # If every draw happened to be 0 the division below would yield NaN for every
  # entry; fall back to an even split instead.
  return Array.new(n, 1.0 / n) if n.positive? && total.zero?

  draws.map { |draw| draw / total }
end
# Splits +amount+ according to the fractions in +distribution+.
#
# @param distribution [Enumerable<Numeric>] fractions to apply
# @param amount [Numeric] total to split
# @return [Array<Numeric>] one share per fraction, rounded to 2 decimals
def sales_distribution(distribution, amount)
  distribution.each_with_object([]) do |fraction, shares|
    shares << (fraction * amount).round(2)
  end
end
# Computes a yearly sales total for every city in CITIES_POPULATION.
#
# For each city one multiplier is drawn at random from
# SALES_PER_POPULATION_MULITPLIER and multiplied by the city's population.
#
# @return [Hash{String => Float}] city name => yearly sales total
def cities_total_sales_per_year
  CITIES_POPULATION.transform_values do |population|
    pick = SecureRandom.random_number(SALES_PER_POPULATION_MULITPLIER.count)
    population.to_f * SALES_PER_POPULATION_MULITPLIER[pick]
  end
end
# Splits a yearly +amount+ into twelve monthly amounts using
# SALES_PER_MONTH_DISTRIBUTION.
#
# @param amount [Numeric] yearly total
# @return [Array<Numeric>] monthly amounts as produced by sales_distribution
def sales_by_month(amount)
  monthly_shares = SALES_PER_MONTH_DISTRIBUTION
  sales_distribution(monthly_shares, amount)
end
# Generates one year of monthly sales amounts for every city.
#
# Takes the per-city yearly totals from cities_total_sales_per_year and
# replaces each total with its monthly breakdown from sales_by_month.
#
# @return [Hash{String => Array}] city name => monthly amounts
def cities_monthly_sales_per_year
  cities_total_sales_per_year.transform_values do |yearly_total|
    sales_by_month(yearly_total)
  end
end
# Draws a random whole amount by scaling one Gaussian.rnd draw by twice the
# given average.
#
# @param average [Numeric] value the draw is scaled around
# @return [Integer] Gaussian.rnd * (average * 2), rounded
def generate_random_amount(average)
  upper_bound = average * 2
  scaled = Gaussian.rnd * upper_bound
  scaled.round
end
# Generates a daily sales amount for every city and every calendar day of the
# requested period.
#
# The period runs from (last_year - years) through last_year, both inclusive,
# so it covers years + 1 calendar years. For each year a fresh monthly
# breakdown is generated per city, and each month's amount is spread over its
# days with a random distribution.
#
# @param years [Integer] how many years before +last_year+ to start at
# @param last_year [Integer] final year of the period (defaults to 2019)
# @return [Hash{String => Array<Array(Date, Numeric)>}] city name => list of
#   [date, amount] pairs in chronological order
def generate_all_sales_per_city(years, last_year: 2019)
  results = CITIES_POPULATION.keys.map { |city| [city, []] }.to_h

  ((last_year - years)..last_year).each do |year|
    cities_monthly_sales_per_year.each do |city, sales_per_month|
      sales_per_month.each.with_index(1) do |amount, month|
        # Day -1 is the last day of the month, which also handles December and leap years.
        days_in_month = Date.new(year, month, -1).day
        daily_shares = generate_random_distribution(days_in_month)

        sales_distribution(daily_shares, amount).each.with_index(1) do |day_sales, day|
          results[city] << [Date.new(year, month, day), day_sales]
        end
      end
    end
  end

  results
end
# Background for the repeat-customer logic in get_customer:
# 41% of an ecommerce store's revenue is created by only 8% of its customers;
# this 8% is made up of repeat customers, which makes them extremely profitable.

# Generates the individual sales for one city on one day.
#
# The day's amount is divided by AVERAGE_SALE_PER_CUSTOMER and rounded up to
# obtain the number of sales; generate_sale is then called that many times.
#
# @param city [String] city/branch key
# @param date [Date] day the sales belong to
# @param amount [Numeric] total sales amount targeted for the day
# @return [Integer] number of sales generated
def generate_sales_per_day(city, date, amount)
  # fdiv keeps the division fractional even for Integer amounts, so the
  # round-up is not lost to integer division.
  num_of_sales = amount.fdiv(AVERAGE_SALE_PER_CUSTOMER).ceil
  num_of_sales.times { generate_sale(city, date) }
end
# Creates one Sale (with its SaleItem rows) for the given city and date.
#
# A target amount is drawn with generate_random_amount, then random products
# are added to the basket until the basket total is within SALE_ERROR_RANGE of
# the target or the attempt limit is reached. A customer and an employee of the
# city's branch are attached to the sale.
#
# @param city [String] city/branch key into BRANCHES
# @param date [Date] date recorded on the sale
# @return [Hash] product id => quantity for the generated sale
def generate_sale(city, date)
  max_attempts = 25
  pick_product = -> { PRODUCT_CATALOG[SecureRandom.random_number(PRODUCT_CATALOG.size)] }

  this_sale_amount = generate_random_amount(AVERAGE_SALE_PER_CUSTOMER)
  total_so_far = 0.0
  products = []

  # Every attempt counts towards the limit, including the ones where the
  # product is skipped, so the loop cannot run forever when nothing is affordable.
  max_attempts.times do
    id, price = pick_product.call

    # If the price exceeds our "budget", try another product.
    next if price + total_so_far > this_sale_amount + SALE_ERROR_RANGE

    # We can afford it, buy it.
    products << id
    total_so_far += price

    # Stop buying once the basket is close enough to the target.
    break if (total_so_far - this_sale_amount).abs <= SALE_ERROR_RANGE
  end

  # Buy one random product if we ran out of attempts without selecting
  # anything, and make sure its price is reflected in the sale total.
  if products.empty?
    id, price = pick_product.call
    products << id
    total_so_far += price
  end

  shopping_list = products.each_with_object(Hash.new(0)) { |id, counts| counts[id] += 1 }
  customer = get_customer(city)
  employee = get_employee(city)

  sale = Sale.create(
    sale_date: date,
    total: total_so_far,
    subtotal: total_so_far,
    customer_id: customer.id,
    employee_id: employee.id,
    branch_id: BRANCHES[city].id
  )

  shopping_list.each do |product, how_many|
    SaleItem.create(
      sale_id: sale.id,
      product_id: product,
      quantity: how_many,
      total: PRODUCT_PRICES[product] * how_many
    )
  end
end
# Returns the customer for a new sale in the given city.
#
# A new customer is generated while the branch has fewer than 100 customers,
# and otherwise with 60% probability. In the remaining cases an existing
# customer is drawn from the first 8% of the branch's customers (ordered by
# id), which makes that group the repeat buyers.
#
# @param city [String] city/branch key into BRANCHES
# @return [Customer] a newly created or an existing customer
def get_customer(city)
  query = Customer.where(branch_id: BRANCHES[city].id)
  customer_count = query.count

  return generate_customer(city) if customer_count < 100 || SecureRandom.random_number > 0.4

  # Select by position instead of by a random id: ids of a branch need not be
  # contiguous, so looking up a random id inside the id range can miss.
  repeat_pool_size = (customer_count * 0.08).ceil
  query.order(:id).offset(SecureRandom.random_number(repeat_pool_size)).first
end
# Picks a random employee of the given city's branch.
#
# @param city [String] city/branch key into BRANCHES
# @return [Employee, nil] a random employee, or nil when the branch has none
def get_employee(city)
  # Arel.sql marks the raw SQL fragment as intentional; newer Rails versions
  # reject or deprecate plain strings that are not column references in `order`.
  Employee.where(branch_id: BRANCHES[city].id).order(Arel.sql('random()')).first
end
# Creates a customer with fake personal data, located at and assigned to the
# branch of the given city.
#
# The birth year is today's year minus a Gaussian.random_int(18, 65) draw; the
# day of month is limited to 1..27 so every month/day combination is valid.
#
# @param city [String] city/branch key into BRANCHES
# @return [Customer] the record returned by Customer.create
def generate_customer(city)
  branch = BRANCHES[city]

  attributes = {
    name: Faker::Name.name,
    address: Faker::Address.street_address,
    city: branch.city,
    state: branch.state,
    country: branch.country
  }

  birth_year = Date.today.year - Gaussian.random_int(18, 65)
  birth_month = SecureRandom.random_number(1..12)
  birth_day = SecureRandom.random_number(1..27)
  attributes[:date_of_birth] = Date.new(birth_year, birth_month, birth_day)

  attributes[:gender] = GENDERS[SecureRandom.random_number(2)]
  attributes[:branch_id] = branch.id

  Customer.create(attributes)
end
# Seed categories only when none exist yet: request 20 department names from
# Faker and create one category per distinct name.
if Category.count.zero?
  department_names = Array.new(20) { Faker::Commerce.department }.uniq
  department_names.each { |department| Category.create!(description: department) }
end
# Seed products only when none exist yet. Up to PRODUCT_COUNT distinct product
# names are generated; each gets a random price, a random brand out of up to
# 20 fake company names, and a random existing category.
if Product.count.zero?
  categories = Category.all.pluck(:id)
  brands = 20.times.map { Faker::Company.name }.uniq

  PRODUCT_COUNT.times.map { Faker::Commerce.product_name }.uniq.each do |name|
    price = ((SecureRandom.random_number * (MAX_PRICE_RANGE - MIN_PRICE_RANGE)) + MIN_PRICE_RANGE).round
    # Round down to a multiple of 50, then subtract one cent (e.g. 1234 -> 1199.99).
    price = price - price % 50 - 0.01

    Product.create!(
      name: name,
      price: price,
      brand: brands[SecureRandom.random_number(brands.count)],
      # Integer index, consistent with the brand pick above (previously a Float
      # product was used as the array index).
      category_id: categories[SecureRandom.random_number(categories.count)]
    )
  end
end
# Seed the four branches only when none exist yet. Branch names match the keys
# of CITIES_POPULATION; address and manager are fake data.
if Branch.count.zero?
  [
    { name: 'CDMX', state: 'Ciudad de México', city: 'Ciudad de México' },
    { name: 'HMO', state: 'Sonora', city: 'Hermosillo' },
    { name: 'GDL', state: 'Jalisco', city: 'Guadalajara' },
    # Monterrey is the city and Nuevo León the state (they were swapped before).
    { name: 'MTY', state: 'Nuevo León', city: 'Monterrey' }
  ].each do |location|
    Branch.create(
      name: location[:name],
      country: 'México',
      state: location[:state],
      city: location[:city],
      address: Faker::Address.street_address,
      manager: Faker::Name.name
    )
  end
end
# Lookup tables built once from the seeded data; frozen because they are
# shared, read-only state for the rest of the script.

# Branch name => Branch record.
BRANCHES = Branch.all.map { |branch| [branch.name, branch] }.to_h.freeze
# City => number of employees to create (population * ratio, rounded up).
NUM_EMPLOYEES = CITIES_POPULATION.transform_values do |population|
  (population * EMPLOYEES_PER_POPULATION_RATIO).ceil
end.freeze
GENDERS = %w[F M].freeze
# List of [id, price] pairs, used for random product picks.
PRODUCT_CATALOG = Product.pluck(:id, :price).freeze
# Product id => price, derived from the catalog instead of a second query.
PRODUCT_PRICES = PRODUCT_CATALOG.to_h.freeze
# Seed employees only when none exist yet: for every city create the number of
# employees given by NUM_EMPLOYEES, located at and assigned to that city's branch.
if Employee.count.zero?
  NUM_EMPLOYEES.each_pair do |city, headcount|
    headcount.times do
      branch = BRANCHES[city]
      profile = {
        name: Faker::Name.name,
        city: branch.city,
        state: branch.state,
        country: branch.country
      }

      # The day of month is limited to 1..27 so every month/day combination is valid.
      birth_year = Date.today.year - Gaussian.random_int(18, 65)
      birth_month = SecureRandom.random_number(12) + 1
      birth_day = SecureRandom.random_number(27) + 1
      profile[:date_of_birth] = Date.new(birth_year, birth_month, birth_day)

      profile[:gender] = GENDERS[SecureRandom.random_number(2)]
      profile[:branch_id] = branch.id

      Employee.create(profile)
    end
  end
end
# Generate all sales only when none exist yet. The period requested here is
# generate_all_sales_per_city(4), i.e. 2015 through 2019 inclusive. One
# progress line is printed per city/day, with elapsed time and an ETA
# extrapolated from the average time per processed day.
if Sale.count.zero?
  sales = generate_all_sales_per_city(4)
  how_many_total = sales.values.map(&:count).sum
  how_many_generated = 0
  elapsed_seconds = 0.0

  sales.each do |city, sales_list|
    sales_list.each do |date, amount|
      # Monotonic clock with sub-second resolution: whole-second wall-clock
      # timestamps truncate short runs to 0 and skew the ETA.
      start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      generate_sales_per_day(city, date, amount)
      elapsed_seconds += Process.clock_gettime(Process::CLOCK_MONOTONIC) - start

      how_many_generated += 1
      how_many_still_left = how_many_total - how_many_generated

      # Clear the screen and move the cursor home before printing progress.
      print "\e[2J\e[f"
      puts "(#{how_many_generated} / #{how_many_total}): processing: #{city} #{date}, elapsed: #{(elapsed_seconds / 60.0).round(2)} mins, ETA: #{(elapsed_seconds / how_many_generated.to_f * how_many_still_left / 60.0).round(2)} mins."
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment