Last active
April 22, 2020 05:47
-
-
Save ebobby/cff9fa8a0cd5beaaaa2d216522e09e39 to your computer and use it in GitHub Desktop.
Generador de base de datos de una empresa retail
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'securerandom' | |
| require 'gsl' | |
# Number of product names requested from Faker (duplicates are dropped later,
# so the final catalogue may be smaller).
PRODUCT_COUNT = 80
# Lower and upper bounds used when drawing a random product price.
MIN_PRICE_RANGE = 100
MAX_PRICE_RANGE = 20_000
# How far a sale's running total may be from its target amount and still be
# considered "close enough" to stop adding products.
SALE_ERROR_RANGE = 5_000
# Average ticket size; drives both the number of sales per day and the target
# amount of each individual sale.
AVERAGE_SALE_PER_CUSTOMER = 15_000
# Employees hired per inhabitant of a branch's city.
EMPLOYEES_PER_POPULATION_RATIO = 0.000013

# Population per branch code.
CITIES_POPULATION = {
  'CDMX' => 8_855_000,
  'GDL' => 1_460_148,
  'MTY' => 1_109_171,
  'HMO' => 812_229,
}.freeze

# Candidate yearly-sales-per-inhabitant multipliers; one is picked at random
# for every city and year.
# NOTE(review): the name is misspelled ("MULITPLIER") but is kept because the
# rest of the script refers to it by this name.
SALES_PER_POPULATION_MULITPLIER = (37..71).to_a.freeze

# Share of yearly sales falling in each month, January first (12 entries that
# add up to 1.0).
# Source: 2018 US electronics store sales distribution,
# https://www.census.gov/retail/index.html
SALES_PER_MONTH_DISTRIBUTION = [
  0.07863728596347584,
  0.06450784948916023,
  0.08096630000118662,
  0.07248196931401517,
  0.07732337759424727,
  0.07083038458343716,
  0.08134710998777781,
  0.08593608866422224,
  0.07660816631661387,
  0.07840644097160419,
  0.1048497146179678,
  0.1281053124962918
].freeze
# Bell-shaped random numbers backed by a GSL random number generator.
class Gaussian
  # Single generator shared by all calls; allocated once, when the class body
  # is evaluated.
  @rng = GSL::Rng.alloc

  class << self
    # Draws one sample from the generator's `gaussian(0.2)`, shifts it by 1.0,
    # halves it and clamps the result.
    # NOTE(review): assuming GSL's zero-mean gaussian, values cluster around
    # 0.5 — confirm against the rb-gsl documentation.
    #
    # @return [Float] a value in 0.0..1.0 (inclusive)
    def rnd
      shifted = @rng.gaussian(0.2) + 1.0
      (shifted / 2.0).clamp(0.0, 1.0)
    end

    # Scales `rnd` to the span between +min+ and +max+, rounding up.
    #
    # @param min [Integer] lower bound
    # @param max [Integer] upper bound
    # @return [Integer] a value between +min+ and +max+ (inclusive)
    def random_int(min, max)
      span = max - min
      min + (rnd * span).ceil
    end
  end
end
# Builds +n+ random non-negative weights normalised so they add up to 1.0.
#
# @param n [Integer] number of buckets
# @return [Array<Float>] +n+ fractions summing to 1.0 (empty when +n+ is 0)
def generate_random_distribution(n)
  weights = n.times.map { SecureRandom.random_number(256).to_f }
  total = weights.sum
  # Every draw can be 0 (likely for small n); dividing by a zero total would
  # yield NaN, so fall back to an even split instead.
  return weights.map { 1.0 / n } if total.zero?

  weights.map { |weight| weight / total }
end
# Splits +amount+ according to the fractions in +distribution+.
#
# @param distribution [Array<Float>] fraction assigned to each bucket
# @param amount [Numeric] total to split
# @return [Array<Float>] one value per fraction, rounded to 2 decimals
def sales_distribution(distribution, amount)
  portion_of_amount = ->(fraction) { (fraction * amount).round(2) }
  distribution.map(&portion_of_amount)
end
# Computes a yearly sales total for every city: its population times a
# multiplier picked at random from SALES_PER_POPULATION_MULITPLIER.
#
# @return [Hash{String => Float}] city code => yearly sales total
def cities_total_sales_per_year
  multipliers = SALES_PER_POPULATION_MULITPLIER
  CITIES_POPULATION.transform_values do |population|
    population.to_f * multipliers[SecureRandom.random_number(multipliers.count)]
  end
end
# Splits a yearly +amount+ into twelve monthly amounts using
# SALES_PER_MONTH_DISTRIBUTION.
#
# @param amount [Numeric] yearly total
# @return [Array<Float>] one amount per month, January first
def sales_by_month(amount)
  monthly_shares = SALES_PER_MONTH_DISTRIBUTION
  sales_distribution(monthly_shares, amount)
end
# Draws a yearly total for every city and breaks it down by month.
#
# @return [Hash{String => Array<Float>}] city code => twelve monthly amounts
def cities_monthly_sales_per_year
  cities_total_sales_per_year.transform_values do |yearly_total|
    sales_by_month(yearly_total)
  end
end
# Draws a random amount between 0 and twice +average+, using Gaussian.rnd as
# the scaling factor.
#
# @param average [Numeric] value the amounts are scaled around
# @return [Integer] the rounded random amount
def generate_random_amount(average)
  upper_bound = average * 2
  scaled = Gaussian.rnd * upper_bound
  scaled.round
end
# Generates a daily sales amount for every city and every day of the covered
# years.
#
# For each year a fresh monthly breakdown is drawn per city, and every monthly
# amount is then spread over the days of that month with a random
# distribution.
#
# NOTE(review): the year range is inclusive on both ends, so +years+ = 4
# covers five calendar years (last_year - 4 .. last_year) — confirm this is
# intended.
#
# @param years [Integer] how many years before +last_year+ to start from
# @param last_year [Integer] final year to generate (defaults to 2019)
# @return [Hash{String => Array<Array(Date, Float)>}] city code => list of
#   [date, amount] pairs in chronological order
def generate_all_sales_per_city(years, last_year = 2019)
  results = CITIES_POPULATION.keys.map { |city| [city, []] }.to_h

  ((last_year - years)..last_year).each do |year|
    cities_monthly_sales_per_year.each do |city, sales_per_month|
      sales_per_month.each.with_index(1) do |month_amount, month|
        # Day -1 is the last day of the month, which handles December and
        # leap years without special cases.
        days_in_month = Date.new(year, month, -1).day
        daily_amounts = sales_distribution(generate_random_distribution(days_in_month), month_amount)

        daily_amounts.each.with_index(1) do |day_sales, day|
          results[city] << [Date.new(year, month, day), day_sales]
        end
      end
    end
  end

  results
end
| # 41% of an ecommerce store’s revenue is created by only 8% of its customers; | |
| # this 8% is made up of your repeat customers, making it clear that they are extremely profitable! | |
# Generates the individual sales for one city on one day. The number of sales
# is the day's +amount+ divided by AVERAGE_SALE_PER_CUSTOMER, rounded up.
#
# @param city [String] branch code
# @param date [Date] day the sales belong to
# @param amount [Float] total sales amount assigned to that day
# @return [Integer] the number of sales requested
def generate_sales_per_day(city, date, amount)
  (amount / AVERAGE_SALE_PER_CUSTOMER).ceil.times do
    generate_sale(city, date)
  end
end
# Creates one Sale (with its SaleItems) for +city+ on +date+.
#
# A target amount is drawn around AVERAGE_SALE_PER_CUSTOMER, random products
# are picked until the total is close enough to it, and the sale is stored
# with a customer and an employee of the city's branch.
#
# @param city [String] branch code (key of BRANCHES)
# @param date [Date] date recorded on the sale
# @return [Hash] product id => quantity for the items that were created
def generate_sale(city, date)
  target_amount = generate_random_amount(AVERAGE_SALE_PER_CUSTOMER)
  product_ids, sale_total = pick_products_for_sale(target_amount)

  # product id => quantity, keeping the order in which products were picked.
  shopping_list = product_ids.each_with_object(Hash.new(0)) { |id, counts| counts[id] += 1 }

  customer = get_customer(city)
  employee = get_employee(city)
  sale = Sale.create(
    sale_date: date,
    total: sale_total,
    subtotal: sale_total,
    customer_id: customer.id,
    employee_id: employee.id,
    branch_id: BRANCHES[city].id
  )

  shopping_list.each do |product, how_many|
    SaleItem.create(
      sale_id: sale.id,
      product_id: product,
      quantity: how_many,
      total: PRODUCT_PRICES[product] * how_many
    )
  end
end

# Picks random catalogue products whose prices add up to roughly
# +target_amount+ (within SALE_ERROR_RANGE).
#
# @param target_amount [Numeric] amount the sale should approximate
# @param max_attempts [Integer] hard cap on random draws, so the search
#   always terminates
# @return [Array(Array, Numeric)] the picked product ids (with repeats) and
#   the sum of their prices
def pick_products_for_sale(target_amount, max_attempts = 25)
  picked_ids = []
  running_total = 0.0

  # The cap counts every draw, including rejected ones; otherwise a catalogue
  # where nothing fits the budget would make this loop spin forever.
  max_attempts.times do
    id, price = PRODUCT_CATALOG[SecureRandom.random_number(PRODUCT_CATALOG.size)]

    # Too expensive for the remaining budget: draw again.
    next if price + running_total > target_amount + SALE_ERROR_RANGE

    picked_ids << id
    running_total += price

    # Close enough to the target: stop buying.
    break if (running_total - target_amount).abs <= SALE_ERROR_RANGE
  end

  # Nothing affordable was drawn: sell one random product anyway and charge
  # for it, so the sale total matches its items.
  if picked_ids.empty?
    id, price = PRODUCT_CATALOG[SecureRandom.random_number(PRODUCT_CATALOG.size)]
    picked_ids << id
    running_total += price
  end

  [picked_ids, running_total]
end
# Returns the customer for a new sale at +city+'s branch: either a brand-new
# customer or a returning one.
#
# A new customer is created while the branch has fewer than 100 customers,
# and otherwise about 60% of the time. Returning customers are drawn only
# from the oldest ~8% of the branch's customers (by id).
#
# @param city [String] branch code (key of BRANCHES)
# @return [Customer]
def get_customer(city)
  branch_customers = Customer.where(branch_id: BRANCHES[city].id)
  customer_count = branch_customers.count
  return generate_customer(city) if customer_count < 100 || SecureRandom.random_number > 0.4

  # Pick by position rather than by a random id: ids are shared with other
  # branches and may have gaps, so a random id inside min..max is not
  # guaranteed to exist for this branch.
  repeat_pool_size = ((customer_count - 1) * 0.08).ceil + 1
  branch_customers.order(:id).offset(SecureRandom.random_number(repeat_pool_size)).first
end
# Picks one employee of +city+'s branch, ordered randomly by the database.
#
# @param city [String] branch code (key of BRANCHES)
# @return [Employee, nil] nil when the branch has no employees
def get_employee(city)
  branch_id = BRANCHES[city].id
  branch_staff = Employee.where(branch_id: branch_id)
  branch_staff.order("random()").first
end
# Creates a customer for +city+'s branch with fake personal data. The location
# fields are copied from the branch; age is drawn between 18 and 65 years.
#
# @param city [String] branch code (key of BRANCHES)
# @return [Customer] the record returned by Customer.create
def generate_customer(city)
  branch = BRANCHES[city]
  full_name = Faker::Name.name
  street = Faker::Address.street_address

  birth_year = Date.today.year - Gaussian.random_int(18, 65)
  birth_month = SecureRandom.random_number(1..12)
  # Days stop at 27 so the date is valid in every month.
  birth_day = SecureRandom.random_number(1..27)

  Customer.create(
    name: full_name,
    address: street,
    city: branch.city,
    state: branch.state,
    country: branch.country,
    date_of_birth: Date.new(birth_year, birth_month, birth_day),
    gender: GENDERS[SecureRandom.random_number(2)],
    branch_id: branch.id
  )
end
# Seed product categories once: up to 20 distinct Faker department names.
if Category.count.zero?
  department_names = Array.new(20) { Faker::Commerce.department }.uniq
  department_names.each { |department| Category.create!(description: department) }
end
# Seed the product catalogue once: up to PRODUCT_COUNT distinct Faker product
# names, each with a random price, brand and category.
if Product.count.zero?
  categories = Category.pluck(:id)
  brands = 20.times.map { Faker::Company.name }.uniq

  PRODUCT_COUNT.times.map { Faker::Commerce.product_name }.uniq.each do |name|
    price = ((SecureRandom.random_number * (MAX_PRICE_RANGE - MIN_PRICE_RANGE)) + MIN_PRICE_RANGE).round
    # Round down to a multiple of 50 and shave one cent (e.g. 1234 -> 1199.99).
    price = price - price % 50 - 0.01

    Product.create!(
      name: name,
      price: price,
      brand: brands[SecureRandom.random_number(brands.count)],
      # Integer index, same as for brands (previously a Float index that was
      # only valid because Array#[] truncates it).
      category_id: categories[SecureRandom.random_number(categories.count)]
    )
  end
end
# Seed the four branches once. Creation order is kept (CDMX, HMO, GDL, MTY)
# so the records are inserted in the same sequence as before.
if Branch.count.zero?
  [
    { name: "CDMX", state: "Ciudad de México", city: "Ciudad de México" },
    { name: "HMO", state: "Sonora", city: "Hermosillo" },
    { name: "GDL", state: "Jalisco", city: "Guadalajara" },
    # Monterrey is the city and Nuevo León the state (they were swapped).
    { name: "MTY", state: "Nuevo León", city: "Monterrey" }
  ].each do |location|
    Branch.create(
      name: location[:name],
      country: "México",
      state: location[:state],
      city: location[:city],
      address: Faker::Address.street_address,
      manager: Faker::Name.name
    )
  end
end
# Lookup tables loaded once and shared by the generators; frozen so they
# cannot be modified by accident while the script runs.

# Branch name (city code) => Branch record.
BRANCHES = Branch.all.map { |b| [b.name, b] }.to_h.freeze
# City code => number of employees to create, derived from its population.
NUM_EMPLOYEES = CITIES_POPULATION.map { |k, v| [k, (v * EMPLOYEES_PER_POPULATION_RATIO).ceil] }.to_h.freeze
GENDERS = ['F', 'M'].freeze
# Array of [product id, price] pairs, used for random picks by index.
PRODUCT_CATALOG = Product.pluck(:id, :price).freeze
# Product id => price, built from the catalogue instead of querying again.
PRODUCT_PRICES = PRODUCT_CATALOG.to_h.freeze
# Seed employees once: NUM_EMPLOYEES[city] records per branch, with fake names
# and location fields copied from the branch.
if Employee.count.zero?
  NUM_EMPLOYEES.each do |city, headcount|
    branch = BRANCHES[city]

    headcount.times do
      full_name = Faker::Name.name
      # Age between 18 and 65; days stop at 27 so the date is always valid.
      born_on = Date.new(
        Date.today.year - Gaussian.random_int(18, 65),
        SecureRandom.random_number(12) + 1,
        SecureRandom.random_number(27) + 1
      )

      Employee.create(
        name: full_name,
        city: branch.city,
        state: branch.state,
        country: branch.country,
        date_of_birth: born_on,
        gender: GENDERS[SecureRandom.random_number(2)],
        branch_id: branch.id
      )
    end
  end
end
# Seed sales once: generate the daily amounts for every city, then create the
# individual sales day by day while printing progress and an ETA.
if Sale.count.zero?
  sales = generate_all_sales_per_city(4)
  how_many_total = sales.values.sum(&:count)
  how_many_generated = 0
  elapsed_seconds = 0.0

  sales.each do |city, sales_list|
    sales_list.each do |date, amount|
      # Monotonic, fractional timing: whole-second timestamps drop every step
      # shorter than a second, which made elapsed time and ETA read too low.
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      generate_sales_per_day(city, date, amount)
      elapsed_seconds += Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

      how_many_generated += 1
      how_many_still_left = how_many_total - how_many_generated
      # Average time per processed day, projected over the remaining days.
      eta_seconds = elapsed_seconds / how_many_generated.to_f * how_many_still_left

      # Clear the terminal and move the cursor home before each status line.
      print "\e[2J\e[f"
      puts "(#{how_many_generated} / #{how_many_total}): processing: #{city} #{date}, elapsed: #{(elapsed_seconds / 60.0).round(2)} mins, ETA: #{(eta_seconds / 60.0).round(2)} mins."
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment