Last active
January 15, 2019 16:06
-
-
Save imamdigmi/f9c2247cdb4b7acf20acc9f99f7a0c17 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import csv | |
| import json | |
| import pytz | |
| from pytz import timezone | |
| from datetime import datetime | |
| from dateutil import parser | |
# Directory that holds the input CSV and receives the generated JSON file.
BASE_DIR = 'path/to/dataset/dir'

# Input (CSV) and output (JSON) file locations, both inside BASE_DIR.
csv_set = os.path.join(BASE_DIR, 'driver_registration.csv')
json_set = os.path.join(BASE_DIR, 'driver_registration.json')

# Column names, in order, handed to csv.DictReader as the field names.
fields = (
    "id",
    "date_created",
    "date_last_modified",
    "active_date",
    "name",
    "phone",
    "resign_date",
    "resign_reason",
    "status",
    "tipe",
    "area",
    "CONCAT('operator_',id)",
    "modified_by",
    "vehicle_type",
    "helmet_qty",
    "jacket_qty",
    "vehicle_brand",
    "vehicle_year",
    "bike_type",
    "first_ride_bonus_awarded",
    "is_doc_completed",
)
def date_format(d):
    """Convert a date string to an ISO 8601 timestamp expressed in UTC.

    Args:
        d: Date/time text accepted by ``dateutil.parser.parse``, or
            ``None`` / ``""`` for a missing value.

    Returns:
        ``None`` when ``d`` is ``None`` or the empty string; otherwise a
        string of the form ``YYYY-MM-DDTHH:MM:SS.mmmZ`` (millisecond
        precision) in UTC.

    Raises:
        Any exception ``dateutil.parser.parse`` raises for text it cannot
        interpret as a date propagates unchanged.
    """
    if d is None or d == "":
        return None

    parsed = parser.parse(d)
    if parsed.tzinfo is None:
        # Values without an explicit offset are read as Jakarta wall-clock
        # time.  localize() rejects datetimes that already carry tzinfo, so
        # it is applied to naive values only.
        parsed = pytz.timezone("Asia/Jakarta").localize(parsed)

    # The trailing "Z" designates UTC, so the value has to be shifted to UTC
    # before formatting; otherwise local time would be mislabelled as UTC.
    in_utc = parsed.astimezone(pytz.utc)

    # %f yields microseconds (6 digits); dropping the last 3 leaves millis.
    return in_utc.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + "Z"
# Columns whose string values are converted, paired with the target type.
_casts = (
    ('id', int),
    ('vehicle_year', float),
    ('status', int),
    ('tipe', int),
    ('area', int),
)

# Columns whose values are passed through date_format().
_date_columns = (
    'date_created',
    'date_last_modified',
    'active_date',
    'resign_date',
)

# Read the CSV row by row and write one JSON object per line.
with open(csv_set, 'r') as source, open(json_set, 'w') as sink:
    for record in csv.DictReader(source, fields):
        # Replace the literal text 'NULL' with None (written as JSON null).
        for column in fields:
            if record[column] == 'NULL':
                record[column] = None

        # Cast numeric columns; values that are not strings (e.g. None)
        # are left untouched.
        for column, cast in _casts:
            value = record[column]
            if isinstance(value, str):
                record[column] = cast(value)

        bonus = record['first_ride_bonus_awarded']
        if isinstance(bonus, str):
            # Turn the two-character sequence backslash + '0' into a NUL
            # character.
            record['first_ride_bonus_awarded'] = bonus.replace(u'\\0', u'\000')

        for column in _date_columns:
            record[column] = date_format(record[column])

        # Emit the row followed by a newline.
        json.dump(record, sink)
        sink.write('\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment