Skip to content

Instantly share code, notes, and snippets.

@imamdigmi
Last active January 15, 2019 16:06
Show Gist options
  • Select an option

  • Save imamdigmi/f9c2247cdb4b7acf20acc9f99f7a0c17 to your computer and use it in GitHub Desktop.

Select an option

Save imamdigmi/f9c2247cdb4b7acf20acc9f99f7a0c17 to your computer and use it in GitHub Desktop.
import os
import csv
import json
import pytz
from pytz import timezone
from datetime import datetime
from dateutil import parser
# Directory that holds the input CSV and receives the JSON output.
BASE_DIR = 'path/to/dataset/dir'

# Input (CSV) and output (one JSON document per line) file locations.
csv_set = os.path.join(BASE_DIR, 'driver_registration.csv')
json_set = os.path.join(BASE_DIR, 'driver_registration.json')

# Column names, in file order, handed to csv.DictReader as its fieldnames.
fields = (
    "id",
    "date_created",
    "date_last_modified",
    "active_date",
    "name",
    "phone",
    "resign_date",
    "resign_reason",
    "status",
    "tipe",
    "area",
    "CONCAT('operator_',id)",
    "modified_by",
    "vehicle_type",
    "helmet_qty",
    "jacket_qty",
    "vehicle_brand",
    "vehicle_year",
    "bike_type",
    "first_ride_bonus_awarded",
    "is_doc_completed",
)
def date_format(d):
    """Convert a date string to an ISO 8601 UTC timestamp.

    The string is parsed with ``dateutil.parser.parse``. A value without
    an offset is interpreted as Asia/Jakarta wall-clock time; a value that
    already carries an offset is used as given. The result is then shifted
    to UTC so that the trailing ``Z`` designator is accurate.

    Args:
        d: Date/time text, or ``None`` / ``""`` for a missing value.

    Returns:
        A string shaped like ``YYYY-MM-DDTHH:MM:SS.mmmZ`` (millisecond
        precision), or ``None`` when ``d`` is ``None`` or empty.
    """
    if d is None or d == "":
        return None

    parsed = parser.parse(d)
    if parsed.tzinfo is None:
        # localize() only accepts naive datetimes; it raises ValueError
        # when tzinfo is already set, hence the guard.
        parsed = pytz.timezone("Asia/Jakarta").localize(parsed)

    # "Z" means UTC, so the value must actually be expressed in UTC
    # before it is formatted.
    as_utc = parsed.astimezone(pytz.utc)

    # %f yields microseconds (6 digits); drop the last 3 for milliseconds.
    return as_utc.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + "Z"
# Columns parsed into numbers, paired with the constructor applied to them.
_NUMERIC_COLUMNS = (
    ('id', int),
    ('vehicle_year', float),
    ('status', int),
    ('tipe', int),
    ('area', int),
)

# Columns rewritten through date_format().
_DATE_COLUMNS = (
    'date_created',
    'date_last_modified',
    'active_date',
    'resign_date',
)


def _wrangle_row(row):
    """Normalise one CSV record in place and return it.

    Steps, in order:
      1. Cells holding the literal text ``'NULL'`` become ``None``.
      2. The numeric columns are cast with ``int`` / ``float``; a
         non-numeric string raises ``ValueError``.
      3. In ``first_ride_bonus_awarded`` the two-character sequence
         backslash + ``0`` is replaced by a NUL character.
      4. The date columns are passed through ``date_format``.

    Args:
        row: Mapping produced by ``csv.DictReader`` for one line.

    Returns:
        The same mapping, mutated.
    """
    for key in fields:
        if row[key] == 'NULL':
            row[key] = None

    # isinstance(..., str) is already False for None, so no separate
    # None check is needed before casting.
    for column, cast in _NUMERIC_COLUMNS:
        if isinstance(row[column], str):
            row[column] = cast(row[column])

    bonus = row['first_ride_bonus_awarded']
    if isinstance(bonus, str):
        row['first_ride_bonus_awarded'] = bonus.replace(u'\\0', u'\000')

    for column in _DATE_COLUMNS:
        row[column] = date_format(row[column])

    return row


# newline='' hands newline handling to the csv module, which its
# documentation requires so that line breaks embedded in quoted fields are
# read correctly.
with open(csv_set, 'r', newline='') as csv_data, \
        open(json_set, 'w') as jsonfile:
    # Field names are supplied explicitly, so every line of the file,
    # including the first, is treated as a data row.
    reader = csv.DictReader(csv_data, fields)
    for row in reader:
        # One JSON document per output line.
        json.dump(_wrangle_row(row), jsonfile)
        jsonfile.write('\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment