Skip to content

Instantly share code, notes, and snippets.

@selik
Forked from jschombe/yelpclusterfinder.py
Last active December 23, 2015 05:09
Show Gist options
  • Select an option

  • Save selik/6585371 to your computer and use it in GitHub Desktop.

Select an option

Save selik/6585371 to your computer and use it in GitHub Desktop.
#yelp cluster finder
import re
import collections
def yelp_find(filename, kw):
    """Print and collect every line of *filename* that contains *kw*.

    Args:
        filename: Path of the text file to scan, read line by line.
        kw: Substring to look for; each raw line is checked with a plain,
            case-sensitive ``in`` test.

    Returns:
        A list of the matching lines in file order, each still carrying
        its trailing newline.  (Earlier versions built this list but
        dropped it and returned ``None``.)
    """
    matches = []
    # Plain text mode: universal newlines are the default on Python 3,
    # where the legacy "U" flag was removed in 3.11.
    with open(filename) as reviews:
        for line in reviews:
            if kw in line:
                # Single-argument print(...) behaves the same on
                # Python 2 and Python 3.
                print(line)
                matches.append(line)
    return matches
def yelp_find_busid_and_date(filename, kw):
    """Print and return the business-id fragment and date of each review.

    The whole file is read and scanned with a regular expression that
    looks, within a single line, for a ``YYYY-MM-DD`` (or ``YYYY/MM/DD``)
    date followed later by a ``"business_id": "<22 characters>"}`` fragment.

    Args:
        filename: Path of the review file to scan.
        kw: Unused; kept so existing callers keep working.

    Returns:
        A list of ``(business_id_fragment, date)`` string pairs in match
        order.  The first element is the full matched fragment, i.e. it
        includes the ``"business_id": `` key and the closing brace, not
        just the 22-character id.
    """
    # Raw string so the backslashes reach the regex engine untouched
    # instead of relying on Python passing unknown escapes through.
    pattern = (
        r'(\d{4}[-/]\d{2}[-/]\d{2})(.+)'
        r'(\"business_id\"\: \"......................\"\})'
    )
    with open(filename) as reviews:
        matches = re.findall(pattern, reviews.read())

    pairs = []
    for date, _between, business_id in matches:
        print(business_id + date)
        pairs.append((business_id, date))
    return pairs
def yelp_cluster_size(filename, kw):
    """Group review dates by business and report businesses seen twice or more.

    The whole file is read and scanned with a regular expression that
    looks, within a single line, for a ``YYYY-MM-DD`` (or ``YYYY/MM/DD``)
    date followed later by a ``"business_id": "<22 characters>"}`` fragment.

    Args:
        filename: Path of the review file to scan.
        kw: Unused; no keyword filtering is applied.  Kept so existing
            callers keep working.

    Returns:
        A dict mapping each business-id fragment (the full matched
        ``"business_id": "..."}`` text) to its list of date strings in
        match order, restricted to businesses with more than one date.
        The same dict is also printed.
    """
    pattern = (
        r'(\d{4}[-/]\d{2}[-/]\d{2})(.+)'
        r'(\"business_id\"\: \"......................\"\})'
    )
    with open(filename) as reviews:
        matches = re.findall(pattern, reviews.read())

    dates_by_business = collections.defaultdict(list)
    for date, _between, business_id in matches:
        dates_by_business[business_id].append(date)

    # Only a business with at least two dated reviews can form a cluster.
    date_clusters = {
        business_id: dates
        for business_id, dates in dates_by_business.items()
        if len(dates) > 1
    }
    print(date_clusters)
    return date_clusters
def days_between_events(filename, numberdays):
    """Find businesses whose reviews all fall within a short span of days.

    The whole file is read and scanned with a regular expression that
    looks, within a single line, for a ``YYYY-MM-DD`` (or ``YYYY/MM/DD``)
    date followed later by a ``"business_id": "<22 characters>"}`` fragment.
    Dates are grouped per business, parsed as calendar dates and sorted.

    Args:
        filename: Path of the review file to scan.
        numberdays: Exclusive upper bound, in days, on the gap between a
            business's earliest and latest review.

    Returns:
        A dict mapping each business-id fragment (the full matched
        ``"business_id": "..."}`` text) to its date strings sorted
        chronologically, for every business that has at least two
        reviews spanning fewer than ``numberdays`` days.

    NOTE(review): the previous body never stored any dates, compared
    ``.day`` attributes of plain strings against a hard-coded 2 and threw
    the result away, so the exact intended rule is inferred here (real
    calendar span, at least two reviews) -- confirm against the intended use.
    """
    # Imported locally because the file header does not import datetime.
    from datetime import datetime

    pattern = (
        r'(\d{4}[-/]\d{2}[-/]\d{2})(.+)'
        r'(\"business_id\"\: \"......................\"\})'
    )
    with open(filename) as reviews:
        matches = re.findall(pattern, reviews.read())

    dated_by_business = collections.defaultdict(list)
    for date_text, _between, business_id in matches:
        try:
            parsed = datetime.strptime(date_text.replace("/", "-"), "%Y-%m-%d")
        except ValueError:
            # The regex accepts digit runs such as 2013-13-45 that are not
            # real dates; skip them rather than abort the whole scan.
            continue
        dated_by_business[business_id].append((parsed.date(), date_text))

    clusters = {}
    for business_id, entries in dated_by_business.items():
        if len(entries) < 2:
            continue
        entries.sort()
        # Subtract real dates (latest minus earliest) so spans that cross
        # a month or year boundary are measured correctly.
        span = entries[-1][0] - entries[0][0]
        if span.days < numberdays:
            clusters[business_id] = [text for _parsed, text in entries]
    return clusters
#if __name__ == '__main__':
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment