Ut utsavsabharwal

utsavsabharwal / crawler.py

Created September 4, 2012 08:57

Fetch the top 10 URLs on Google for a given list of UPCs

	#!/usr/bin/env python

	"""crawler.py: Fetch the top 10 URLs on Google for a given list of UPCs."""
	__author__ = "Utsav Sabharwal"


	import sys
	import zlib
	import time
	import pycurl

utsavsabharwal / fb.py

Created August 24, 2012 10:16

Facebook Graph API Authentication

	import traceback

	def create_user_authentication_url(client_id, redirect_uri, scope = None, state = None, response_type = None, display = None):
	try:
	try:
	client_id = str(int(client_id))+traceback.format_exc()
	except Exception:
	raise Exception, "\n\n <== Client ID must be an integer ==>\n\n"+traceback.format_exc()
	query = "https://www.facebook.com/dialog/oauth/?client_id="+client_id+"&redirect_uri="+redirect_uri
	if state:

utsavsabharwal / crawler.py

Created August 20, 2012 10:10

Web Crawler

	#__version__:0.3
	#__author__:Utsav Sabharwal

	'''
	Features:

	* Flush the success, update, and insert file entries in real time.
	* Upload to SQS and S3 simultaneously, in real time.

	TODO:

utsavsabharwal / sqs_count.py

Created August 20, 2012 05:35

Get SQS Count

	import gzip
	import base64
	import simplejson
	from cStringIO import StringIO
	import boto.exception
	import boto.s3
	import boto.s3.connection
	import boto.s3.key
	import traceback
	import boto.sqs

utsavsabharwal / mysql-python-api-example.py

Created August 17, 2012 05:28

MySQL Python API Example

	from datetime import datetime
	import hashlib
	import MySQLdb
	# Connect to the "spider" database on the local MySQL server as user "root"
	# (no password is supplied in this call).
	conn = MySQLdb.connect (host = "localhost", user = "root", db = "spider")
	# The statement below is issued through this cursor.
	cursor = conn.cursor ()

	#INSERTION/UPDATE Statements
	# Copy every row selected from `something` into `url_queue`. With
	# INSERT IGNORE, MySQL skips rows that would otherwise raise an error
	# (e.g. a duplicate key) instead of aborting the whole statement.
	sql="insert ignore into url_queue select * from something"
	cursor.execute(sql)
	# Commit the transaction so the inserted rows are persisted.
	conn.commit()

utsavsabharwal / python xpath example

Created July 30, 2012 04:02

How to fetch information from an HTML page using XPath in Python

	from lxml import etree
	# Parse the HTML text into an element tree.
	# NOTE(review): html_content is not defined in this snippet; presumably it
	# holds the page source obtained elsewhere -- confirm against the caller.
	tree = etree.HTML(html_content)
	# Select the text nodes of the span/span nested under the element whose id
	# is "BVRRRatingSummarySourceID".
	result = tree.xpath('.//*[@id="BVRRRatingSummarySourceID"]/div/div/div/div[1]/span/span/text()')
	# The type of result depends on the XPath expression: node and text()
	# selections (as here) come back as a list, while expressions such as
	# string(...) or count(...) yield a single string or number.