Skip to content

Instantly share code, notes, and snippets.

@tanimislam
Created October 26, 2016 19:39
Show Gist options
  • Select an option

  • Save tanimislam/55fd45c3936a97d2748c75796bcfa6d5 to your computer and use it in GitHub Desktop.

Select an option

Save tanimislam/55fd45c3936a97d2748c75796bcfa6d5 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from icalendar import Calendar, Event
from bs4 import BeautifulSoup
import pytz, datetime, os, sys
from requests.compat import urljoin
from optparse import OptionParser
def _nonblank_lines( text ):
    """Return the lines of ``text`` that contain something other than whitespace."""
    return [ line for line in text.splitlines( ) if line.strip( ) ]


def get_events_from_html( htmlfile ):
    """Extract schedule entries from an HTML file of alternating <p> elements.

    The ``<p>`` elements are read in pairs: even-indexed paragraphs are
    treated as time blocks and odd-indexed paragraphs as event blocks; the
    last paragraph of each kind is dropped.  A time block is expected to
    hold, on its first non-blank line, ``Weekday, Month D, YYYY`` and, on its
    second, ``H:MM AM/PM`` start and stop times separated by an en dash
    (U+2013).  Pairs whose time block is empty or does not start with a
    weekday name are skipped.

    :param htmlfile: path of the HTML file to parse.
    :returns: a list of ``(dtstart, dtend, event_name, event_loc, link)``
        tuples.  ``dtstart`` and ``dtend`` are timezone-aware datetimes in
        ``US/Pacific``; ``link`` is an absolute URL, or ``None`` when the
        event block contains no ``<a href=...>``.
    :raises ValueError: if a time line does not contain exactly one en dash
        or does not match the expected date/time format.
    :raises IndexError: if a time or event block has fewer non-blank lines
        than are read from it.
    """
    weekdays = ( 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday' )
    time_format = '%B %d, %Y %I:%M %p'
    #
    # Binary mode hands BeautifulSoup the raw bytes so that it performs the
    # encoding detection itself; the handle is closed once parsing is done.
    with open( htmlfile, 'rb' ) as handle:
        html = BeautifulSoup( handle, 'lxml' )
    paragraphs = html.find_all('p')
    all_times_elems = paragraphs[::2][:-1]
    all_event_elems = paragraphs[1::2][:-1]
    #
    all_elems_to_ical = [ ]
    tz = pytz.timezone('US/Pacific')
    for time_elem, event_elem in zip( all_times_elems, all_event_elems ):
        # Lists (not filter objects) so that indexing works on Python 3 too.
        time_strings = _nonblank_lines( time_elem.text )
        if not time_strings:
            continue
        date_tokens = [ tok.strip( ) for tok in time_strings[0].split(',') ]
        if date_tokens[0] not in weekdays:
            continue
        day_string = ', '.join( date_tokens[1:] )
        start, stop = time_strings[1].split(u'\u2013')
        # Collapse any run of whitespace inside each time to a single space.
        string_start = '%s %s' % ( day_string, ' '.join( start.split( ) ) )
        string_stop = '%s %s' % ( day_string, ' '.join( stop.split( ) ) )
        # localize() returns a NEW aware datetime and leaves its argument
        # untouched, so the result has to be kept.
        dtstart = tz.localize(
            datetime.datetime.strptime( string_start, time_format ) )
        dtend = tz.localize(
            datetime.datetime.strptime( string_stop, time_format ) )
        #
        event_strings = _nonblank_lines( event_elem.text )
        # Use the first link in the block.  The former max() over the tags
        # had no defined ordering (and raises TypeError on Python 3 as soon
        # as there are two links), which a bare except silently swallowed.
        anchor = event_elem.find( 'a', href=True )
        if anchor is not None:
            event_name = anchor.text.strip( )
            link = urljoin('http://meetings.aps.org/', anchor.attrs['href'])
        else:
            event_name = event_strings[0].strip( )
            link = None
        event_loc = event_strings[1].strip( )
        all_elems_to_ical.append( ( dtstart, dtend, event_name, event_loc, link ) )
    return all_elems_to_ical
def create_ical_from_elems( all_elems_to_ical, icalFile ):
    """Write a list of event tuples to an iCalendar (.ics) file.

    :param all_elems_to_ical: iterable of
        ``(dtstart, dtend, event_name, event_loc, link)`` tuples.  Each tuple
        becomes one VEVENT; the ``url`` property is only added when ``link``
        is not ``None``.
    :param icalFile: path of the file to write; must end in ``.ics``.
    :raises AssertionError: if ``icalFile`` does not end in ``.ics``.
    """
    assert( icalFile.endswith('.ics') )
    cal = Calendar( )
    cal.add('prodid', '-//My calendar product//[email protected]//')
    cal.add('version', '2.0')
    # Take an aware "now" directly in the target zone.  The previous
    # tz.localize( dtnow ) call threw its result away, so every DTSTAMP was
    # built from a naive machine-local timestamp.
    tz = pytz.timezone('US/Pacific')
    dtnow = datetime.datetime.now( tz )
    for dtstart, dtend, event_name, event_loc, link in all_elems_to_ical:
        event = Event( )
        event.add('summary', event_name )
        event.add('dtstart', dtstart )
        event.add('dtend', dtend )
        event.add('dtstamp', dtnow )
        event.add('location', event_loc )
        if link is not None:
            event.add('url', link )
        cal.add_component( event )
    # Opened in binary mode because the serialized calendar is written as-is.
    with open( icalFile, 'wb') as openfile:
        openfile.write( cal.to_ical( ) )
if __name__=='__main__':
    # Command-line entry point: convert the HTML schedule named by --html
    # into the iCalendar file named by --ics.
    parser = OptionParser( )
    parser.add_option( '--html', dest='html', type=str, action='store',
                       help = 'Name of the APS DPP 2016 HTML prettified HTML file.')
    parser.add_option( '--ics', dest='ics', type=str, action='store',
                       help = 'Name of the APS DPP 2016 ICS file to create.')
    opts, args = parser.parse_args( )
    #
    # Validate with parser.error rather than assert: assert statements are
    # removed under "python -O", and parser.error prints the usage text plus
    # a readable message before exiting instead of dumping a traceback.
    if opts.html is None or opts.ics is None:
        parser.error( 'both --html and --ics must be specified.' )
    if not os.path.isfile( opts.html ):
        parser.error( '--html file %s does not exist.' % opts.html )
    if not opts.html.endswith( '.html' ):
        parser.error( '--html file name must end in .html' )
    if not opts.ics.endswith( '.ics' ):
        parser.error( '--ics file name must end in .ics' )
    #
    all_elems = get_events_from_html( opts.html )
    create_ical_from_elems( all_elems, opts.ics )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment