Created
October 26, 2016 19:39
-
-
Save tanimislam/55fd45c3936a97d2748c75796bcfa6d5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| from icalendar import Calendar, Event | |
| from bs4 import BeautifulSoup | |
| import pytz, datetime, os, sys | |
| from requests.compat import urljoin | |
| from optparse import OptionParser | |
def get_events_from_html( htmlfile ):
    """
    Extract calendar events from an HTML file of alternating ``<p>`` elements.

    The ``<p>`` elements are read pairwise: even-indexed ones hold the time,
    odd-indexed ones hold the event. The last element of each of the two
    sequences is dropped before pairing.

    A time paragraph is expected to have, as its first two non-blank lines::

        <Weekday>, <Month> <day>, <year>
        <HH:MM AM/PM> \u2013 <HH:MM AM/PM>      (separated by an en dash)

    Pairs whose time paragraph is blank, or does not start with a weekday
    name, are skipped.

    :param htmlfile: path of the HTML file to parse.
    :returns: a list of ``(dtstart, dtend, event_name, event_loc, link)``
        tuples. ``dtstart`` and ``dtend`` are timezone-aware datetimes in
        ``US/Pacific``; ``link`` is an absolute URL or ``None`` when the event
        paragraph has no ``<a href=...>``.
    :raises ValueError: if the time line does not split into exactly two parts
        on the en dash, or a date/time does not match
        ``'%B %d, %Y %I:%M %p'``.
    :raises IndexError: if a time or event paragraph has fewer non-blank lines
        than expected.
    """
    weekdays = ( 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday' )
    time_format = '%B %d, %Y %I:%M %p'
    #
    # close the file deterministically instead of leaking the handle
    with open( htmlfile, 'r' ) as openfile:
        html = BeautifulSoup( openfile, 'lxml' )
    all_p_elems = html.find_all('p')
    all_times_elems = all_p_elems[::2][:-1]
    all_event_elems = all_p_elems[1::2][:-1]
    #
    all_elems_to_ical = [ ]
    tz = pytz.timezone('US/Pacific')
    for time_elem, event_elem in zip( all_times_elems, all_event_elems ):
        # real lists (not filter objects) so that indexing works on Python 3
        time_strings = [ line for line in time_elem.text.splitlines( )
                         if line.strip( ) != '' ]
        if not time_strings:
            continue
        date_toks = [ tok.strip( ) for tok in time_strings[0].split(',') ]
        if date_toks[0] not in weekdays:
            continue
        day_string = ', '.join( date_toks[1:] )
        start, stop = time_strings[1].split(u'\u2013')
        # collapse any run of whitespace into a single space before parsing
        string_start = '%s %s' % ( day_string, ' '.join( start.split( ) ) )
        string_stop = '%s %s' % ( day_string, ' '.join( stop.split( ) ) )
        # localize() returns a NEW aware datetime; it does not modify its
        # argument, so the result must be kept.
        dtstart = tz.localize( datetime.datetime.strptime( string_start, time_format ) )
        dtend = tz.localize( datetime.datetime.strptime( string_stop, time_format ) )
        #
        event_strings = [ line for line in event_elem.text.splitlines( )
                          if line.strip( ) != '' ]
        # Tag objects have no meaningful ordering, so take the first anchor
        # in document order rather than max() of the anchors.
        anchors = event_elem.find_all( 'a', href = True )
        if anchors:
            href = anchors[0]
            event_name = href.text.strip( )
            link = urljoin( 'http://meetings.aps.org/', href.attrs['href'] )
        else:
            event_name = event_strings[0].strip( )
            link = None
        event_loc = event_strings[1].strip( )
        all_elems_to_ical.append( ( dtstart, dtend, event_name, event_loc, link ) )
    return all_elems_to_ical
def create_ical_from_elems( all_elems_to_ical, icalFile ):
    """
    Write a list of events to an iCalendar (``.ics``) file.

    :param all_elems_to_ical: iterable of
        ``(dtstart, dtend, event_name, event_loc, link)`` tuples. ``link`` may
        be ``None``, in which case no ``url`` property is written.
    :param icalFile: output file name; must end with ``.ics``.
    :raises AssertionError: if ``icalFile`` does not end with ``.ics``.
    """
    assert icalFile.endswith('.ics'), 'output file name must end with .ics'
    cal = Calendar( )
    # NOTE(review): the '[email protected]' part of this literal looks like an
    # e-mail obfuscation artifact -- confirm the intended product id.
    cal.add('prodid', '-//My calendar product//[email protected]//')
    cal.add('version', '2.0')
    # DTSTAMP must be expressed in UTC (RFC 5545); take an aware "now"
    # directly instead of a naive local time with a discarded localize().
    dtnow = datetime.datetime.now( pytz.utc )
    for dtstart, dtend, event_name, event_loc, link in all_elems_to_ical:
        event = Event( )
        event.add('summary', event_name )
        event.add('dtstart', dtstart )
        event.add('dtend', dtend )
        event.add('dtstamp', dtnow )
        event.add('location', event_loc )
        if link is not None:
            event.add('url', link )
        cal.add_component( event )
    # to_ical() is written in binary mode
    with open( icalFile, 'wb') as openfile:
        openfile.write( cal.to_ical( ) )
if __name__=='__main__':
    # Command-line entry point: convert the HTML file given by --html into
    # the iCalendar file given by --ics.
    parser = OptionParser( )
    parser.add_option( '--html', dest='html', type='string', action='store',
                       help = 'Name of the APS DPP 2016 HTML prettified HTML file.')
    parser.add_option( '--ics', dest='ics', type='string', action='store',
                       help = 'Name of the APS DPP 2016 ICS file to create.')
    opts, args = parser.parse_args( )
    # Validate with parser.error() rather than assert: asserts are stripped
    # under "python -O" and give the user no usable message. parser.error()
    # prints the usage plus the message and exits with status 2.
    if opts.html is None or opts.ics is None:
        parser.error( 'both --html and --ics must be specified.' )
    if not os.path.isfile( opts.html ):
        parser.error( '%s is not an existing file.' % opts.html )
    if not opts.html.endswith( '.html' ):
        parser.error( '--html file name must end with .html' )
    if not opts.ics.endswith( '.ics' ):
        parser.error( '--ics file name must end with .ics' )
    #
    all_elems = get_events_from_html( opts.html )
    create_ical_from_elems( all_elems, opts.ics )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment