Skip to content

Instantly share code, notes, and snippets.

@appendjeff
Created December 12, 2016 03:37
Show Gist options
  • Select an option

  • Save appendjeff/8f473ff9e745b4e15a365e92d963aac0 to your computer and use it in GitHub Desktop.

Select an option

Save appendjeff/8f473ff9e745b4e15a365e92d963aac0 to your computer and use it in GitHub Desktop.
Comparing java to python for education!
import re
class MatchDate(object):
    """A date object matched from a syllabus.

    Instances record where a date-like string was found in a document
    (``start_index``/``end_index``), the raw matched text (``match``), the
    numeric ``day``/``month``/``year`` worked out from it, and the keywords
    from ``value_words`` found near it (``ass_words``).
    """

    # Class-level defaults; 0 means "not determined yet".
    day = 0
    month = 0
    year = 0
    # Keywords searched for in the text surrounding a matched date.
    value_words = ["exam", "project", "assignment", "homework", "presentation"]

    # Three groups of 1-4 digits separated by '-', '.' or '/'.
    _DATE_PATTERN = re.compile(r"\d{1,4}[-./]\d{1,4}[-./]\d{1,4}")

    def __init__(self, start_index, end_index, match):
        """Store the match location and text.

        Args:
            start_index: Index in the document where the match starts.
            end_index: Index in the document just past the end of the match.
            match: The matched date text, e.g. ``"6/12/1996"``.
        """
        self.start_index = start_index
        self.end_index = end_index
        self.match = match
        self.ass_words = []

    @staticmethod
    def get_dates(document):
        """Return a list of parsed ``MatchDate`` objects found in *document*.

        Every match of the date pattern is parsed into day/month/year and
        tagged with the value words found around it. The list is empty when
        nothing matches.
        """
        all_matches = []
        for m in MatchDate._DATE_PATTERN.finditer(document):
            date = MatchDate(m.start(0), m.end(0), m.group(0))
            date.parse_match_date()
            date.determine_value_words(document)
            all_matches.append(date)
        return all_matches

    def parse_match_date(self):
        """Fill in ``year``, ``month`` and ``day`` from ``self.match``.

        The first component greater than 31 is taken as the year; if there
        is none, the third component is used. Years below 100 are shifted
        into the 2000s. Of the remaining non-zero components, the first
        becomes the month and the next the day; the two are swapped when
        the month would exceed 12.
        """
        comma_match = self.match.replace('-', ',').replace('/', ',').replace('.', ',')
        # Build a real list: the components are indexed and overwritten
        # below, which the iterator returned by map() on Python 3 does not
        # support.
        parts = [int(piece) for piece in comma_match.split(',')]

        # A component that cannot be a day or a month must be the year.
        for i, el in enumerate(parts):
            if el > 31:
                parts[i] = 0  # 0 marks the slot as consumed
                self.year = el
                break
        if self.year == 0:
            self.year = parts[2]
            parts[2] = 0
        if self.year < 100:
            self.year = 2000 + self.year

        for el in parts:
            if el != 0:
                if self.month == 0:
                    self.month = el
                else:
                    self.day = el
        if self.month > 12:
            self.month, self.day = self.day, self.month

    def determine_value_words(self, doc):
        """Append to ``ass_words`` every value word found near the match.

        The search window is the matched span widened by 25 characters on
        each side, clamped to the bounds of *doc*, and compared in lower
        case.
        """
        char_distance = 25
        start_loc = max(0, self.start_index - char_distance)
        end_loc = min(len(doc), self.end_index + char_distance)
        search_doc = doc[start_loc:end_loc].lower()
        for value_word in self.value_words:
            if value_word in search_doc:
                self.ass_words.append(value_word)
# ===========================
# The Extractor.java file
# ===========================
def print_date_results(dates):
    """Print a short report for each entry in *dates*.

    Each entry produces a separator line, a line with its year, month and
    day, and a comma-separated line of its ``ass_words``.
    """
    for entry in dates:
        print("========")
        summary = "Year: %s Month: %s Day: %s" % (entry.year, entry.month, entry.day)
        print(summary)
        keywords = ','.join(entry.ass_words)
        print(keywords)
def main():
    """Extract the dates from a hard-coded sample string and print them."""
    sample_text = "this is a string 5 6 3256 12:15 thwuioan exam 6/12/1996 17/12/2016 18-09-2016 project 17/12/2016 18.09.2016 02/12/2222 assignment/ 2/22/2222 34/31/11"
    print_date_results(MatchDate.get_dates(sample_text))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment