appendjeff · December 12, 2016 03:37
diff --git a/match_date.py b/match_date.py
 import re

 class MatchDate(object):
     """A numeric date matched in a syllabus, with nearby assignment keywords.

     Instances record where the date was found (``start_index``/``end_index``),
     the raw matched text (``match``), the parsed ``year``/``month``/``day``
     and the ``value_words`` found close to the date (``ass_words``).
     """

     # Class-level defaults; parse_match_date() overrides them per instance.
     day = 0
     month = 0
     year = 0
     # Keywords looked for in the text surrounding each date.
     value_words = ["exam", "project", "assignment", "homework", "presentation"]

     def __init__(self, start_index, end_index, match):
         """Store the match span and text; no parsing happens here.

         Args:
             start_index: Offset of the first character of the match.
             end_index: Offset one past the last character of the match.
             match: The matched date text, e.g. ``"6/12/1996"``.
         """
         self.start_index = start_index
         self.end_index = end_index
         self.match = match
         self.ass_words = []

     @staticmethod
     def get_dates(document):
         """Return a parsed ``MatchDate`` for every date-like token in *document*.

         A token is three groups of 1-4 digits separated by ``-``, ``.`` or
         ``/``. Results are in order of appearance; an empty list is returned
         when nothing matches.
         """
         all_matches = []
         for m in re.finditer(r"\d{1,4}[-./]\d{1,4}[-./]\d{1,4}", document):
             date = MatchDate(m.start(0), m.end(0), m.group(0))
             date.parse_match_date()
             date.determine_value_words(document)
             all_matches.append(date)
         return all_matches

     def parse_match_date(self):
         """Fill in ``year``, ``month`` and ``day`` from ``self.match``.

         Heuristics, applied in order:
         * the first component greater than 31 is the year; otherwise the
           third component is;
         * a year below 100 is shifted into the 2000s;
         * of the remaining components the first is the month and the second
           the day, swapped when the month would exceed 12.
         """
         # A real list is required because components are overwritten in
         # place below; on Python 3 ``map`` returns a non-subscriptable
         # iterator, which made the original item assignment raise TypeError.
         parts = [int(piece) for piece in re.split(r"[-./]", self.match)]

         for i, value in enumerate(parts):
             if value > 31:
                 # Too large to be a day or month, so treat it as the year.
                 parts[i] = 0
                 self.year = value
                 break
         if self.year == 0:
             # No obvious year: fall back to the last component.
             self.year = parts[2]
             parts[2] = 0
         if self.year < 100:
             self.year = 2000 + self.year

         # The component consumed as the year was zeroed above and is skipped.
         for value in parts:
             if value != 0:
                 if self.month == 0:
                     self.month = value
                 else:
                     self.day = value
         if self.month > 12:
             # Day-first notation: swap so the month is in range.
             self.month, self.day = self.day, self.month

     def determine_value_words(self, doc, char_distance=25):
         """Append to ``ass_words`` every value word found near the date.

         Args:
             doc: The full text the date was matched in.
             char_distance: How many characters before and after the match
                 to search; the window is clamped to the bounds of *doc*.

         The search is case-insensitive and by substring; words are appended
         in ``value_words`` order.
         """
         window_start = max(0, self.start_index - char_distance)
         window_end = min(len(doc), self.end_index + char_distance)
         window = doc[window_start:window_end].lower()
         self.ass_words.extend(
             word for word in self.value_words if word in window
         )

                
                

 # ===========================
 # The Extractor.java file
 # ===========================
 def print_date_results(dates):
     """Print a short report for each parsed date.

     For every item, three lines are written to standard output: a
     ``========`` separator, the year/month/day, and the item's
     ``ass_words`` joined with commas.
     """
     separator = "========"
     for entry in dates:
         print(separator)
         summary = "Year: %s Month: %s Day: %s" % (
             entry.year, entry.month, entry.day)
         print(summary)
         keywords = ','.join(entry.ass_words)
         print(keywords)

 def main():
     """Extract the dates from a built-in sample string and print them."""
     sample_text = "this is a string 5 6 3256 12:15 thwuioan exam 6/12/1996 17/12/2016 18-09-2016 project 17/12/2016 18.09.2016 02/12/2222 assignment/ 2/22/2222 34/31/11"
     print_date_results(MatchDate.get_dates(sample_text))


 # Run the demo only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
	import re

	class MatchDate(object):
	    """A numeric date matched in a syllabus, with nearby assignment keywords.

	    Instances record where the date was found (``start_index``/``end_index``),
	    the raw matched text (``match``), the parsed ``year``/``month``/``day``
	    and the ``value_words`` found close to the date (``ass_words``).
	    """

	    # Class-level defaults; parse_match_date() overrides them per instance.
	    day = 0
	    month = 0
	    year = 0
	    # Keywords looked for in the text surrounding each date.
	    value_words = ["exam", "project", "assignment", "homework", "presentation"]

	    def __init__(self, start_index, end_index, match):
	        """Store the match span and text; no parsing happens here.

	        Args:
	            start_index: Offset of the first character of the match.
	            end_index: Offset one past the last character of the match.
	            match: The matched date text, e.g. ``"6/12/1996"``.
	        """
	        self.start_index = start_index
	        self.end_index = end_index
	        self.match = match
	        self.ass_words = []

	    @staticmethod
	    def get_dates(document):
	        """Return a parsed ``MatchDate`` for every date-like token in *document*.

	        A token is three groups of 1-4 digits separated by ``-``, ``.`` or
	        ``/``. Results are in order of appearance; an empty list is returned
	        when nothing matches.
	        """
	        all_matches = []
	        for m in re.finditer(r"\d{1,4}[-./]\d{1,4}[-./]\d{1,4}", document):
	            date = MatchDate(m.start(0), m.end(0), m.group(0))
	            date.parse_match_date()
	            date.determine_value_words(document)
	            all_matches.append(date)
	        return all_matches

	    def parse_match_date(self):
	        """Fill in ``year``, ``month`` and ``day`` from ``self.match``.

	        Heuristics, applied in order:
	        * the first component greater than 31 is the year; otherwise the
	          third component is;
	        * a year below 100 is shifted into the 2000s;
	        * of the remaining components the first is the month and the second
	          the day, swapped when the month would exceed 12.
	        """
	        # A real list is required because components are overwritten in
	        # place below; on Python 3 ``map`` returns a non-subscriptable
	        # iterator, which made the original item assignment raise TypeError.
	        parts = [int(piece) for piece in re.split(r"[-./]", self.match)]

	        for i, value in enumerate(parts):
	            if value > 31:
	                # Too large to be a day or month, so treat it as the year.
	                parts[i] = 0
	                self.year = value
	                break
	        if self.year == 0:
	            # No obvious year: fall back to the last component.
	            self.year = parts[2]
	            parts[2] = 0
	        if self.year < 100:
	            self.year = 2000 + self.year

	        # The component consumed as the year was zeroed above and is skipped.
	        for value in parts:
	            if value != 0:
	                if self.month == 0:
	                    self.month = value
	                else:
	                    self.day = value
	        if self.month > 12:
	            # Day-first notation: swap so the month is in range.
	            self.month, self.day = self.day, self.month

	    def determine_value_words(self, doc, char_distance=25):
	        """Append to ``ass_words`` every value word found near the date.

	        Args:
	            doc: The full text the date was matched in.
	            char_distance: How many characters before and after the match
	                to search; the window is clamped to the bounds of *doc*.

	        The search is case-insensitive and by substring; words are appended
	        in ``value_words`` order.
	        """
	        window_start = max(0, self.start_index - char_distance)
	        window_end = min(len(doc), self.end_index + char_distance)
	        window = doc[window_start:window_end].lower()
	        self.ass_words.extend(
	            word for word in self.value_words if word in window
	        )




	# ===========================
	# The Extractor.java file
	# ===========================
	def print_date_results(dates):
	    """Print a short report for each parsed date.

	    For every item, three lines are written to standard output: a
	    ``========`` separator, the year/month/day, and the item's
	    ``ass_words`` joined with commas.
	    """
	    for date in dates:
	        print("========")
	        print("Year: %s Month: %s Day: %s" % (date.year,
	            date.month, date.day))
	        print(','.join(date.ass_words))

	def main():
	    """Extract the dates from a built-in sample string and print them."""
	    document = "this is a string 5 6 3256 12:15 thwuioan exam 6/12/1996 17/12/2016 18-09-2016 project 17/12/2016 18.09.2016 02/12/2222 assignment/ 2/22/2222 34/31/11"
	    dates = MatchDate.get_dates(document)
	    print_date_results(dates)


	# Run the demo only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()
No results found