Skip to content

Instantly share code, notes, and snippets.

@nolanlum
Created April 25, 2013 22:53
Show Gist options
  • Select an option

  • Save nolanlum/5463896 to your computer and use it in GitHub Desktop.

Select an option

Save nolanlum/5463896 to your computer and use it in GitHub Desktop.
import sqlite3, datetime, HTMLParser, re, sys
from datetime import datetime
# Value compared against the Messages.chatname column to pick the conversation to export.
CHAT_ID = '#newyorkkid618/$68805f313f49f8a'
# Path of the SQLite database file the script opens.
IN_FILE = 'main.db'
# Path of the text file the formatted messages are written to.
OUT_FILE = 'dump.txt'
def format_time(t):
    """Render a datetime-like object as ``M/D/YYYY h:MM:SS AM|PM``.

    Args:
        t: Object exposing ``month``, ``day``, ``year``, ``hour`` (0-23),
            ``minute`` and ``second`` attributes, e.g. ``datetime.datetime``.

    Returns:
        The formatted timestamp string using a 12-hour clock.
    """
    TIME_FORMAT = '{}/{}/{} {}:{:02n}:{:02n} {}'
    # 12-hour clock: hours 0 and 12 are both shown as "12"; everything from
    # 12:00 onward is PM (the previous `< 13` test mislabelled noon as AM and
    # printed midnight as hour 0).
    hour12 = t.hour % 12 or 12
    meridiem = "AM" if t.hour < 12 else "PM"
    return TIME_FORMAT.format(t.month, t.day, t.year, hour12, t.minute, t.second, meridiem)
def format_line(line):
    """Build the output text for one message record.

    Args:
        line: Mapping with the keys ``timestamp``, ``author``,
            ``from_dispname``, ``edited_by``, ``edited_timestamp`` and
            ``message``.

    Returns:
        A newline-terminated unicode string. The bracketed prefix carries
        only the timestamp when ``edited_by`` is None, adds the edit time
        when the author edited the message, and additionally names the
        editor when someone else did.
    """
    editor = line['edited_by']
    if editor is None:
        template = u"[{timestamp}] {author} ({from_dispname}): {message}\n"
    elif editor == line['author']:
        template = u"[{timestamp} (edited {edited_timestamp})] {author} ({from_dispname}): {message}\n"
    else:
        template = u"[{timestamp} (edited by {edited_by} at {edited_timestamp})] {author} ({from_dispname}): {message}\n"
    return template.format(**line)
if __name__ == "__main__":
    # Export every row of the Messages table whose chatname equals CHAT_ID and
    # whose chatmsg_type is 3 from the SQLite file IN_FILE into OUT_FILE,
    # one formatted, UTF-8 encoded entry per message, ordered by id.

    # Compiled once: strips anything that looks like an XML/HTML tag.
    tag_re = re.compile('<[^<]+?>')

    db = sqlite3.connect(IN_FILE)
    try:
        c = db.cursor()
        h = HTMLParser.HTMLParser()

        # CHAT_ID is bound as a parameter rather than spliced into the SQL
        # text, so quotes or other special characters in it cannot break
        # (or alter) the statement.
        count = c.execute(
            "SELECT COUNT(*) FROM Messages WHERE chatname = ? AND chatmsg_type = 3",
            (CHAT_ID,)).fetchone()[0]
        rows = c.execute(
            "SELECT timestamp, author, from_dispname, edited_by, edited_timestamp, body_xml "
            "FROM Messages WHERE chatname = ? AND chatmsg_type = 3 ORDER BY id ASC",
            (CHAT_ID,))

        # `with` guarantees the output file is flushed and closed even if a
        # row fails to convert part-way through the export.
        with open(OUT_FILE, 'w') as f:
            for index, row in enumerate(rows):
                timestamp, author, from_dispname, edited_by, edited_timestamp, message = row

                # NOTE(review): body_xml is presumably NULL for some rows
                # (e.g. removed messages) - treat that as empty text instead
                # of crashing on None.replace.
                if message is None:
                    message = u''
                message = message.replace("\r\n", "\n")
                message = tag_re.sub('', message)
                message = h.unescape(message)

                timestamp = format_time(datetime.fromtimestamp(timestamp))
                # The edit time comes from the database in the same raw form
                # as `timestamp`; format it the same way so the output does
                # not contain a bare epoch number.
                if edited_timestamp is not None:
                    edited_timestamp = format_time(datetime.fromtimestamp(edited_timestamp))

                line = {u'timestamp': timestamp, u'author': author, u'from_dispname': from_dispname,
                        u'edited_by': edited_by, u'edited_timestamp': edited_timestamp,
                        u'message': message}
                f.write(format_line(line).encode('utf-8'))

                # Progress indicator, rewritten in place every 1000 rows.
                if index % 1000 == 0:
                    sys.stdout.write("\r{}/{}".format(index, count))
                    sys.stdout.flush()

        sys.stdout.write("\rProcessed {} lines.\n".format(count))
    finally:
        db.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment