Skip to content

Instantly share code, notes, and snippets.

@Amareis
Created April 3, 2016 00:19
Show Gist options
  • Select an option

  • Save Amareis/b0a723e698783929c5b1f6c81d412f85 to your computer and use it in GitHub Desktop.

Select an option

Save Amareis/b0a723e698783929c5b1f6c81d412f85 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib notebook\n",
"\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd, numpy as np, requests, json\n",
"\n",
"from tqdm import tqdm\n",
"from pandas.io.json import json_normalize\n",
"\n",
"matplotlib.style.use('ggplot')"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from html.parser import HTMLParser\n",
"\n",
"class MLStripper(HTMLParser):\n",
"    def __init__(self):\n",
"        self.reset()\n",
"        self.strict = False\n",
"        self.convert_charrefs= True\n",
"        self.fed = []\n",
"    def handle_data(self, d):\n",
"        self.fed.append(d)\n",
"    def get_data(self):\n",
"        return ''.join(self.fed)\n",
"\n",
"def strip_tags(html):\n",
"    html = html.replace('<br>', '\\n')\n",
"    s = MLStripper()\n",
"    s.feed(html)\n",
"    return s.get_data()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"('https://2ch.hk/b/catalog.json', 200)"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"board = 'b'\n",
"url = 'https://2ch.hk/{}/catalog.json'.format(board)\n",
"resp = requests.get(url)\n",
"url, resp.status_code"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ops_json = json.loads(resp.text)['threads']"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>comment</th>\n",
" <th>date</th>\n",
" <th>files_count</th>\n",
" <th>num</th>\n",
" <th>posts_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td></td>\n",
" <td>2016-04-02 11:37:27</td>\n",
" <td>0</td>\n",
" <td>122120582</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td></td>\n",
" <td>2016-04-02 16:45:30</td>\n",
" <td>62</td>\n",
" <td>122150646</td>\n",
" <td>189</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Камвхоры и неймфаги двачей/тиречей/сосачей/хар...</td>\n",
" <td>2016-04-02 18:58:13</td>\n",
" <td>67</td>\n",
" <td>122164310</td>\n",
" <td>242</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td></td>\n",
" <td>2016-04-02 20:15:14</td>\n",
" <td>1</td>\n",
" <td>122171916</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ЦУЬИ WEBM ВЕЧЕРНИЙ/НОЧНОЙ</td>\n",
" <td>2016-04-02 19:20:06</td>\n",
" <td>291</td>\n",
" <td>122166381</td>\n",
" <td>470</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" comment date \\\n",
"0 2016-04-02 11:37:27 \n",
"1 2016-04-02 16:45:30 \n",
"2 Камвхоры и неймфаги двачей/тиречей/сосачей/хар... 2016-04-02 18:58:13 \n",
"3 2016-04-02 20:15:14 \n",
"4 ЦУЬИ WEBM ВЕЧЕРНИЙ/НОЧНОЙ 2016-04-02 19:20:06 \n",
"\n",
" files_count num posts_count \n",
"0 0 122120582 63 \n",
"1 62 122150646 189 \n",
"2 67 122164310 242 \n",
"3 1 122171916 3 \n",
"4 291 122166381 470 "
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"op_droplist = ['subject', 'tags', 'email', 'banned', 'closed', 'op', 'parent',\n",
" 'sticky', 'hidden_num', 'trip', 'name', 'files', 'lasthit', 'timestamp']\n",
"ops = json_normalize(ops_json)\n",
"ops['date'] = pd.to_datetime(ops['timestamp'], unit='s')\n",
"ops['comment'] = ops['comment'].apply(strip_tags)\n",
"ops.drop(op_droplist, 1, inplace=1)\n",
"ops.head()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>duration</th>\n",
" <th>height</th>\n",
" <th>name</th>\n",
" <th>size</th>\n",
" <th>tn_height</th>\n",
" <th>tn_width</th>\n",
" <th>type</th>\n",
" <th>width</th>\n",
" <th>num</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td></td>\n",
" <td>640</td>\n",
" <td>14595970473540.jpg</td>\n",
" <td>60</td>\n",
" <td>220</td>\n",
" <td>165</td>\n",
" <td>1</td>\n",
" <td>480</td>\n",
" <td>122120582</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>00:00:32</td>\n",
" <td>1080</td>\n",
" <td>14596155301500.webm</td>\n",
" <td>3298</td>\n",
" <td>123</td>\n",
" <td>220</td>\n",
" <td>6</td>\n",
" <td>1920</td>\n",
" <td>122150646</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td></td>\n",
" <td>2592</td>\n",
" <td>14596234938300.jpg</td>\n",
" <td>765</td>\n",
" <td>220</td>\n",
" <td>164</td>\n",
" <td>1</td>\n",
" <td>1936</td>\n",
" <td>122164310</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td></td>\n",
" <td>336</td>\n",
" <td>14596281143980.jpg</td>\n",
" <td>26</td>\n",
" <td>120</td>\n",
" <td>220</td>\n",
" <td>1</td>\n",
" <td>615</td>\n",
" <td>122171916</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>00:01:27</td>\n",
" <td>360</td>\n",
" <td>14596248064220.webm</td>\n",
" <td>5525</td>\n",
" <td>165</td>\n",
" <td>220</td>\n",
" <td>6</td>\n",
" <td>480</td>\n",
" <td>122166381</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" duration height name size tn_height tn_width type \\\n",
"0 640 14595970473540.jpg 60 220 165 1 \n",
"1 00:00:32 1080 14596155301500.webm 3298 123 220 6 \n",
"2 2592 14596234938300.jpg 765 220 164 1 \n",
"3 336 14596281143980.jpg 26 120 220 1 \n",
"4 00:01:27 360 14596248064220.webm 5525 165 220 6 \n",
"\n",
" width num \n",
"0 480 122120582 \n",
"1 1920 122150646 \n",
"2 1936 122164310 \n",
"3 615 122171916 \n",
"4 480 122166381 "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"opfiles = json_normalize(ops_json, 'files', ['num']).drop(['md5', 'nsfw', 'path', 'thumbnail'], 1)\n",
"opfiles.head()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": []
},
{
"data": {
"text/plain": [
"'17869 posts loaded from 147 threads!'"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"posts_json = []\n",
"exist_ops = 0\n",
"for i, n in tqdm(enumerate(reversed(ops.num)), mininterval=0.1, total=len(ops), leave=True):\n",
"    turl = 'https://2ch.hk/{}/res/{}.json'.format(board, n)\n",
"    resp = requests.get(turl)\n",
"    if resp.status_code == 200:\n",
"        ps = json.loads(resp.text)['threads'][0]['posts']\n",
"        posts_json.extend(ps)\n",
"        exist_ops += 1\n",
"'{} posts loaded from {} threads'.format(len(posts_json), exist_ops)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>comment</th>\n",
" <th>date</th>\n",
" <th>num</th>\n",
" <th>number</th>\n",
" <th>parent</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>http://slither.io</td>\n",
" <td>2016-04-02 16:06:27</td>\n",
" <td>122146512</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td></td>\n",
" <td>2016-04-02 16:25:44</td>\n",
" <td>122148604</td>\n",
" <td>2</td>\n",
" <td>122146512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td></td>\n",
" <td>2016-04-02 16:33:31</td>\n",
" <td>122149359</td>\n",
" <td>3</td>\n",
" <td>122146512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>&gt;&gt;122149359</td>\n",
" <td>2016-04-02 16:34:54</td>\n",
" <td>122149503</td>\n",
" <td>4</td>\n",
" <td>122146512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td></td>\n",
" <td>2016-04-02 16:35:33</td>\n",
" <td>122149579</td>\n",
" <td>5</td>\n",
" <td>122146512</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" comment date num number parent\n",
"0 http://slither.io 2016-04-02 16:06:27 122146512 1 0\n",
"1 2016-04-02 16:25:44 122148604 2 122146512\n",
"2 2016-04-02 16:33:31 122149359 3 122146512\n",
"3 >>122149359 2016-04-02 16:34:54 122149503 4 122146512\n",
"4 2016-04-02 16:35:33 122149579 5 122146512"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"post_droplist = ['tags', 'subject', 'email', 'banned', 'closed', 'lasthit', 'op',\n",
" 'sticky', 'hidden_num', 'trip', 'name', 'files', 'timestamp']\n",
"posts = json_normalize(posts_json)\n",
"posts['date'] = pd.to_datetime(posts['timestamp'], unit='s')\n",
"posts['comment'] = posts['comment'].apply(strip_tags)\n",
"posts.drop(post_droplist, 1, inplace=1)\n",
"posts.head()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>duration</th>\n",
" <th>height</th>\n",
" <th>name</th>\n",
" <th>size</th>\n",
" <th>tn_height</th>\n",
" <th>tn_width</th>\n",
" <th>type</th>\n",
" <th>width</th>\n",
" <th>num</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td></td>\n",
" <td>661</td>\n",
" <td>14596131871290.png</td>\n",
" <td>1372</td>\n",
" <td>111</td>\n",
" <td>220</td>\n",
" <td>2</td>\n",
" <td>1302</td>\n",
" <td>122146512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td></td>\n",
" <td>589</td>\n",
" <td>14596196055160.jpg</td>\n",
" <td>87</td>\n",
" <td>125</td>\n",
" <td>170</td>\n",
" <td>1</td>\n",
" <td>800</td>\n",
" <td>122157798</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td></td>\n",
" <td>147</td>\n",
" <td>14596251721600.jpg</td>\n",
" <td>9</td>\n",
" <td>147</td>\n",
" <td>220</td>\n",
" <td>1</td>\n",
" <td>220</td>\n",
" <td>122166974</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td></td>\n",
" <td>726</td>\n",
" <td>14595554375820.png</td>\n",
" <td>204</td>\n",
" <td>220</td>\n",
" <td>93</td>\n",
" <td>2</td>\n",
" <td>308</td>\n",
" <td>122084428</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td></td>\n",
" <td>1068</td>\n",
" <td>14595555467730.jpg</td>\n",
" <td>257</td>\n",
" <td>113</td>\n",
" <td>170</td>\n",
" <td>1</td>\n",
" <td>1600</td>\n",
" <td>122084533</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" duration height name size tn_height tn_width type \\\n",
"0 661 14596131871290.png 1372 111 220 2 \n",
"1 589 14596196055160.jpg 87 125 170 1 \n",
"2 147 14596251721600.jpg 9 147 220 1 \n",
"3 726 14595554375820.png 204 220 93 2 \n",
"4 1068 14595555467730.jpg 257 113 170 1 \n",
"\n",
" width num \n",
"0 1302 122146512 \n",
"1 800 122157798 \n",
"2 220 122166974 \n",
"3 308 122084428 \n",
"4 1600 122084533 "
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"postfiles = json_normalize(posts_json, 'files', ['num']).drop(['md5', 'nsfw', 'path', 'thumbnail'], 1)\n",
"postfiles.head()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(Timestamp('2016-04-02 20:21:21'), 17869, 6776)"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"posts.date[len(posts)-1], len(posts), len(postfiles)"
]
},
{
"cell_type": "code",
"execution_count": 317,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"import re\n",
"#[\\s\\,\\!\\?\\.\\>\\\"\\'\\%\\«\\:]|\n",
"spl = posts.comment.str.split(r'https?://.+|\\d+|\\(OP\\)|[^\\w+]').apply(lambda x: x if x else None).dropna()\n",
"t = []\n",
"for l in spl:\n",
" t.extend(x.lower() for x in l)\n",
"spl = pd.Series(t).apply(\n",
" lambda x: None\n",
" if not x\n",
" or x.startswith('>>')\n",
" or len(x) < 5\n",
" else x\n",
").dropna()"
]
},
{
"cell_type": "code",
"execution_count": 326,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>comment</th>\n",
" <th>date</th>\n",
" <th>num</th>\n",
" <th>number</th>\n",
" <th>parent</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>517</th>\n",
" <td>Буду отписывать в 30 и 00 минут каждого часа (...</td>\n",
" <td>2016-04-02 15:29:52</td>\n",
" <td>122143012</td>\n",
" <td>39</td>\n",
" <td>122135985</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3460</th>\n",
" <td>Сап, двачмурмурмур</td>\n",
" <td>2016-04-02 19:04:39</td>\n",
" <td>122164866</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4016</th>\n",
" <td>Сап, двач. Самки совсем не привлекаюткуны, соб...</td>\n",
" <td>2016-04-02 19:17:06</td>\n",
" <td>122166050</td>\n",
" <td>43</td>\n",
" <td>122162369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4469</th>\n",
" <td>&gt;&gt;122145686 (OP)\\n&gt;2007 – это когда ты молод, ...</td>\n",
" <td>2016-04-02 18:26:16</td>\n",
" <td>122161114</td>\n",
" <td>58</td>\n",
" <td>122145686</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5949</th>\n",
" <td>Сап, братья.\\n\\nhttps://clyp.it/4kxvftup</td>\n",
" <td>2016-04-01 18:38:07</td>\n",
" <td>122054030</td>\n",
" <td>127</td>\n",
" <td>122036249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6524</th>\n",
" <td>&gt;&gt;122081897\\nСап, братья. Иногда мне кажется, ...</td>\n",
" <td>2016-04-01 23:35:12</td>\n",
" <td>122082655</td>\n",
" <td>192</td>\n",
" <td>122053833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7849</th>\n",
" <td>Сап б, сегодня вспомнил про свой аккаунт в Ori...</td>\n",
" <td>2016-04-02 19:24:13</td>\n",
" <td>122166782</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9101</th>\n",
" <td>Трейнсёрфер в треде. Катаю сапсаны раз в недел...</td>\n",
" <td>2016-04-02 13:51:34</td>\n",
" <td>122133188</td>\n",
" <td>302</td>\n",
" <td>122106787</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11383</th>\n",
" <td>НЕТ СИЛ НА ДОСТИЖЕНИЯ\\nСап, /b/. Хуй 26 уровня...</td>\n",
" <td>2016-04-02 18:14:00</td>\n",
" <td>122159945</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12464</th>\n",
" <td>сап двач. я нюхнул одну дорожку фена. больше ...</td>\n",
" <td>2016-04-02 19:47:11</td>\n",
" <td>122169166</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13167</th>\n",
" <td>ВкатилсяДля разговоров с Мэри и Кюри по душам\\...</td>\n",
" <td>2016-04-02 17:38:44</td>\n",
" <td>122156392</td>\n",
" <td>8</td>\n",
" <td>122155330</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13420</th>\n",
" <td>&gt;&gt;122169408\\nИнвентарь:\\n5 серебряных монет\\nС...</td>\n",
" <td>2016-04-02 19:54:12</td>\n",
" <td>122169845</td>\n",
" <td>261</td>\n",
" <td>122155330</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13426</th>\n",
" <td>&gt;&gt;122169801\\nДворф второй ботинок поставил тор...</td>\n",
" <td>2016-04-02 19:59:49</td>\n",
" <td>122170393</td>\n",
" <td>267</td>\n",
" <td>122155330</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13452</th>\n",
" <td>&gt;&gt;122170719\\nДворф угомонился, поймал бутылку ...</td>\n",
" <td>2016-04-02 20:18:07</td>\n",
" <td>122172154</td>\n",
" <td>293</td>\n",
" <td>122155330</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13990</th>\n",
" <td>Сап, анон.\\nСегодня прикупил себе аккаунтов.\\n...</td>\n",
" <td>2016-04-02 20:02:52</td>\n",
" <td>122170690</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14306</th>\n",
" <td>&gt;&gt;122161413\\nиз сапога резинового</td>\n",
" <td>2016-04-02 18:36:09</td>\n",
" <td>122162105</td>\n",
" <td>36</td>\n",
" <td>122160914</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" comment date \\\n",
"517 Буду отписывать в 30 и 00 минут каждого часа (... 2016-04-02 15:29:52 \n",
"3460 Сап, двачмурмурмур 2016-04-02 19:04:39 \n",
"4016 Сап, двач. Самки совсем не привлекаюткуны, соб... 2016-04-02 19:17:06 \n",
"4469 >>122145686 (OP)\\n>2007 – это когда ты молод, ... 2016-04-02 18:26:16 \n",
"5949 Сап, братья.\\n\\nhttps://clyp.it/4kxvftup 2016-04-01 18:38:07 \n",
"6524 >>122081897\\nСап, братья. Иногда мне кажется, ... 2016-04-01 23:35:12 \n",
"7849 Сап б, сегодня вспомнил про свой аккаунт в Ori... 2016-04-02 19:24:13 \n",
"9101 Трейнсёрфер в треде. Катаю сапсаны раз в недел... 2016-04-02 13:51:34 \n",
"11383 НЕТ СИЛ НА ДОСТИЖЕНИЯ\\nСап, /b/. Хуй 26 уровня... 2016-04-02 18:14:00 \n",
"12464 сап двач. я нюхнул одну дорожку фена. больше ... 2016-04-02 19:47:11 \n",
"13167 ВкатилсяДля разговоров с Мэри и Кюри по душам\\... 2016-04-02 17:38:44 \n",
"13420 >>122169408\\nИнвентарь:\\n5 серебряных монет\\nС... 2016-04-02 19:54:12 \n",
"13426 >>122169801\\nДворф второй ботинок поставил тор... 2016-04-02 19:59:49 \n",
"13452 >>122170719\\nДворф угомонился, поймал бутылку ... 2016-04-02 20:18:07 \n",
"13990 Сап, анон.\\nСегодня прикупил себе аккаунтов.\\n... 2016-04-02 20:02:52 \n",
"14306 >>122161413\\nиз сапога резинового 2016-04-02 18:36:09 \n",
"\n",
" num number parent \n",
"517 122143012 39 122135985 \n",
"3460 122164866 1 0 \n",
"4016 122166050 43 122162369 \n",
"4469 122161114 58 122145686 \n",
"5949 122054030 127 122036249 \n",
"6524 122082655 192 122053833 \n",
"7849 122166782 1 0 \n",
"9101 122133188 302 122106787 \n",
"11383 122159945 1 0 \n",
"12464 122169166 1 0 \n",
"13167 122156392 8 122155330 \n",
"13420 122169845 261 122155330 \n",
"13426 122170393 267 122155330 \n",
"13452 122172154 293 122155330 \n",
"13990 122170690 1 0 \n",
"14306 122162105 36 122160914 "
]
},
"execution_count": 326,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"posts[posts.comment.str.contains('сап', case=False)]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment