$ python3 -m venv venv
$ . venv/bin/activate
(venv) $ pip install scrapy
(venv) $ scrapy runspider tablespider.py
...
{'会社名': '○○株式会社', '住所': '東京都新宿区'}
...
import scrapy


class TableSpider(scrapy.Spider):
    """Spider that reads a header/value HTML table into a single dict.

    Every ``<tr>`` under a ``<table>`` contributes one entry: the first text
    node of its ``<th>`` becomes the key and the first text node of its
    ``<td>`` becomes the value, both with surrounding whitespace removed.
    """

    name = 'table'
    start_urls = [
        'https://gist.github.com/orangain/55f67efccdbb394f714471b08d4c33eb/raw/29eb49720d46e327a9d7e4dd6fac6aee1ae4b3da/test.html',
    ]

    def parse(self, response):
        """Build a dict from the table rows in *response* and print it.

        Rows without any ``<th>`` text are skipped because they provide no
        key. Rows whose ``<td>`` has no direct text node (for example a cell
        that only wraps a link) are kept with an empty-string value.
        """
        table_contents = {}
        for tr in response.css('table tr'):
            # extract_first() yields None when the selector matches nothing;
            # calling .strip() on that would abort the whole parse.
            key = tr.css('th::text').extract_first()
            if key is None:
                continue
            # Only the first direct text node is taken, so text inside nested
            # elements such as <a> is intentionally left out of the value.
            value = tr.css('td::text').extract_first(default='')
            table_contents[key.strip()] = value.strip()
        print(table_contents)
| <table> | |
| <tr> | |
| <th>会社名</th> | |
| <td>○○株式会社</td> | |
| </tr> | |
| <tr> | |
| <th>住所</th> | |
| <td>東京都新宿区 <a href="#">Google マップで見る</a> | |
| </tr> | |
| </table> |