Created
October 23, 2016 22:11
-
-
Save Dr-ZeeD/be6cf5975ecf5d1e6044351c93686ba6 to your computer and use it in GitHub Desktop.
Scrapy one-file script with custom logging and click.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| import logging | |
| import click | |
| from twisted.internet import reactor | |
| from scrapy import Item, Spider | |
| from scrapy.crawler import CrawlerRunner | |
| from scrapy.settings import Settings | |
| from scrapy.utils.log import configure_logging | |
class MyItem(Item):
    """Item model for scraped data; declare scrapy Fields here."""
class MySpider(Spider):
    """Spider stub.

    Scrapy requires every spider to define a ``name``; the original
    stub had none, so ``runner.crawl(MySpider)`` would fail before any
    crawling started. The name also doubles as the spider's logger name.
    """

    # Required by Scrapy; also the logging channel this spider emits on.
    name = 'my_spider'
class MyPipeline:
    """Item pipeline stub.

    BUG FIX: the Scrapy pipeline hook is ``process_item`` (singular).
    The original defined ``process_items``, which Scrapy never calls,
    so the pipeline silently did nothing.
    """

    # Dedicated channel so main() can tune pipeline verbosity separately.
    logger = logging.getLogger('my_pipeline')

    def process_item(self, item, spider):
        """Return *item* unchanged; extend with real processing."""
        return item
def setup_loggers(loggers, log_level, logging_handlers):
    """Apply *log_level* to every logger and attach each handler to it."""
    for target in loggers:
        target.setLevel(log_level)
        for handler in logging_handlers:
            target.addHandler(handler)
@click.command()
@click.option('-v', '--verbose', count=True, help='Log verbose.')
def main(verbose):
    """Run my spider.

    ``-v`` raises scrapy/twisted logging to INFO, ``-vv`` raises
    everything to DEBUG; by default the framework logs at WARNING while
    the spider and pipeline log at INFO.
    """
    # Keep Scrapy from installing its own root handler; we attach ours.
    configure_logging(install_root_handler=False)

    if verbose >= 2:
        module_log_level = logging.DEBUG
        spider_log_level = logging.DEBUG
        pipeline_log_level = logging.DEBUG
    elif verbose == 1:
        module_log_level = logging.INFO
        spider_log_level = logging.INFO
        pipeline_log_level = logging.INFO
    else:
        module_log_level = logging.WARNING
        spider_log_level = logging.INFO
        pipeline_log_level = logging.INFO

    logging_handlers = [
        logging.StreamHandler()
    ]
    formatter = logging.Formatter(
        '[%(asctime)s] [%(levelname)-8s] %(name)s - %(message)s',
        datefmt='%H:%M:%S')
    # BUG FIX: the original iterated an undefined name ``handlers``
    # (NameError at runtime); format the handlers we actually created.
    for handler in logging_handlers:
        handler.setFormatter(formatter)

    module_loggers = [
        logging.getLogger('scrapy'),
        logging.getLogger('twisted')
    ]
    setup_loggers(module_loggers, module_log_level, logging_handlers)

    # BUG FIX: Scrapy's ``Spider.logger`` is an instance property, so
    # class-level ``MySpider.logger`` is the property object itself and
    # has no setLevel/addHandler. Configure the named logger the spider
    # actually writes to (its ``name``) instead.
    spider_loggers = [
        logging.getLogger(getattr(MySpider, 'name', None) or 'my_spider')
    ]
    setup_loggers(spider_loggers, spider_log_level, logging_handlers)

    pipeline_loggers = [
        MyPipeline.logger
    ]
    setup_loggers(pipeline_loggers, pipeline_log_level, logging_handlers)

    settings = Settings({
        'ITEM_PIPELINES': {
            # Resolved against this module when the script runs directly.
            '__main__.MyPipeline': 1,
        }
    })
    runner = CrawlerRunner(settings)
    runner.crawl(MySpider)
    d = runner.join()
    # Stop the reactor once all crawls finish, on success or failure.
    d.addBoth(lambda _: reactor.stop())
    reactor.run()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment