Created
July 30, 2019 17:34
-
-
Save popunbom/14efecea47fc20c80b925ecdd37116f7 to your computer and use it in GitHub Desktop.
[Python] Porting Script: Growi --> esa.io
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| import pymongo as mongo | |
| import re | |
| import json | |
| import requests as r | |
| from pprint import pprint | |
| import time | |
# ---------------------------------------------------------------
# MongoDB (Growi) settings
# ---------------------------------------------------------------

# Name of the Growi database inside MongoDB.
DB_NAME = "crowi"

# Names of the collections this script reads; all of them must exist.
USING_COLLECTIONS = {
    "pages",
    "comments",
    "revisions",
    "users",
}
# Aggregation pipeline run against the "pages" collection.
#
# Output documents have the fields:
#   _id      - page id
#   creator  - name of the user who created the page
#   path     - path stored on the page's current revision
#   body     - body stored on the page's current revision
#   comments - list of {creator, createdAt, body} sub-documents
PIPELINES = [
    # Attach every comment that points at this page.
    {
        "$lookup": {
            "from": "comments",
            "localField": "_id",
            "foreignField": "page",
            "as": "comments",
        }
    },
    # One document per comment; pages without comments are kept.
    {
        "$unwind": {
            "path": "$comments",
            "preserveNullAndEmptyArrays": True,
        }
    },
    # Replace the comment's creator reference with the user document.
    {
        "$lookup": {
            "from": "users",
            "localField": "comments.creator",
            "foreignField": "_id",
            "as": "comments.creator",
        }
    },
    {
        "$unwind": {
            "path": "$comments.creator",
            "preserveNullAndEmptyArrays": True,
        }
    },
    # Reduce each comment to the fields we care about.
    {
        "$project": {
            "creator": "$creator",
            "revision": "$revision",
            "comments": {
                # The commenter was joined into "comments.creator" above.
                # The page-level "creator" is still an un-joined reference
                # at this stage, so "$creator.name" would never resolve.
                "creator": "$comments.creator.name",
                # Alternative: "Name <email>" via
                # {"$concat": ["$comments.creator.name", " <",
                #              "$comments.creator.email", ">"]}
                "createdAt": "$comments.createdAt",
                "body": "$comments.comment",
            },
        }
    },
    # Fold the per-comment documents back into one document per page.
    {
        "$group": {
            "_id": "$_id",
            "creator": {"$first": "$creator"},
            "revision": {"$first": "$revision"},
            "comments": {"$push": "$comments"},
        }
    },
    # Join the page's revision; path and body are read from it below.
    {
        "$lookup": {
            "from": "revisions",
            "localField": "revision",
            "foreignField": "_id",
            "as": "content",
        }
    },
    # No preserveNullAndEmptyArrays: pages without a matching revision
    # are dropped here.
    {
        "$unwind": {
            "path": "$content",
        }
    },
    # Join the page creator's user document.
    {
        "$lookup": {
            "from": "users",
            "localField": "creator",
            "foreignField": "_id",
            "as": "creator",
        }
    },
    # Likewise, pages whose creator cannot be found are dropped.
    {
        "$unwind": {
            "path": "$creator",
        }
    },
    # Final shape of each exported page.
    {
        "$project": {
            "creator": "$creator.name",
            # Alternative: "Name <email>" via
            # {"$concat": ["$creator.name", " <", "$creator.email", ">"]}
            "path": "$content.path",
            "body": "$content.body",
            "comments": "$comments",
        }
    },
    # Exclude every page whose path contains "user".
    # NOTE(review): the pattern is unanchored, so any path containing the
    # substring "user" is excluded, not only a top-level user area.
    # Confirm whether an anchored pattern was intended.
    {
        "$match": {
            "path": {
                "$not": re.compile(r"user"),
            }
        }
    },
]
# ---------------------------------------------------------------
# esa.io settings
# ---------------------------------------------------------------

# Base URL of the esa.io API.
API_URL = "https://api.esa.io/v1"

# Access limit: at most MAX_REQUESTS_IN_TIME requests
# per MINUTES_INTERVAL minutes.
LIMITATION = dict(
    MAX_REQUESTS_IN_TIME=75,
    MINUTES_INTERVAL=15,
)

# Extra seconds added to the limit window when the spacing between
# requests is computed.
SEC_OFFSET = 10

# Name of the esa.io team that receives the posts.
TEAM_NAME = "saji-lab"

# Access token used to authenticate against esa.io.
ACCESS_TOKEN = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

# Member mapping between Growi and esa.io:
#   Growi (page->creator->username) <-> esa.io (screen_name)
MEMBER_MAP = {
    "Fumiya ENDOU": "fendou",
    "Sana Katsumi": "katsumi",
    "Sanosuke Kato": "skato",
    "Saori Oda": "oda_saori",
    "So Takeuchi": "stakeuchi",
    "amemiya": "amemiya",
    "spdbear": "spdbear",
    # Left unmapped in the original:
    # "Shun Nakayama": "---",
    # "Yuki Suganuma": "---",
    # "ikumi iwashita": "---",
}

# esa.io screen_name that stands in as the author for content whose
# Growi user does not appear in MEMBER_MAP.
DEFAULT_AUTHOR = "fendou"
# Connect to the local MongoDB instance and make sure the Growi database
# and every collection the pipeline touches are present before querying.
client = mongo.MongoClient("localhost", 27017)

available_databases = client.list_database_names()
if DB_NAME not in available_databases:
    raise Exception("Cannot find the database: " + DB_NAME)

db = client[DB_NAME]

available_collections = set(db.list_collection_names())
missing_any = not USING_COLLECTIONS.issubset(available_collections)
if missing_any:
    raise Exception(
        f"Cannot find all of the collections: {USING_COLLECTIONS}"
    )

# Run the aggregation; the result is consumed when the posts are built.
pages = db["pages"].aggregate(PIPELINES)
# Convert every aggregated Growi page into an esa.io "create post" payload.
posts = []
for page in pages:
    path = page["path"]

    # Split on the LAST "/": everything before it becomes the category,
    # the final segment becomes the title. rpartition gives the same
    # result as the greedy regex "(.*)/(.*)" but does not fail when the
    # path contains no "/" at all.
    category, _, title = path.rpartition("/")
    if not title:
        # Path ends with "/" (e.g. the root page).
        title = "TopPage(Growi)"
    elif not category:
        # Single-segment path: use the segment as the category and file
        # the page as that category's README.
        category = title
        title = "README"

    # Pages whose category contains "C++" are not ported.
    if "C++" in category:
        continue

    # Every path segment except the last one becomes a tag.
    tags = path.lstrip("/").split("/")[:-1]

    # Plain lookup with a fallback; .get() (unlike .setdefault()) does
    # not insert unknown creators into MEMBER_MAP.
    user = MEMBER_MAP.get(page["creator"], DEFAULT_AUTHOR)

    posts.append(
        {
            "post": {
                "name": title,
                "body_md": page["body"],
                "tags": tags,
                "category": category,
                "wip": False,
                "message": "Port from Growi",
                "user": user,
            }
        }
    )
# Seconds between consecutive requests so that no more than
# MAX_REQUESTS_IN_TIME requests are sent per (window + SEC_OFFSET) seconds.
sec_interval = (
    LIMITATION["MINUTES_INTERVAL"] * 60 + SEC_OFFSET
) / LIMITATION["MAX_REQUESTS_IN_TIME"]

# Index of the first post to send. The original script hard-coded 60 in
# the slice below.
# NOTE(review): presumably left over from resuming an interrupted run;
# set to 0 to send every post.
START_INDEX = 60

for post_data in posts[START_INDEX:]:
    # Monotonic clock: the elapsed time cannot go wrong or negative if
    # the wall clock is adjusted during the request.
    time_before = time.monotonic()
    res = r.post(
        f"{API_URL}/teams/{TEAM_NAME}/posts",
        headers={"Authorization": f"Bearer {ACCESS_TOKEN}"},
        json=post_data,
    )
    elapsed = time.monotonic() - time_before

    # A request slower than the interval would give a negative value,
    # and time.sleep() raises ValueError for negative arguments.
    sec_sleep = max(0.0, sec_interval - elapsed)

    print(
        "{resp} -- {categ}/{title}".format(
            categ=post_data["post"]["category"],
            title=post_data["post"]["name"],
            resp=str(res),
        )
    )
    print(
        "Wait for {0} secs to avoid access limitation ... ".format(sec_sleep),
        flush=True,
    )
    time.sleep(sec_sleep)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment