Skip to content

Instantly share code, notes, and snippets.

@popunbom
Created July 30, 2019 17:34
Show Gist options
  • Select an option

  • Save popunbom/14efecea47fc20c80b925ecdd37116f7 to your computer and use it in GitHub Desktop.

Select an option

Save popunbom/14efecea47fc20c80b925ecdd37116f7 to your computer and use it in GitHub Desktop.
[Python] Porting Script: Growi <--> esa.io
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymongo as mongo
import re
import json
import requests as r
from pprint import pprint
import time
#################################
######## MongoDB(Growi) #########
#################################
# DB_NAME: name of the MongoDB database that holds the Growi data.
DB_NAME = "crowi"

# USING_COLLECTIONS: names of every collection used by this script.
USING_COLLECTIONS = {
    "pages",
    "comments",
    "revisions",
    "users",
}
# PIPELINES: aggregation pipeline run against the "pages" collection.
# Each resulting document has the shape
#   {_id, creator: <user name>, path, body, comments: [{creator, createdAt, body}, ...]}
PIPELINES = [
    # 1. Attach the comments whose "page" field equals this page's _id.
    {
        "$lookup": {
            "from": "comments",
            "localField": "_id",
            "foreignField": "page",
            "as": "comments",
        }
    },
    # 2. One document per comment; pages without comments are kept.
    {
        "$unwind": {
            "path": "$comments",
            "preserveNullAndEmptyArrays": True,
        }
    },
    # 3. Replace each comment's creator reference with the matching user document(s).
    {
        "$lookup": {
            "from": "users",
            "localField": "comments.creator",
            "foreignField": "_id",
            "as": "comments.creator",
        }
    },
    # 4. Flatten the single-element creator array produced by the lookup.
    {
        "$unwind": {
            "path": "$comments.creator",
            "preserveNullAndEmptyArrays": True,
        }
    },
    # 5. Keep only the fields needed later and reshape each comment.
    {
        "$project": {
            "creator": "$creator",
            "revision": "$revision",
            "comments": {
                # The commenter joined in stage 3 lives under "comments.creator".
                # The page-level "$creator" is not joined with "users" until
                # stage 9, so "$creator.name" cannot yield the commenter's name here.
                "creator": "$comments.creator.name",
                "createdAt": "$comments.createdAt",
                "body": "$comments.comment",
            },
        }
    },
    # 6. Collapse back to one document per page, collecting its comments.
    {
        "$group": {
            "_id": "$_id",
            "creator": {"$first": "$creator"},
            "revision": {"$first": "$revision"},
            "comments": {"$push": "$comments"},
        }
    },
    # 7. Join the revision referenced by the page as "content".
    {
        "$lookup": {
            "from": "revisions",
            "localField": "revision",
            "foreignField": "_id",
            "as": "content",
        }
    },
    # 8. Flatten "content"; pages with no matching revision are dropped here.
    {
        "$unwind": {
            "path": "$content",
        }
    },
    # 9. Join the page creator's user document.
    {
        "$lookup": {
            "from": "users",
            "localField": "creator",
            "foreignField": "_id",
            "as": "creator",
        }
    },
    # 10. Flatten "creator"; pages whose creator is not found are dropped here.
    {
        "$unwind": {
            "path": "$creator",
        }
    },
    # 11. Final shape consumed by the porting loop.
    {
        "$project": {
            "creator": "$creator.name",
            "path": "$content.path",
            "body": "$content.body",
            "comments": "$comments",
        }
    },
    # 12. Exclude pages whose path matches the regex "user".
    # NOTE(review): the pattern is unanchored, so it also excludes paths that
    # merely contain "user" (e.g. ".../user-guide") -- confirm this is intended.
    {
        "$match": {
            "path": {
                "$not": re.compile(r"user"),
            }
        }
    },
]
#################################
############ esa.io #############
#################################
# API_URL: base URL of the esa.io API.
API_URL = "https://api.esa.io/v1"

# LIMITATION: request quota within a time window
# (MAX_REQUESTS_IN_TIME requests per MINUTES_INTERVAL minutes).
LIMITATION = {
    "MAX_REQUESTS_IN_TIME": 75,
    "MINUTES_INTERVAL": 15,
}

# SEC_OFFSET: extra seconds added to the quota window when the
# per-request wait time is computed.
SEC_OFFSET = 10

# TEAM_NAME: esa.io team name.
TEAM_NAME = "saji-lab"

# ACCESS_TOKEN: access token for esa.io (placeholder value).
ACCESS_TOKEN = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

# MEMBER_MAP: member mapping between Growi and esa.io.
#   key   -- Growi user name (the "creator" value produced by PIPELINES)
#   value -- esa.io screen_name
MEMBER_MAP = {
    "Fumiya ENDOU": "fendou",
    "Sana Katsumi": "katsumi",
    "Sanosuke Kato": "skato",
    "Saori Oda": "oda_saori",
    "So Takeuchi": "stakeuchi",
    "amemiya": "amemiya",
    "spdbear": "spdbear",
    # Growi users without an esa.io mapping:
    #   'Shun Nakayama', 'Yuki Suganuma', 'ikumi iwashita'
}

# DEFAULT_AUTHOR: esa.io screen_name used in place of any Growi user
#                 that does not appear in MEMBER_MAP.
DEFAULT_AUTHOR = "fendou"
# Connect to the MongoDB server on localhost:27017.
client = mongo.MongoClient("localhost", 27017)

# Abort early when the Growi database is not present on the server.
available_dbs = client.list_database_names()
if DB_NAME not in available_dbs:
    raise Exception(f"Cannot find the database: {DB_NAME}")
db = client[DB_NAME]

# Abort early unless every required collection exists in the database.
if not USING_COLLECTIONS.issubset(db.list_collection_names()):
    raise Exception(f"Cannot find all of the collections: {USING_COLLECTIONS}")

# Run the aggregation; "pages" is the resulting cursor of flattened page documents.
pages = db["pages"].aggregate(PIPELINES)
# Convert every aggregated Growi page into an esa.io "create post" payload.
posts = []
for page in pages:
    path = page["path"]

    # Split at the last "/": everything before it is the category, the rest
    # is the title. rpartition never fails, even if the path has no "/".
    category, _, title = path.rpartition("/")
    if not title:
        # Nothing after the last "/" (e.g. the root page "/").
        title = "TopPage(Growi)"
    elif not category:
        # Top-level page such as "/foo": use its name as the category and
        # store the page itself as that category's README.
        category = title
        title = "README"

    # Pages whose category contains "C++" are not ported.
    if "C++" in category:
        continue

    posts.append({
        "post": {
            "name": title,
            "body_md": page["body"],
            # Every path component except the last one becomes a tag.
            "tags": path.lstrip("/").split("/")[:-1],
            "category": category,
            "wip": False,
            "message": "Port from Growi",
            # .get() (not .setdefault()) so MEMBER_MAP is not mutated when a
            # creator is unknown and DEFAULT_AUTHOR is substituted.
            "user": MEMBER_MAP.get(page["creator"], DEFAULT_AUTHOR),
        }
    })
# Minimum number of seconds between two requests so that no more than
# MAX_REQUESTS_IN_TIME requests are sent per MINUTES_INTERVAL minutes
# (the window is padded by SEC_OFFSET seconds).
sec_interval = (
    LIMITATION["MINUTES_INTERVAL"] * 60 + SEC_OFFSET
) / LIMITATION["MAX_REQUESTS_IN_TIME"]

# NOTE(review): the first 60 posts are skipped -- this looks like a leftover
# offset for resuming an interrupted run; confirm before a fresh import.
for post_data in posts[60:]:
    # Monotonic clock: the elapsed time is unaffected by system clock changes.
    time_before = time.monotonic()
    res = r.post(
        f"{API_URL}/teams/{TEAM_NAME}/posts",
        headers={"Authorization": f"Bearer {ACCESS_TOKEN}"},
        json=post_data,
    )
    elapsed = time.monotonic() - time_before

    # A request slower than sec_interval would give a negative value, and
    # time.sleep() raises ValueError on negative input -- clamp to zero.
    sec_sleep = max(0.0, sec_interval - elapsed)

    post = post_data["post"]
    print(f"{res} -- {post['category']}/{post['name']}")
    print(
        f"Wait for {sec_sleep} secs to avoid access limitation ... ",
        flush=True,
    )
    time.sleep(sec_sleep)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment