Created
December 6, 2025 17:56
-
-
Save 3nws/7f4a28d6d9e6035c0bec5c7c15a9b359 to your computer and use it in GitHub Desktop.
Generates an EPUB file containing all chapters of an arc from https://witchculttranslation.com/ and uploads it to Dropbox.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import html5lib # type: ignore | |
| import requests | |
| import os | |
| import dropbox | |
| import re | |
| from ebooklib import epub | |
| from os.path import basename, join | |
| from bs4 import BeautifulSoup | |
| from config import app_folder, app_key, app_secret, oauth2_refresh_token | |
# Shared HTTP session, reused for all page and image downloads.
session = requests.session()

# Dropbox client authenticated via a long-lived refresh token
# (credentials come from the local config module).
dbx = dropbox.Dropbox(
    app_key=app_key, app_secret=app_secret, oauth2_refresh_token=oauth2_refresh_token
)

# Ensure the target app folder exists; a GetMetadataError means "not found",
# so create the folder in that case. isinstance replaces the fragile
# exact-type comparison (`type(e.error) == ...`).
try:
    dbx.files_get_metadata(app_folder)
except dropbox.exceptions.ApiError as e:
    if isinstance(e.error, dropbox.files.GetMetadataError):
        dbx.files_create_folder(app_folder)

# Arc number to scrape; change this to build a different arc's EPUB.
arc = 9
chapters_url = f"https://witchculttranslation.com/arc-{arc}/"
def upload_to_dbox(epub_file_name):
    """Upload the named EPUB file into the configured Dropbox app folder."""
    with open(epub_file_name, "rb") as fh:
        payload = fh.read()
    dbx.files_upload(payload, f"{app_folder}/{epub_file_name}")
| def _slugify(s: str): | |
| s = str(s) | |
| s = s.strip() | |
| s = re.sub(r'[^\w\s-]', '', s, flags=re.UNICODE) | |
| s = re.sub(r'[\s]+', '_', s) | |
| return s or "untitled" | |
def create_epub(arc_title, titles, contents, images_, cover_url=None):
    """Assemble an EPUB from scraped chapters and upload it to Dropbox.

    Args:
        arc_title: Book title; also used to derive the output file name.
        titles: Chapter titles, parallel with *contents* and *images_*.
        contents: Chapter HTML strings (full documents or bare fragments).
        images_: Per-chapter lists of local image file paths; each file is
            embedded into the book and then deleted from disk.
        cover_url: Optional URL of a cover image to download and embed.
    """
    book = epub.EpubBook()
    cover = session.get(cover_url, stream=True) if cover_url else None

    # Use the arc slug from the listing URL as a stable book identifier.
    chapter_slug = list(filter(None, chapters_url.split("/")))[-1]
    book.set_identifier(chapter_slug)
    book.set_title(arc_title)
    book.set_language("en")
    if cover_url and cover:
        book.set_cover("cover.jpg", cover.content)
        del cover  # release the response body early

    chapters = []
    css_item = epub.EpubItem(
        uid="style_nav",
        file_name="styles/style.css",
        media_type="text/css",
        content="body { color: #000; font-family: serif; }",
    )
    book.add_item(css_item)

    for title, content, images in zip(titles, contents, images_):
        safe_title = _slugify(title)
        file_name = f"{safe_title}.xhtml"
        # Full documents pass through unchanged; bare fragments get wrapped
        # in a minimal XHTML shell that links the shared stylesheet.
        if "<html" in content.lower():
            chapter_body = content
        else:
            chapter_body = f"""
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>{title}</title>
<link rel="stylesheet" type="text/css" href="../styles/style.css"/>
</head>
<body>
<h1>{title}</h1>
<div>{content}</div>
</body>
</html>
"""
        chapter = epub.EpubHtml(title=title, file_name=file_name, lang="en")
        chapter.content = chapter_body

        for idx, image_path in enumerate(images):
            image_basename = basename(image_path)
            epub_img_path = f"images/{image_basename}"
            with open(image_path, "rb") as f:
                img = epub.EpubImage(
                    uid=f"img_{safe_title}_{idx}",
                    file_name=epub_img_path,
                    content=f.read(),
                )
            book.add_item(img)
            # Best-effort cleanup of the temporary local copy; narrowed from
            # a blanket `except Exception` to the errors os.remove can raise.
            try:
                os.remove(image_path)
            except OSError:
                pass
            # Rewrite references so the chapter HTML points at the embedded
            # image. (The original also did replace(image_basename,
            # image_basename) — a no-op, removed.)
            chapter.content = chapter.content.replace(image_path, epub_img_path)

        book.add_item(chapter)
        chapters.append(chapter)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    book.toc = tuple(chapters)
    book.spine = ["nav"] + chapters

    # ":" is illegal in file names on some filesystems; strip it.
    epub_file_name = f"Re_Zero_Web_Novel_-_{arc_title}.epub".replace(":", "")
    epub.write_epub(epub_file_name, book)
    try:
        upload_to_dbox(epub_file_name)
    except dropbox.exceptions.ApiError as e:
        # Swallow upload failures only; anything else is unexpected.
        if not isinstance(e.error, dropbox.files.UploadError):
            raise
def get_entry_content(url):
    """Fetch *url* and return its `.entry-content` element.

    Returns None when the response status is not 200. May also return None
    when the page has no `.entry-content` element.
    """
    response = session.get(url)
    if response.status_code != 200:
        return None
    markup = response.content.decode("utf-8")
    soup = BeautifulSoup(markup, "html5lib")
    return soup.select_one(".entry-content")
def get_entry_title(url):
    """Fetch *url* and return its `.entry-title` element.

    Returns None when the response status is not 200. May also return None
    when the page has no `.entry-title` element.
    """
    response = session.get(url)
    if response.status_code != 200:
        return None
    markup = response.content.decode("utf-8")
    soup = BeautifulSoup(markup, "html5lib")
    return soup.select_one(".entry-title")
def main():
    """Scrape every chapter of the configured arc, localize inline images,
    and package the result into an EPUB that is uploaded to Dropbox."""
    entry_content = get_entry_content(chapters_url)
    if entry_content is None:
        # Listing page unavailable or malformed; nothing to build.
        return
    chapters = entry_content.select("li")
    img_elem = entry_content.select_one("img:first-child")
    # Guard: the listing may have no image; create_epub accepts cover_url=None.
    cover_url = img_elem.get("src") if img_elem else None
    title_elem = get_entry_title(chapters_url)
    entry_title = title_elem.text if title_elem else f"arc-{arc}"

    chapter_titles = []
    contents = []
    images_ = []
    for chapter in chapters:
        a_tag = chapter.findChild()
        if a_tag is None:
            continue  # empty <li>
        chapter_url = a_tag.get("href")
        if chapter_url is None:
            continue
        chapter_title = a_tag.text
        entry_content = get_entry_content(chapter_url)
        if entry_content is None:
            continue  # chapter page failed to load; skip it
        images = []
        # Only the first 10 top-level nodes are scanned for images
        # (matches the site's layout, where images appear near the top).
        for idx, tag in enumerate(entry_content.contents[:10]):
            # NavigableString subclasses str, so its .find is str.find and
            # returns an int — the original could crash on .get("src") there.
            if isinstance(tag, str):
                continue
            image = tag.find("img")
            if image is None:
                continue
            image_url = image.get("src")
            if not image_url:
                continue
            resp = session.get(image_url, stream=True)
            image_path = (
                f"images/local_image_{idx}_{chapter_title.replace(' ', '')}.jpg"
            )
            os.makedirs(os.path.dirname(image_path), exist_ok=True)
            with open(image_path, "wb") as f:
                f.write(resp.content)
            images.append(image_path)
            # Point the chapter HTML at the local copy so create_epub can
            # later rewrite it to the in-book path.
            image["src"] = image_path
            del resp
        content = "".join(str(node) for node in entry_content.contents)
        chapter_titles.append(chapter_title)
        contents.append(content)
        images_.append(images)

    create_epub(entry_title, chapter_titles, contents, images_, cover_url)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment