Skip to content

Instantly share code, notes, and snippets.

@3nws
Created December 6, 2025 17:56
Show Gist options
  • Select an option

  • Save 3nws/7f4a28d6d9e6035c0bec5c7c15a9b359 to your computer and use it in GitHub Desktop.

Select an option

Save 3nws/7f4a28d6d9e6035c0bec5c7c15a9b359 to your computer and use it in GitHub Desktop.
Generates an EPUB file containing all chapters of an arc on https://witchculttranslation.com/ and uploads it to Dropbox.
import html
import os
import re
from os.path import basename, join

import dropbox
import html5lib  # type: ignore
import requests
from bs4 import BeautifulSoup
from ebooklib import epub

from config import app_folder, app_key, app_secret, oauth2_refresh_token
# One shared HTTP session for every page and image request made by this script.
session = requests.Session()

# Dropbox client built from the credentials in config.py.
dbx = dropbox.Dropbox(
    app_key=app_key,
    app_secret=app_secret,
    oauth2_refresh_token=oauth2_refresh_token,
)

# Make sure the destination folder exists before anything is uploaded.
try:
    dbx.files_get_metadata(app_folder)
except dropbox.exceptions.ApiError as e:
    error = e.error
    folder_missing = (
        isinstance(error, dropbox.files.GetMetadataError)
        and error.is_path()
        and error.get_path().is_not_found()
    )
    if not folder_missing:
        # Creating the folder only helps when the lookup failed because the
        # path does not exist; any other failure is surfaced unchanged.
        raise
    dbx.files_create_folder(app_folder)

# Arc number to export; it selects the index page that lists the chapters.
arc = 9
chapters_url = f"https://witchculttranslation.com/arc-{arc}/"
def upload_to_dbox(epub_file_name):
    """Upload a local file into the configured Dropbox app folder.

    The file is read completely into memory and sent with a single
    ``files_upload`` call. The remote path is ``app_folder`` joined with
    ``epub_file_name``.

    Args:
        epub_file_name: Path of the local file to upload; the same string is
            used as the file name inside ``app_folder``.
    """
    destination = f"{app_folder}/{epub_file_name}"
    with open(epub_file_name, "rb") as epub_file:
        payload = epub_file.read()
        dbx.files_upload(payload, destination)
def _slugify(s: str):
s = str(s)
s = s.strip()
s = re.sub(r'[^\w\s-]', '', s, flags=re.UNICODE)
s = re.sub(r'[\s]+', '_', s)
return s or "untitled"
def create_epub(arc_title, titles, contents, images_, cover_url=None):
    """Build an EPUB for one arc, write it to disk and upload it to Dropbox.

    Args:
        arc_title: Title of the arc; used as the book title and as part of
            the output file name.
        titles: Chapter titles in reading order.
        contents: Chapter bodies as HTML strings, parallel to ``titles``.
        images_: For each chapter, the local paths of the image files its
            HTML refers to. Every file is embedded in the book and then
            deleted from disk.
        cover_url: Optional URL of the cover image. The cover is skipped when
            this is empty or the download returns an error status.

    Raises:
        dropbox.exceptions.ApiError: If the upload fails with an error that
            is not a ``dropbox.files.UploadError``.
    """
    book = epub.EpubBook()

    # The last non-empty path segment of the arc URL is the book identifier.
    arc_id = [segment for segment in chapters_url.split("/") if segment][-1]
    book.set_identifier(arc_id)
    book.set_title(arc_title)
    book.set_language("en")

    if cover_url:
        cover = session.get(cover_url)
        if cover.ok:
            book.set_cover("cover.jpg", cover.content)

    css_item = epub.EpubItem(
        uid="style_nav",
        file_name="styles/style.css",
        media_type="text/css",
        content="body { color: #000; font-family: serif; }",
    )
    book.add_item(css_item)

    chapters = []
    used_slugs = set()
    for title, content, images in zip(titles, contents, images_):
        # Two titles can slugify to the same text; suffix repeats so every
        # chapter file name and image uid stays unique inside the book.
        base_slug = _slugify(title)
        safe_title = base_slug
        repeat = 1
        while safe_title in used_slugs:
            repeat += 1
            safe_title = f"{base_slug}_{repeat}"
        used_slugs.add(safe_title)
        file_name = f"{safe_title}.xhtml"

        if "<html" in content.lower():
            chapter_body = content
        else:
            # The title is plain text, so escape it before embedding in markup.
            escaped_title = html.escape(title)
            chapter_body = f"""
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>{escaped_title}</title>
<link rel="stylesheet" type="text/css" href="styles/style.css"/>
</head>
<body>
<h1>{escaped_title}</h1>
<div>{content}</div>
</body>
</html>
"""

        chapter = epub.EpubHtml(title=title, file_name=file_name, lang="en")
        chapter.content = chapter_body
        # NOTE(review): EbookLib appears to rebuild the chapter <head> from
        # the item's registered links when writing, so the stylesheet is also
        # attached through add_item -- confirm against the installed version.
        chapter.add_item(css_item)

        for idx, image_path in enumerate(images):
            epub_img_path = f"images/{basename(image_path)}"
            with open(image_path, "rb") as f:
                img = epub.EpubImage(
                    uid=f"img_{safe_title}_{idx}",
                    file_name=epub_img_path,
                    content=f.read(),
                )
            book.add_item(img)
            try:
                os.remove(image_path)
            except OSError:
                # Best-effort cleanup: a leftover temp image must not abort
                # the build.
                pass
            # Point the chapter markup at the image's location in the book.
            chapter.content = chapter.content.replace(image_path, epub_img_path)

        book.add_item(chapter)
        chapters.append(chapter)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    book.toc = tuple(chapters)
    book.spine = ["nav", *chapters]

    epub_file_name = f"Re_Zero_Web_Novel_-_{arc_title}.epub".replace(":", "")
    epub.write_epub(epub_file_name, book)

    try:
        upload_to_dbox(epub_file_name)
    except dropbox.exceptions.ApiError as e:
        if not isinstance(e.error, dropbox.files.UploadError):
            raise
        # The EPUB is already on disk, so a rejected upload is reported
        # rather than treated as fatal.
        print(f"Dropbox upload of {epub_file_name} failed: {e.error}")
def _select_from_page(url, selector):
    """Fetch ``url`` and return the first element matching ``selector``.

    Returns ``None`` when the response status is not 200 or when nothing on
    the page matches the selector.
    """
    res = session.get(url)
    if res.status_code != 200:
        return None
    soup = BeautifulSoup(res.content.decode("utf-8"), "html5lib")
    return soup.select_one(selector)


def get_entry_content(url):
    """Return the ``.entry-content`` element of the page at ``url``.

    Args:
        url: Address of the page to download.

    Returns:
        The first element matching ``.entry-content``, or ``None`` when the
        response status is not 200 or no such element exists.
    """
    return _select_from_page(url, ".entry-content")


def get_entry_title(url):
    """Return the ``.entry-title`` element of the page at ``url``.

    Args:
        url: Address of the page to download.

    Returns:
        The first element matching ``.entry-title``, or ``None`` when the
        response status is not 200 or no such element exists.
    """
    return _select_from_page(url, ".entry-title")
def main():
    """Scrape the configured arc and turn it into an EPUB.

    Reads the arc index page at ``chapters_url``, follows the link in every
    ``<li>`` of its entry content, downloads images found near the top of
    each chapter, and passes everything to ``create_epub``.

    Raises:
        RuntimeError: If the arc index page cannot be loaded or has no
            ``.entry-content`` / ``.entry-title`` element.
    """
    index_content = get_entry_content(chapters_url)
    if index_content is None:
        raise RuntimeError(f"Could not load the arc index page: {chapters_url}")

    # The cover is optional; create_epub skips it when cover_url is None.
    img_elem = index_content.select_one("img:first-child")
    cover_url = img_elem.get("src") if img_elem is not None else None

    title_elem = get_entry_title(chapters_url)
    if title_elem is None:
        raise RuntimeError(f"Could not find the arc title on: {chapters_url}")
    entry_title = title_elem.text

    images_dir = "images"
    chapter_titles = []
    contents = []
    images_ = []
    for chapter in index_content.select("li"):
        # The first element inside the list item is expected to be the link.
        a_tag = chapter.find()
        if a_tag is None:
            continue
        chapter_url = a_tag.get("href")
        if chapter_url is None:
            continue
        chapter_title = a_tag.text

        chapter_content = get_entry_content(chapter_url)
        if chapter_content is None:
            print(f"Skipping {chapter_title!r}: could not load {chapter_url}")
            continue

        # File-system-safe stem; raw titles may contain "/", ":" or "?".
        image_stem = _slugify(chapter_title)
        chapter_no = len(chapter_titles)
        images = []
        # Only the first 10 top-level nodes are scanned, and only the first
        # image of each one is localised.
        for idx, tag in enumerate(chapter_content.contents[:10]):
            # Text nodes are str subclasses whose .find() is str.find, which
            # returns an int rather than an element.
            if isinstance(tag, str):
                continue
            image = tag if tag.name == "img" else tag.find("img")
            if image is None:
                continue
            image_url = image.get("src")
            if not image_url:
                continue
            response = session.get(image_url)
            if not response.ok:
                # Keep the remote src rather than saving an error page as an
                # image.
                continue
            image_path = (
                f"{images_dir}/local_image_{chapter_no}_{idx}_{image_stem}.jpg"
            )
            os.makedirs(images_dir, exist_ok=True)
            with open(image_path, "wb") as f:
                f.write(response.content)
            images.append(image_path)
            # Rewrite the markup so it refers to the downloaded copy.
            image["src"] = image_path

        content = "".join(str(tag) for tag in chapter_content.contents)
        chapter_titles.append(chapter_title)
        contents.append(content)
        images_.append(images)

    create_epub(entry_title, chapter_titles, contents, images_, cover_url)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment