feihong · December 25, 2021 20:52 · Camouflager · Mar 29, 2024 · haoliplus · Mar 31, 2024
diff --git a/convert-epub-to-cbz.py b/convert-epub-to-cbz.py
 from pathlib import Path
 import zipfile
 import sys
 import xml.etree.ElementTree
 from typing import List

 # The directory holding the .epub files is the only (mandatory) argument.
 if len(sys.argv) < 2:
  # Passing a string to sys.exit prints it on stderr and exits with status 1,
  # so callers can tell a usage error apart from a successful run.
  sys.exit('Please provide directory')

 # expanduser() lets the argument start with "~" even when the shell did not
 # expand it.
 source_dir = Path(sys.argv[1]).expanduser()

 # Namespace prefixes passed to ElementTree find()/findall() path queries.
 ns = {
  'opf': 'http://www.idpf.org/2007/opf',
  'xhtml': 'http://www.w3.org/1999/xhtml',
 }

 def get_page_paths(zf: zipfile.ZipFile):
  """Yield the href of every XHTML item listed in the archive's 'vol.opf'.

  Args:
    zf: Open EPUB archive; it must contain a member named 'vol.opf'.

  Yields:
    The 'href' attribute of each OPF ``item`` element whose media type is
    'application/xhtml+xml', in document order.
  """
  xhtml_items = ".//opf:item[@media-type='application/xhtml+xml']"
  with zf.open('vol.opf') as opf_file:
    package = xml.etree.ElementTree.parse(opf_file)
    yield from (entry.attrib['href'] for entry in package.findall(xhtml_items, ns))

 def get_image_paths(zf: zipfile.ZipFile, page_paths: List[str]):
  """Yield the path of the first image referenced by each XHTML page.

  Args:
    zf: Open EPUB archive that contains the pages.
    page_paths: Archive member names of the XHTML pages to inspect.

  Yields:
    A ``Path`` built from the 'src' of the first XHTML ``img`` element of
    each page. Pages without an ``img`` element (or whose ``img`` has no
    'src') are skipped instead of raising AttributeError/KeyError.
  """
  for page_path in page_paths:
    with zf.open(page_path) as fp:
      tree = xml.etree.ElementTree.parse(fp)
    img = tree.find('.//xhtml:img', ns)
    if img is None:
      # Text-only page: there is no image to extract.
      continue
    src = img.attrib.get('src')
    if not src:
      continue
    # Chop off a leading '../' only when it is really there, so a src that is
    # already relative to the archive root is not truncated.
    if src.startswith('../'):
      src = src[3:]
    yield Path(src)

 def get_image_datas(zf: zipfile.ZipFile, image_paths: List[str]):
  """Yield ``(output_name, raw_bytes)`` for each image in the archive.

  Images whose file name starts with 'vol-' are renamed to a running,
  zero-padded page number ('p001', 'p002', ...) keeping their extension;
  every other image keeps its own file name.

  Args:
    zf: Open EPUB archive that contains the images.
    image_paths: Archive paths of the images, as ``str`` or ``Path``.

  Yields:
    Tuples of the name to use in the output archive and the image bytes.

  Raises:
    KeyError: If an image path is not a member of the archive.
  """
  counter = 1
  for image_path in image_paths:
    # Normalise so that plain strings work as the annotation promises.
    image_path = Path(image_path)
    # Zip member names always use '/', so ask for the POSIX form; str() would
    # produce backslashes on Windows and the lookup would fail.
    data = zf.read(image_path.as_posix())
    if image_path.name.startswith('vol-'):
      name = f'p{counter:03}{image_path.suffix}'
      counter += 1
    else:
      name = image_path.name
    yield name, data

 def get_image_datas_from_epub_file(epub_file: Path):
  """Yield ``(name, data)`` pairs for the page images of one EPUB file.

  Opens *epub_file* as a zip archive and chains the three lazy steps:
  page paths -> image paths -> image names and bytes. The archive stays
  open until the generator is exhausted or closed.
  """
  with zipfile.ZipFile(epub_file) as archive:
    pages = get_page_paths(archive)
    images = get_image_paths(archive, pages)
    yield from get_image_datas(archive, images)

 def convert(epub_file: Path):
  """Write a .cbz next to *epub_file* containing the EPUB's page images.

  The output path is *epub_file* with its suffix replaced by '.cbz'. Images
  are stored without compression, and a confirmation line is printed once
  the archive has been written.
  """
  cbz_path = epub_file.with_suffix('.cbz')
  with zipfile.ZipFile(cbz_path, 'w') as cbz:
    for entry_name, payload in get_image_datas_from_epub_file(epub_file):
      cbz.writestr(entry_name, payload, compress_type=zipfile.ZIP_STORED)
  print(f'Generated {cbz_path}')

 # Convert every *.epub located directly inside source_dir (no recursion).
 epub_candidates = source_dir.glob('*.epub')
 for epub_file in epub_candidates:
  convert(epub_file)
	from pathlib import Path
	import zipfile
	import sys
	import xml.etree.ElementTree
	from typing import List

	# The directory holding the .epub files is the only (mandatory) argument.
	if len(sys.argv) < 2:
	  # Passing a string to sys.exit prints it on stderr and exits with status 1,
	  # so callers can tell a usage error apart from a successful run.
	  sys.exit('Please provide directory')

	# expanduser() lets the argument start with "~" even when the shell did not
	# expand it.
	source_dir = Path(sys.argv[1]).expanduser()

	# Namespace prefixes passed to ElementTree find()/findall() path queries.
	ns = {
	  'opf': 'http://www.idpf.org/2007/opf',
	  'xhtml': 'http://www.w3.org/1999/xhtml',
	}

	def get_page_paths(zf: zipfile.ZipFile):
	  """Yield the href of every XHTML item listed in the archive's 'vol.opf'.

	  Args:
	    zf: Open EPUB archive; it must contain a member named 'vol.opf'.

	  Yields:
	    The 'href' attribute of each OPF ``item`` element whose media type is
	    'application/xhtml+xml', in document order.
	  """
	  with zf.open('vol.opf') as fp:
	    tree = xml.etree.ElementTree.parse(fp)
	    for item in tree.findall(".//opf:item[@media-type='application/xhtml+xml']", ns):
	      yield item.attrib['href']

	def get_image_paths(zf: zipfile.ZipFile, page_paths: List[str]):
	  """Yield the path of the first image referenced by each XHTML page.

	  Args:
	    zf: Open EPUB archive that contains the pages.
	    page_paths: Archive member names of the XHTML pages to inspect.

	  Yields:
	    A ``Path`` built from the 'src' of the first XHTML ``img`` element of
	    each page. Pages without an ``img`` element (or whose ``img`` has no
	    'src') are skipped instead of raising AttributeError/KeyError.
	  """
	  for page_path in page_paths:
	    with zf.open(page_path) as fp:
	      tree = xml.etree.ElementTree.parse(fp)
	    img = tree.find('.//xhtml:img', ns)
	    if img is None:
	      # Text-only page: there is no image to extract.
	      continue
	    src = img.attrib.get('src')
	    if not src:
	      continue
	    # Chop off a leading '../' only when it is really there, so a src that is
	    # already relative to the archive root is not truncated.
	    if src.startswith('../'):
	      src = src[3:]
	    yield Path(src)

	def get_image_datas(zf: zipfile.ZipFile, image_paths: List[str]):
	  """Yield ``(output_name, raw_bytes)`` for each image in the archive.

	  Images whose file name starts with 'vol-' are renamed to a running,
	  zero-padded page number ('p001', 'p002', ...) keeping their extension;
	  every other image keeps its own file name.

	  Args:
	    zf: Open EPUB archive that contains the images.
	    image_paths: Archive paths of the images, as ``str`` or ``Path``.

	  Yields:
	    Tuples of the name to use in the output archive and the image bytes.

	  Raises:
	    KeyError: If an image path is not a member of the archive.
	  """
	  counter = 1
	  for image_path in image_paths:
	    # Normalise so that plain strings work as the annotation promises.
	    image_path = Path(image_path)
	    # Zip member names always use '/', so ask for the POSIX form; str() would
	    # produce backslashes on Windows and the lookup would fail.
	    data = zf.read(image_path.as_posix())
	    if image_path.name.startswith('vol-'):
	      name = f'p{counter:03}{image_path.suffix}'
	      counter += 1
	    else:
	      name = image_path.name
	    yield name, data

	def get_image_datas_from_epub_file(epub_file: Path):
	  """Yield ``(name, data)`` pairs for the page images of one EPUB file.

	  Opens *epub_file* as a zip archive and chains the three lazy steps:
	  page paths -> image paths -> image names and bytes. The archive stays
	  open until the generator is exhausted or closed.
	  """
	  with zipfile.ZipFile(epub_file) as zf:
	    page_paths = get_page_paths(zf)
	    image_paths = get_image_paths(zf, page_paths)
	    yield from get_image_datas(zf, image_paths)

	def convert(epub_file: Path):
	  """Write a .cbz next to *epub_file* containing the EPUB's page images.

	  The output path is *epub_file* with its suffix replaced by '.cbz'. Images
	  are stored without compression, and a confirmation line is printed once
	  the archive has been written.
	  """
	  output_file = epub_file.with_suffix('.cbz')
	  with zipfile.ZipFile(output_file, 'w') as zf:
	    for name, image_data in get_image_datas_from_epub_file(epub_file):
	      zf.writestr(name, image_data, compress_type=zipfile.ZIP_STORED)
	  print(f'Generated {output_file}')

	# Convert every *.epub located directly inside source_dir (no recursion).
	for epub_file in source_dir.glob('*.epub'):
	  convert(epub_file)
No results found