@nobleknightt
Created June 16, 2024 10:59
Download anime episodes from animepahe.ru
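The script below uses requests and BeautifulSoup for the static pages, and Selenium with headless Chrome for the JavaScript-rendered download links. A hedged setup line (package names inferred from the imports; a local Chrome installation is assumed):

pip install requests beautifulsoup4 selenium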
import argparse
from pathlib import Path
from time import sleep

import requests
from bs4 import BeautifulSoup, element
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

BASE_URL = "https://animepahe.ru"

def get_episode_urls(url: str) -> dict[str, str]:
    """Scrape the episode dropdown of a watch page and map episode labels to watch-page URLs."""
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    anchors = (
        soup.find_all("div", class_="theatre-settings")[0]
        .find_all("div", class_="dropdown-menu")[1]
        .find_all("a")
    )
    return {
        anchor.contents[0]: f"{BASE_URL}{anchor.get('href')}" for anchor in anchors
    }

def get_per_resolution_urls(url: str) -> dict[str, str]:
    """Scrape the download dropdown of a watch page and map each option's label to its external download page URL."""
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    anchors = (
        soup.find_all("div", class_="theatre-settings")[0]
        .find_all("div", class_="dropdown-menu")[3]
        .find_all("a")
    )

    def get_title(contents: list[element.PageElement]) -> str:
        # Flatten an anchor's mixed text/tag children into a single label string.
        title = []
        for content in contents:
            stripped_content = str(content).strip()
            if issubclass(type(content), element.NavigableString) and stripped_content:
                title.append(stripped_content)
            elif issubclass(type(content), element.Tag):
                title.append(str(content.contents[0]).strip())
        return " ".join(title)

    return {
        get_title(anchor.contents): f"{anchor.get('href')}" for anchor in anchors
    }

def get_actual_download_url(url: str) -> str | None:
    """Open the download page in headless Chrome and return the href of the dynamically inserted download link."""
    options = Options()
    options.add_argument("--headless")
    browser = webdriver.Chrome(options=options)
    browser.get(url)
    download_url = None
    try:
        sleep(5)  # give the page's JavaScript time to inject the download link
        download_url = (
            browser.find_element(By.TAG_NAME, "body")
            .find_element(By.CLASS_NAME, "redirect")
            .get_attribute("href")
        )
    except NoSuchElementException:
        # Link was not rendered yet; wait a little longer and retry once.
        sleep(5)
        download_url = (
            browser.find_element(By.TAG_NAME, "body")
            .find_element(By.CLASS_NAME, "redirect")
            .get_attribute("href")
        )
    browser.quit()
    return download_url

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--title", help="Title of the Anime", required=True)
    parser.add_argument(
        "-u", "--first-episode-url", help="URL of the first episode's watch page", required=True
    )
    args = parser.parse_args()

    # Write "<title>.txt" next to this script: one block per episode, each block
    # listing the resolved download URL for every available resolution.
    with (Path(__file__).parent / f"{args.title}.txt").open("w") as f:
        f.write(f"== {args.title} ==")
        f.write("\n\n")
        f.flush()

        episode_urls = get_episode_urls(args.first_episode_url)
        for episode, episode_url in episode_urls.items():
            f.write(episode)
            f.write("\n")
            per_resolution_urls = get_per_resolution_urls(episode_url)
            for resolution, resolution_url in per_resolution_urls.items():
                actual_download_url = get_actual_download_url(resolution_url)
                f.write(actual_download_url)
                f.write(f" [ {resolution} ]")
                f.write("\n")
            f.write("\n")
            f.flush()


if __name__ == "__main__":
    main()
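To run the script, pass the anime title (used only for the output filename) and the URL of the first episode's watch page. A minimal usage sketch, assuming the file is saved as animepahe.py (the filename, title, and placeholder URL are illustrative, not part of the gist):

python animepahe.py --title "Some Anime" --first-episode-url "https://animepahe.ru/play/<anime-session>/<episode-session>"

The resulting "Some Anime.txt" lists, per episode, one download URL per available resolution.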