Created
June 16, 2024 10:59
-
-
Save nobleknightt/955d564275efdfda202a5d6ceec8085b to your computer and use it in GitHub Desktop.
Download anime episodes from animepahe.ru
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| from pathlib import Path | |
| from time import sleep | |
| import requests | |
| from bs4 import BeautifulSoup, element | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.common.by import By | |
| BASE_URL = "https://animepahe.ru" | |
def get_episode_urls(url: str) -> dict[str, str]:
    """Return a mapping of episode label -> absolute episode page URL.

    Scrapes the second dropdown menu inside the "theatre-settings" block of an
    animepahe episode page, which lists every episode of the show.

    Args:
        url: URL of any episode page of the anime.

    Returns:
        Dict mapping each anchor's text (the episode label) to its full URL.

    Raises:
        requests.HTTPError: if the page request fails.
    """
    # timeout keeps the scraper from hanging forever on a stalled server
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # fail fast instead of parsing an error page
    soup = BeautifulSoup(response.text, "html.parser")
    settings = soup.find_all("div", class_="theatre-settings")[0]
    anchors = settings.find_all("div", class_="dropdown-menu")[1].find_all("a")
    return {
        anchor.contents[0]: f"{BASE_URL}{anchor.get('href')}" for anchor in anchors
    }
def get_per_resolution_urls(url: str) -> dict[str, str]:
    """Return a mapping of resolution label -> external download page URL.

    Scrapes the fourth dropdown menu inside the "theatre-settings" block of an
    episode page, which lists one download link per available resolution.

    Args:
        url: URL of an episode page.

    Returns:
        Dict mapping the assembled resolution label to the download page URL.

    Raises:
        requests.HTTPError: if the page request fails.
    """
    # timeout keeps the scraper from hanging forever on a stalled server
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # fail fast instead of parsing an error page
    soup = BeautifulSoup(response.text, "html.parser")
    settings = soup.find_all("div", class_="theatre-settings")[0]
    anchors = settings.find_all("div", class_="dropdown-menu")[3].find_all("a")

    def get_title(contents: list) -> str:
        """Flatten an anchor's mixed text/tag children into one label string."""
        title = []
        for content in contents:
            stripped_content = str(content).strip()
            # isinstance is the idiomatic (subclass-aware) type test
            if isinstance(content, element.NavigableString) and stripped_content:
                title.append(stripped_content)
            elif isinstance(content, element.Tag):
                # nested tag (e.g. a badge span): take its first child's text
                title.append(str(content.contents[0]).strip())
        return " ".join(title)

    return {
        get_title(anchor.contents): f"{anchor.get('href')}" for anchor in anchors
    }
def get_actual_download_url(url: str) -> str:
    """Resolve the intermediate download page to the actual file URL.

    animepahe download links go through a JavaScript redirect page, so a
    headless Chrome session renders it and the final link is read from the
    element carrying the ``redirect`` class.

    Args:
        url: URL of the intermediate download page.

    Returns:
        The href of the redirect element. (Original annotation said ``None``,
        which contradicted the ``return`` statement — fixed to ``str``.)

    Raises:
        Exception: whatever Selenium raises if the element never appears
        after two polling attempts.
    """
    options = Options()
    options.add_argument("--headless")
    browser = webdriver.Chrome(options=options)
    try:
        browser.get(url)

        def find_redirect() -> str:
            return (
                browser.find_element(By.TAG_NAME, "body")
                .find_element(By.CLASS_NAME, "redirect")
                .get_attribute("href")
            )

        try:
            sleep(5)  # give the JS on the redirect page time to render
            return find_redirect()
        except Exception:
            # element not rendered yet — wait once more; a second failure
            # propagates to the caller (matches original retry behavior)
            sleep(5)
            return find_redirect()
    finally:
        browser.quit()  # always release the browser, even when lookup fails
def main() -> None:
    """Scrape every episode's download links and write them to '<title>.txt'.

    Command-line arguments:
        -t/--title: anime title, used for the header and the output filename.
        -u/--first-episode-url: URL of any episode page of the show; used to
            enumerate all episodes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--title", help="Title of the Anime", required=True)
    parser.add_argument("-u", "--first-episode-url", required=True)
    args = parser.parse_args()

    # output file lives next to this script, named after the anime title;
    # explicit encoding so the result is UTF-8 regardless of platform default
    with (Path(__file__).parent / f"{args.title}.txt").open("w", encoding="utf-8") as f:
        f.write(f"== {args.title} ==")
        f.write("\n\n")
        f.flush()
        episode_urls = get_episode_urls(args.first_episode_url)
        for episode, episode_url in episode_urls.items():
            f.write(episode)
            f.write("\n")
            per_resolution_urls = get_per_resolution_urls(episode_url)
            for resolution, resolution_url in per_resolution_urls.items():
                actual_download_url = get_actual_download_url(resolution_url)
                f.write(actual_download_url)
                f.write(f" [ {resolution} ]")
                f.write("\n")
            f.write("\n")
            # flush after each episode so progress survives an interrupted run
            f.flush()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment