Created
June 16, 2024 10:59
-
-
Save nobleknightt/955d564275efdfda202a5d6ceec8085b to your computer and use it in GitHub Desktop.
Download anime episodes from animepahe.ru
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| from pathlib import Path | |
| from time import sleep | |
| import requests | |
| from bs4 import BeautifulSoup, element | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.common.by import By | |
| BASE_URL = "https://animepahe.ru" | |
def get_episode_urls(url: str) -> dict[str, str]:
    """Return a mapping of episode label -> absolute episode page URL.

    Scrapes the second dropdown menu inside the "theatre-settings" block of an
    animepahe episode page, which lists every episode of the show.

    Args:
        url: URL of any episode page of the anime.

    Returns:
        Dict mapping each anchor's text (the episode label) to its full URL.

    Raises:
        requests.HTTPError: if the page request fails.
    """
    # timeout keeps the scraper from hanging forever on a stalled server
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # fail fast instead of parsing an error page
    soup = BeautifulSoup(response.text, "html.parser")
    settings = soup.find_all("div", class_="theatre-settings")[0]
    anchors = settings.find_all("div", class_="dropdown-menu")[1].find_all("a")
    return {
        anchor.contents[0]: f"{BASE_URL}{anchor.get('href')}" for anchor in anchors
    }
def get_per_resolution_urls(url: str) -> dict[str, str]:
    """Return a mapping of resolution label -> external download page URL.

    Scrapes the fourth dropdown menu inside the "theatre-settings" block of an
    episode page, which lists one download link per available resolution.

    Args:
        url: URL of an episode page.

    Returns:
        Dict mapping the assembled resolution label to the download page URL.

    Raises:
        requests.HTTPError: if the page request fails.
    """
    # timeout keeps the scraper from hanging forever on a stalled server
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # fail fast instead of parsing an error page
    soup = BeautifulSoup(response.text, "html.parser")
    settings = soup.find_all("div", class_="theatre-settings")[0]
    anchors = settings.find_all("div", class_="dropdown-menu")[3].find_all("a")

    def get_title(contents: list) -> str:
        """Flatten an anchor's mixed text/tag children into one label string."""
        title = []
        for content in contents:
            stripped_content = str(content).strip()
            # isinstance is the idiomatic (subclass-aware) type test
            if isinstance(content, element.NavigableString) and stripped_content:
                title.append(stripped_content)
            elif isinstance(content, element.Tag):
                # nested tag (e.g. a badge span): take its first child's text
                title.append(str(content.contents[0]).strip())
        return " ".join(title)

    return {
        get_title(anchor.contents): f"{anchor.get('href')}" for anchor in anchors
    }
def get_actual_download_url(url: str) -> str:
    """Resolve the intermediate download page to the actual file URL.

    animepahe download links go through a JavaScript redirect page, so a
    headless Chrome session renders it and the final link is read from the
    element carrying the ``redirect`` class.

    Args:
        url: URL of the intermediate download page.

    Returns:
        The href of the redirect element. (Original annotation said ``None``,
        which contradicted the ``return`` statement — fixed to ``str``.)

    Raises:
        Exception: whatever Selenium raises if the element never appears
        after two polling attempts.
    """
    options = Options()
    options.add_argument("--headless")
    browser = webdriver.Chrome(options=options)
    try:
        browser.get(url)

        def find_redirect() -> str:
            return (
                browser.find_element(By.TAG_NAME, "body")
                .find_element(By.CLASS_NAME, "redirect")
                .get_attribute("href")
            )

        try:
            sleep(5)  # give the JS on the redirect page time to render
            return find_redirect()
        except Exception:
            # element not rendered yet — wait once more; a second failure
            # propagates to the caller (matches original retry behavior)
            sleep(5)
            return find_redirect()
    finally:
        browser.quit()  # always release the browser, even when lookup fails
def main() -> None:
    """Scrape every episode's download links and write them to '<title>.txt'.

    Command-line arguments:
        -t/--title: anime title, used for the header and the output filename.
        -u/--first-episode-url: URL of any episode page of the show; used to
            enumerate all episodes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--title", help="Title of the Anime", required=True)
    parser.add_argument("-u", "--first-episode-url", required=True)
    args = parser.parse_args()

    # output file lives next to this script, named after the anime title;
    # explicit encoding so the result is UTF-8 regardless of platform default
    with (Path(__file__).parent / f"{args.title}.txt").open("w", encoding="utf-8") as f:
        f.write(f"== {args.title} ==")
        f.write("\n\n")
        f.flush()
        episode_urls = get_episode_urls(args.first_episode_url)
        for episode, episode_url in episode_urls.items():
            f.write(episode)
            f.write("\n")
            per_resolution_urls = get_per_resolution_urls(episode_url)
            for resolution, resolution_url in per_resolution_urls.items():
                actual_download_url = get_actual_download_url(resolution_url)
                f.write(actual_download_url)
                f.write(f" [ {resolution} ]")
                f.write("\n")
            f.write("\n")
            # flush after each episode so progress survives an interrupted run
            f.flush()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment