Skip to content

Instantly share code, notes, and snippets.

@ilmich
Created June 27, 2024 12:00
Show Gist options
  • Select an option

  • Save ilmich/162cc2f89899a75824f22693d13e655f to your computer and use it in GitHub Desktop.

Select an option

Save ilmich/162cc2f89899a75824f22693d13e655f to your computer and use it in GitHub Desktop.
from pyzvlab import HTMLParser
import xml.etree.ElementTree as ET
import requests
from pprint import pprint
# Show page to scrape: every season panel and its episode list live here.
PAGE_URL = 'https://definizionealta.com/8-greys-anatomy-serie-57/'

# A timeout keeps the script from blocking forever on an unresponsive server,
# and raise_for_status() stops us from parsing an HTTP error page as if it
# were the show page.
response = requests.get(PAGE_URL, timeout=30)
response.raise_for_status()

parser = HTMLParser()
# NOTE(review): assumes pyzvlab's HTMLParser.feed() returns the root element
# of the parsed tree (ElementTree-like) - confirm against the library.
root = parser.feed(response.text)
parser.close()

# Scan all the season panels.
panels = root.findall(".//div[@class='stagioni']/div/div[@class='single-season']")
for panel in panels:
    # The season id is the first single-quoted token of the onclick attribute.
    season_id = panel.attrib['onclick'].split('\'')[1].strip()
    print(season_id)

    # Walk every episode link in the DOM that belongs to this season's id.
    episodes = root.findall(
        ".//div[@id='" + season_id + "']/ul//li/a[@class='episodio-link']"
    )
    for episode in episodes:
        img = episode.find('img')  # episode info hangs off the <img> element
        title = img.find("span[@class='episode-title']")  # episode title
        print(img.attrib['src'])  # episode image
        print(title.text)
        print("--------------")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment