Skip to content

Instantly share code, notes, and snippets.

@foxy4096
Created June 8, 2021 07:51
Show Gist options
  • Select an option

  • Save foxy4096/41909f56d3366bc93a638efcf78bde99 to your computer and use it in GitHub Desktop.

Select an option

Save foxy4096/41909f56d3366bc93a638efcf78bde99 to your computer and use it in GitHub Desktop.
from os import name
import requests
from requests import get
import pandas as pd
from bs4 import BeautifulSoup
import numpy as np
# Scrape one page of IMDb's "top_1000" advanced-search listing and print the
# collected fields as a pandas DataFrame (one row per listed title).
header = {"Accept-Language": "en-us, en;q=0.5"}
url = "https://www.imdb.com/search/title/?groups=top_1000&ref_=adv_prv"
# Without a timeout, requests waits indefinitely on an unresponsive server.
results = requests.get(url, headers=header, timeout=30)
soup = BeautifulSoup(results.text, "html.parser")

# One list per output column. Each loop iteration appends exactly one value
# to every list so the columns stay the same length.
titles = []
years = []
time = []
ratings = []
scores = []
votes = []
gross = []
genres = []

movie_div = soup.find_all('div', class_="lister-item mode-advanced")
for container in movie_div:
    name = container.h3.a.text
    titles.append(name)

    year = container.h3.find('span', class_="lister-item-year").text
    years.append(year)

    # Look the span up once and test it against None before touching .text;
    # a missing or empty runtime is recorded as '-'.
    runtime_span = container.p.find('span', class_='runtime')
    runtime = runtime_span.text if runtime_span is not None and runtime_span.text else '-'
    # Store the per-movie value; previously it was dropped and only the last
    # movie's runtime reached the DataFrame.
    time.append(runtime)

    imdb = float(container.strong.text)
    ratings.append(imdb)

    # metascore ('-' when the title has none)
    metascore_span = container.find('span', class_='metascore')
    m_score = metascore_span.text if metascore_span is not None else '-'
    scores.append(m_score)

    # there are two NV containers, grab both of them as they hold both the
    # votes and the grosses
    nv = container.find_all('span', attrs={'name': 'nv'})

    genre = container.p.find('span', class_="genre").text
    genres.append(genre)

    # filter nv for votes ('-' when no nv span is present at all)
    vote = nv[0].text if nv else '-'
    votes.append(vote)

    # filter nv for gross ('-' when only the votes span is present)
    grosses = nv[1].text if len(nv) > 1 else '-'
    gross.append(grosses)

movies = pd.DataFrame({
    'movie': titles,
    'year': years,
    # `genres` is collected above but intentionally left out of the table.
    # 'genre': genres,
    'timeMin': time,
    'imdb': ratings,
    'metascore': scores,
    'votes': votes,
    'us_grossMillions': gross,
})
print(movies)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment