Skip to content

Instantly share code, notes, and snippets.

@mediaczar
Last active November 9, 2018 10:47
Show Gist options
  • Select an option

  • Save mediaczar/ba92b43878e1984b7614c1770e450086 to your computer and use it in GitHub Desktop.

Select an option

Save mediaczar/ba92b43878e1984b7614c1770e450086 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import requests
from bs4 import BeautifulSoup
def collect(year, timeout=30):
    """Download and parse the Interbrand ranking page for one year.

    Args:
        year: Ranking year; it is interpolated into the URL path.
        timeout: Seconds to wait on the server before requests gives up.
            Without a timeout, ``requests.get`` can block indefinitely on a
            stalled connection.

    Returns:
        The response body parsed by BeautifulSoup with the ``html5lib``
        parser.

    Raises:
        requests.exceptions.RequestException: on connection failure or
            timeout.
    """
    url = "https://www.interbrand.com/best-brands/best-global-brands/%s/ranking" % (year,)
    response = requests.get(url, timeout=timeout)
    # The HTTP status is not checked here, so an error page is parsed like
    # any other response and handed back to the caller.
    return BeautifulSoup(response.text, features="html5lib")
def scrape(year, page, attributes):
    """Print one tab-separated row per ``li.brand-item`` element in ``page``.

    Args:
        year: Not used by this function; kept so existing callers keep
            working.
        page: Parsed document exposing ``find_all``.
        attributes: Sequence of CSS class names. For every item, the ``div``
            returned by ``item.find("div", class_=name)`` supplies one column.

    Columns are written in the order given by ``attributes``. A column whose
    ``div`` is not found is written as an empty string.
    """
    for item in page.find_all("li", class_="brand-item"):
        # Build the row as a list in ``attributes`` order; relying on
        # dict.values() gives an arbitrary column order on Python 2.
        fields = []
        for attr in attributes:
            cell = item.find("div", class_=attr)
            fields.append(cell.text.strip() if cell is not None else u"")
        line = u"\t".join(fields)
        if str is bytes:
            # Python 2 only: encode explicitly so non-ASCII brand names do
            # not raise UnicodeEncodeError when stdout is a pipe or file.
            line = line.encode("utf-8")
        print(line)
# CSS class names handed to scrape(); each one names a field to pull out of
# every brand item.
attributes = (
    "brand-rank",
    "brand-name",
    "brand-region",
    "brand-country",
    "brand-sector",
    "brand-value",
    "brand-value-change",
)

# range() excludes its stop value, so this walks the years 2000 through 2018,
# fetching each ranking page and printing its rows.
for y in range(2000, 2019):
    scrape(y, collect(y), attributes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment