Skip to content

Instantly share code, notes, and snippets.

@appendjeff
Created March 30, 2017 03:44
Show Gist options
  • Select an option

  • Save appendjeff/853d48057dba46ea6969d6475f4a61ae to your computer and use it in GitHub Desktop.

Select an option

Save appendjeff/853d48057dba46ea6969d6475f4a61ae to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup as BS
def get_puppy_ids(timeout=10):
    """Scrape the PuppySpot listing page and return candidate puppy ids.

    Downloads the "all puppies" page, visits every ``<a>`` tag, and collects
    the fourth ``/``-separated segment (index 3) of each ``href``. Anchors
    with a missing or empty ``href``, or with fewer than four segments, are
    skipped. Nothing is validated or de-duplicated, so the result can contain
    repeated values and path segments that are not really ids.

    Args:
        timeout: Value forwarded as the ``timeout`` argument of
            ``requests.get``. Pass ``None`` to wait indefinitely, which is
            what the call did before this parameter existed.

    Returns:
        A list of strings, in the order the links appear in the page.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out. The HTTP status code itself is not checked.
    """
    url = 'https://www.puppyspot.com/puppies/all_puppies/'
    # NOTE(review): 'set-cookie' and 'cf-ray' are response-header names and the
    # cookie carries a 2017 expiry; they look copied from a captured response.
    # Kept verbatim so the request stays the same -- confirm whether the site
    # needs them at all.
    headers = {
        'set-cookie': 'AWSALB=0BxiczxoEuCoEq2Z1M+8B0cUp/jUMj8LnRXmNUc6aOqGB5Hbh8rtdVnuJRdQsuJjVBAmooGEiPtsmoh7y7SldkCa1WfDVflgMkRnaIwp9jFdDqjKzOrqooyd/WGt; Expires=Thu, 06 Apr 2017 03:37:04 GMT; Path=/',
        'cf-ray': '3478073bed65213e-EWR'
    }
    # Without a timeout a stalled connection would block this call forever.
    r = requests.get(url, headers=headers, timeout=timeout)
    soup = BS(r.text, 'html.parser')

    puppy_ids = []
    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            continue
        parts = href.split('/')
        # Only hrefs with at least four segments have an index-3 component.
        if len(parts) > 3:
            puppy_ids.append(parts[3])
    return puppy_ids
# Script body: runs the scrape as soon as the module is executed (or imported),
# keeps the result in the module-level name `puppy_ids`, and prints the list.
puppy_ids = get_puppy_ids()
print(puppy_ids)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment