Created
September 10, 2025 03:48
-
-
Save writer0713/a79682bb3ae2b749ca3b15926b814be8 to your computer and use it in GitHub Desktop.
nomad_assignment10.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| from bs4 import BeautifulSoup | |
def scrape_job_information(url, timeout=10):
    """Fetch one job-listing page and extract its job cards.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block forever.

    Returns:
        A list of dicts with the keys ``title``, ``company``,
        ``description`` and ``link`` (one per complete job card), or
        ``None`` when the server answers 404 or the page contains no
        ``ul.jobs-list-items`` element.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection failures or when *timeout* expires.
    """
    response = requests.get(
        url,
        # NOTE(review): presumably the site rejects the default requests
        # user agent -- confirm before removing this header.
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=timeout,
    )
    if response.status_code == 404:
        return None

    bs = BeautifulSoup(response.content, "html.parser")
    ul_tag = bs.find("ul", class_="jobs-list-items")
    if ul_tag is None:
        return None

    result = []
    for job in ul_tag.find_all("li", class_="bjs-jlid"):
        heading = job.find("h4", class_="bjs-jlid__h")
        company = job.find("a", class_="bjs-jlid__b")
        description = job.find("div", class_="bjs-jlid__description")
        anchor = heading.find("a") if heading is not None else None
        link = anchor.get("href") if anchor is not None else None

        # find() yields None for a missing tag; skip an incomplete card
        # instead of letting one malformed entry abort the whole page.
        if any(part is None for part in (heading, company, description, link)):
            continue

        result.append(
            {
                "title": heading.text,
                "company": company.text,
                "description": description.text,
                "link": link,
            }
        )
    return result
# Collect every job from the paginated engineering listing.
# NOTE: this runs at import time and performs network requests.
result = []

# The listing URL below does not depend on a skill, so the pages are fetched
# once rather than once per entry of this list (which only produced
# duplicate entries in ``result``).
skills = ["python", "typescript", "javascript", "rust"]

page = 1
while True:
    response = scrape_job_information(
        f"https://berlinstartupjobs.com/engineering/page/{page}/"
    )
    # None signals a 404 or a page without a job list: no more pages.
    if response is None:
        break
    result.extend(response)
    page += 1
def scrape_engineering_jobs():
    """Walk the paginated engineering listing and gather all job entries.

    Pages are requested in ascending order starting at 1. The walk stops at
    the first page for which ``scrape_job_information`` returns ``None``.

    Returns:
        A flat list containing the entries of every page scraped.
    """
    collected = []
    page_number = 0
    while True:
        page_number += 1
        page_url = f"https://berlinstartupjobs.com/engineering/page/{page_number}/"
        page_jobs = scrape_job_information(page_url)
        if page_jobs is None:
            return collected
        collected += page_jobs
def scrape_specific_skill_jobs():
    """Scrape the skill-area listing page of each tracked skill.

    Returns:
        A list with one single-key dict per skill, mapping the skill name
        to whatever ``scrape_job_information`` returned for that skill's
        page (a list of job dicts, or ``None``).
    """
    tracked_skills = ("python", "typescript", "javascript", "rust")
    return [
        {
            name: scrape_job_information(
                f"https://berlinstartupjobs.com/skill-areas/{name}/"
            )
        }
        for name in tracked_skills
    ]
if __name__ == "__main__":
    # Script entry point: run both scrapers (engineering listing first, then
    # the per-skill pages) and print the combined result.
    result = [
        dict(
            engineering_jobs=scrape_engineering_jobs(),
            specific_skill_jobs=scrape_specific_skill_jobs(),
        )
    ]
    print(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment