Prérequis:
- Je pars du principe que brew est déjà installé (sinon, suivre leurs instructions)
- Avoir copié le fichier quelque part sur sa machine
Ouvrir un terminal, puis:
brew install uv
chmod +x scrap-mairies.py
./scrap-mairies.py
#!/usr/bin/env -S uv run
# Needs "uv" to be run. chmod +x and then ./scrap-mairies.py
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "requests", "bs4"
# ]
# ///
# Imports grouped per PEP 8: standard library first, third-party second.
import csv
import time

import requests
from bs4 import BeautifulSoup
def get_email_from_page(url, timeout=10):
    """Fetch a mairie detail page and extract its contact email.

    Args:
        url: Absolute URL of the mairie's detail page.
        timeout: Seconds to wait for the HTTP response. New parameter with a
            default, so existing callers are unaffected.

    Returns:
        The email address as a string (may be empty if the link has no
        href), or None when the page has no ``a.send-mail`` link.
    """
    print(f"Getting email from {url}...")
    # requests has no default timeout: without one, a stalled server would
    # hang the whole scrape indefinitely.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")
    email_link = soup.select_one("a.send-mail")
    if email_link is None:
        return None
    # .get() guards against an anchor missing its href attribute, which
    # would have raised KeyError in the original.
    return email_link.get("href", "").replace("mailto:", "")
def scrape_mairies():
    """Scrape the service-public.fr directory for mairies in Ille-et-Vilaine.

    Walks every paginated result page, follows each mairie's detail link to
    collect its contact email, and sleeps one second between detail
    requests to stay polite to the server.

    Returns:
        A list of dicts with keys "name", "url" and "email" ("email" may be
        None when the detail page exposes no send-mail link).
    """
    from urllib.parse import urljoin  # stdlib; resolves relative hrefs

    base_url = "https://lannuaire.service-public.fr/navigation/mairie"
    params = {"where": "Ille Et Vilaine 35", "page": 1}
    results = []
    while True:
        print(f"Scraping page {params['page']}...")
        # Timeout so a stalled listing page cannot hang the whole run
        # (requests has no default timeout).
        response = requests.get(base_url, params=params, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        # Extract mairie links
        mairie_links = soup.select("div.sp-link--label a.fr-link")
        for link in mairie_links:
            mairie_name = link.text.strip()
            # The href may be site-relative; the original passed it to
            # requests as-is, which fails for relative URLs. urljoin leaves
            # already-absolute URLs untouched.
            mairie_url = urljoin(base_url, link["href"])
            email = get_email_from_page(mairie_url)
            results.append({"name": mairie_name, "url": mairie_url, "email": email})
            time.sleep(1)  # Be polite, wait a second between requests
        # Check for next page
        next_link = soup.select_one("a.fr-pagination__link--next")
        if next_link:
            params["page"] += 1
        else:
            break
    return results
def save_to_csv(data, filename="mairies_emails.csv"):
    """Write the scraped rows to *filename* as UTF-8 CSV.

    Each element of *data* is a dict with "name", "url" and "email" keys;
    a header row is written first.
    """
    columns = ["name", "url", "email"]
    with open(filename, "w", newline="", encoding="utf-8") as out:
        writer = csv.DictWriter(out, fieldnames=columns)
        writer.writeheader()
        writer.writerows(data)
if __name__ == "__main__":
    # Entry point: scrape the directory, then persist the rows to CSV.
    mairies_data = scrape_mairies()
    save_to_csv(mairies_data)
    # Plain string literal: the original used an f-string with no
    # placeholders (useless f prefix).
    print("Scraping completed. Data saved to mairies_emails.csv")