Last active
September 25, 2024 03:01
-
-
Save jkerhin/5dfc764409e8cb10abdee4bd4d4906dc to your computer and use it in GitHub Desktop.
Script for fetching mission data from all RocketLab missions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Simple script to grab all the RocketLab mission info | |
| There is plenty of room for improvement, but this works. | |
| Requirements defined below using inline script metadata (formerly PEP-723), and the file | |
| can be run standalone with `pipx`, `uv run`, etc. | |
| """ | |
| # /// script | |
| # requires-python = ">=3.9" | |
| # dependencies = [ | |
| # "requests<3", | |
| # "parsel", | |
| # ] | |
| # /// | |
| import json | |
| from typing import Dict, Iterable, Optional | |
| import requests | |
| from parsel.selector import Selector | |
# Scheme and host of the RocketLab website; page paths are appended to it.
URL_ROOT: str = "https://www.rocketlabusa.com"

# Path of the JSON file that main() writes the collected mission data to.
OUT_FILE: str = "mission_info.json"
def get_mission_urls(
    ses: Optional[requests.Session] = None, *, timeout: float = 30.0
) -> Iterable[str]:
    """Fetch the URLs of all completed RocketLab missions.

    Downloads the "missions launched" listing page and collects every link
    inside a table whose ``href`` contains ``missions-launched``.

    Args:
        ses: Session to issue the request with. When omitted, a temporary
            session is created and closed before returning.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        A set of absolute mission page URLs (duplicates removed).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    listing_url = f"{URL_ROOT}/missions/missions-launched/"

    owns_session = ses is None
    if ses is None:
        ses = requests.Session()
    try:
        r_missions = ses.get(listing_url, timeout=timeout)
        r_missions.raise_for_status()
        html = r_missions.text
    finally:
        # Only close a session we created; a caller-supplied one is theirs.
        if owns_session:
            ses.close()

    selector = Selector(text=html)
    # There's only one table so don't need anything complicated
    links = selector.xpath(
        '//table//a[contains(@href, "missions-launched")]/@href'
    ).getall()

    # urljoin gives the same result as URL_ROOT + link for root-relative
    # hrefs, and also handles absolute or page-relative hrefs correctly.
    return {urljoin(listing_url, link) for link in links}
def get_mission_data(
    url: str, ses: Optional[requests.Session] = None, *, timeout: float = 30.0
) -> Dict[str, str]:
    """Access the mission page and extract mission data from the infobox.

    Every ``<div>`` whose class contains ``details-item`` contributes one
    entry: the key is the text of its ``<span>`` with trailing colons
    removed, the value is the last text node of the div, stripped of
    surrounding whitespace.

    Args:
        url: Absolute URL of the mission page.
        ses: Session to issue the request with. When omitted, a temporary
            session is created and closed before returning.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        Mapping of infobox label to value. Entries lacking a label or any
        text are skipped, so one malformed entry does not discard the rest
        of the mission record.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    owns_session = ses is None
    if ses is None:
        ses = requests.Session()
    try:
        r_msn = ses.get(url=url, timeout=timeout)
        r_msn.raise_for_status()
        html = r_msn.text
    finally:
        # Only close a session we created; a caller-supplied one is theirs.
        if owns_session:
            ses.close()

    selector = Selector(text=html)
    data: Dict[str, str] = {}
    for item in selector.xpath('//div[contains(@class, "details-item")]'):
        label = item.css("span::text").get()
        texts = item.css("div::text").getall()
        # .get() yields None and .getall() yields [] when nothing matches;
        # skip such entries instead of raising AttributeError/IndexError.
        if label is None or not texts:
            continue
        # Need to select the data _after_ the <span>, hence texts[-1]
        data[label.rstrip(":")] = texts[-1].strip()
    return data
def main():
    """Scrape every completed mission and dump the results to OUT_FILE.

    Mission pages are fetched one at a time over a single shared session.
    A mission that fails to download or parse is reported on stdout and
    skipped; the remaining missions are still written. The output is a JSON
    list with one object per successfully scraped mission.
    """
    mission_data = []
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as ses:
        # Sort so the output order is stable from run to run; the URLs come
        # back as a set, whose iteration order is not.
        for url in sorted(get_mission_urls(ses=ses)):
            try:
                mission_data.append(get_mission_data(url=url, ses=ses))
            except Exception as err:
                # Deliberate best-effort at the top-level boundary: report
                # the failure and carry on with the other missions.
                print(f"Failed to get mission data for {url}: {err}")

    print(f"Writing mission data to {OUT_FILE}")
    # Explicit encoding so the file does not depend on the platform default.
    with open(OUT_FILE, "w", encoding="utf-8") as hdl:
        json.dump(mission_data, hdl, indent=2)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment