Skip to content

Instantly share code, notes, and snippets.

@jkerhin
Last active September 25, 2024 03:01
Show Gist options
  • Select an option

  • Save jkerhin/5dfc764409e8cb10abdee4bd4d4906dc to your computer and use it in GitHub Desktop.

Select an option

Save jkerhin/5dfc764409e8cb10abdee4bd4d4906dc to your computer and use it in GitHub Desktop.
Script for fetching mission data from all RocketLab missions
"""Simple script to grab all the RocketLab mission info
There is plenty of room for improvement, but this works.
Requirements defined below using inline script metadata (formerly PEP-723), and the file
can be run standalone with `pipx`, `uv run`, etc.
"""
# /// script
# requires-python = ">=3.9"
# dependencies = [
# "requests<3",
# "parsel",
# ]
# ///
import json
from typing import Dict, Iterable, Optional
import requests
from parsel.selector import Selector
# Site root; prepended to the links scraped from the missions table.
URL_ROOT = "https://www.rocketlabusa.com"
# File that main() writes the collected mission data to, as JSON.
OUT_FILE = "mission_info.json"
def get_mission_urls(ses: Optional[requests.Session] = None) -> Iterable[str]:
    """Fetch the list of RocketLab completed missions.

    Downloads the "missions launched" index page and collects every link
    inside a table whose ``href`` contains ``missions-launched``.

    Args:
        ses: Session used to issue the request. When omitted, a temporary
            session is created and closed again before returning.

    Returns:
        The de-duplicated mission page URLs in absolute form, sorted so that
        repeated runs yield them in the same order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    from urllib.parse import urljoin

    if ses is None:
        # We own this session, so make sure its connections get released.
        with requests.Session() as own_ses:
            return get_mission_urls(ses=own_ses)

    # Without a timeout, requests would wait indefinitely on a stalled server.
    r_missions = ses.get(f"{URL_ROOT}/missions/missions-launched/", timeout=30)
    r_missions.raise_for_status()
    selector = Selector(text=r_missions.text)
    # There's only one table so don't need anything complicated
    links = selector.xpath(
        '//table//a[contains(@href, "missions-launched")]/@href'
    ).getall()
    # urljoin copes with site-relative hrefs as well as already-absolute ones;
    # plain string concatenation would mangle the latter.
    return sorted({urljoin(URL_ROOT, link.strip()) for link in links})
def get_mission_data(
    url: str, ses: Optional[requests.Session] = None
) -> Dict[str, str]:
    """Access the mission page, extract mission data from the infobox.

    Every ``<div>`` whose class contains ``details-item`` is read as one
    entry: the text of its ``<span>`` is the label and the last text node of
    the ``<div>`` is the value.

    Args:
        url: Absolute URL of the mission page.
        ses: Session used to issue the request. When omitted, a temporary
            session is created and closed again before returning.

    Returns:
        Mapping of label (surrounding whitespace and trailing colon removed)
        to the stripped value text. Entries without a label are skipped;
        entries without any value text map to an empty string.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    if ses is None:
        # We own this session, so make sure its connections get released.
        with requests.Session() as own_ses:
            return get_mission_data(url=url, ses=own_ses)

    # Without a timeout, requests would wait indefinitely on a stalled server.
    r_msn = ses.get(url=url, timeout=30)
    r_msn.raise_for_status()
    selector = Selector(text=r_msn.text)
    data: Dict[str, str] = {}
    for item in selector.xpath('//div[contains(@class, "details-item")]'):
        label = item.css("span::text").get()
        if label is None:
            # Nothing to key on; one malformed entry should not discard the
            # rest of the mission's data.
            continue
        # Strip whitespace first so a trailing space cannot shield the colon.
        key = label.strip().rstrip(":").rstrip()
        # Need to select the data _after_ the <span>, hence the last text node
        texts = item.css("div::text").getall()
        data[key] = texts[-1].strip() if texts else ""
    return data
def main() -> None:
    """Scrape every launched mission and dump the results to ``OUT_FILE``.

    Mission pages that cannot be fetched or parsed are reported on stdout and
    skipped; the remaining missions are still written out as a JSON list.
    """
    mission_data = []
    # Context manager closes the session's pooled connections when done.
    with requests.Session() as ses:
        # Sort so the output order is reproducible from run to run.
        for url in sorted(get_mission_urls(ses=ses)):
            try:
                mission_data.append(get_mission_data(url=url, ses=ses))
            except Exception as err:
                # Deliberately broad: this is the top-level boundary and one
                # bad page should not abort the whole scrape.
                print(f"Failed to get mission data for {url}: {err}")
    print(f"Writing mission data to {OUT_FILE}")
    # Explicit encoding keeps the output independent of the platform locale.
    with open(OUT_FILE, "w", encoding="utf-8") as hdl:
        json.dump(mission_data, hdl, indent=2)
# Run the scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment