jkerhin · September 25, 2024 03:01
diff --git a/rocketlab_mission_info.py b/rocketlab_mission_info.py
 """Simple script to grab all the RocketLab mission info

 There is plenty of room for improvement, but this works.

 Requirements defined below using inline script metadata (formerly PEP-723), and the file
 can be run standalone with `pipx`, `uv run`, etc.
 """
 # /// script
 # requires-python = ">=3.9"
 # dependencies = [
 #   "requests<3",
 #   "parsel",
 # ]
 # ///

 import json
 from typing import Dict, Iterable, Optional

 import requests
 from parsel.selector import Selector

 # Site root; get_mission_urls() builds the listing URL and mission URLs from it.
 URL_ROOT = "https://www.rocketlabusa.com"
 # Path of the JSON file that main() writes the collected mission data to.
 OUT_FILE = "mission_info.json"


 def get_mission_urls(
     ses: Optional[requests.Session] = None, *, timeout: float = 30.0
 ) -> Iterable[str]:
     """Fetch the URLs of all RocketLab completed-mission pages.

     Downloads the "missions launched" listing page and collects every link
     inside a table whose href contains "missions-launched".

     Args:
         ses: Session to issue the request with; a new one is created when
             omitted.
         timeout: Seconds to wait for the server before giving up. Without a
             timeout, requests may block indefinitely on a stalled connection.

     Returns:
         A set of de-duplicated, absolute mission page URLs.

     Raises:
         requests.HTTPError: If the listing page returns an error status.
         requests.Timeout: If the server does not respond within ``timeout``.
     """
     # Function-scope import keeps this block self-contained.
     from urllib.parse import urljoin

     if ses is None:
         ses = requests.Session()
     r_missions = ses.get(
         f"{URL_ROOT}/missions/missions-launched/", timeout=timeout
     )
     r_missions.raise_for_status()
     selector = Selector(text=r_missions.text)
     # There's only one table so don't need anything complicated
     links = selector.xpath(
         '//table//a[contains(@href, "missions-launched")]/@href'
     ).getall()
     # urljoin resolves root-relative, relative and already-absolute hrefs;
     # plain string concatenation is only correct for root-relative ones.
     # The set comprehension also removes duplicate links.
     return {urljoin(URL_ROOT + "/", link.strip()) for link in links}


 def get_mission_data(
     url: str, ses: Optional[requests.Session] = None, *, timeout: float = 30.0
 ) -> Dict[str, str]:
     """Access the mission page, extract mission data from the infobox.

     Each ``div`` whose class contains "details-item" contributes one entry:
     the text of its ``<span>`` is the key (trailing colon removed) and the
     last text node of the ``div`` is the value.

     Args:
         url: Absolute URL of a mission page.
         ses: Session to issue the request with; a new one is created when
             omitted.
         timeout: Seconds to wait for the server before giving up. Without a
             timeout, requests may block indefinitely on a stalled connection.

     Returns:
         Mapping of infobox label to value. Rows lacking a label or a value
         are skipped, so the mapping may be empty.

     Raises:
         requests.HTTPError: If the mission page returns an error status.
         requests.Timeout: If the server does not respond within ``timeout``.
     """
     if ses is None:
         ses = requests.Session()
     r_msn = ses.get(url=url, timeout=timeout)
     r_msn.raise_for_status()
     selector = Selector(text=r_msn.text)

     data = {}
     for item in selector.xpath('//div[contains(@class, "details-item")]'):
         label = item.css("span::text").get()
         texts = item.css("div::text").getall()
         if label is None or not texts:
             # A malformed row should not discard the rest of the infobox
             # via an AttributeError/IndexError.
             continue
         # Strip whitespace first so a trailing space cannot shield the colon.
         key = label.strip().rstrip(":").rstrip()
         # Need to select the data _after_ the <span>, hence the last text node
         data[key] = texts[-1].strip()

     return data


 def main():
     """Scrape every launched mission's infobox and dump the results as JSON.

     Fetches the list of mission URLs, downloads each mission page with a
     shared session, and writes the list of per-mission dictionaries to
     ``OUT_FILE``. A mission whose page cannot be fetched or parsed is reported
     on stdout and left out of the output.
     """
     # The context manager closes the session's pooled connections on exit.
     with requests.Session() as ses:
         # Sort the URLs: they arrive as a set, whose iteration order changes
         # between runs, which would reorder the output file each time.
         mission_urls = sorted(get_mission_urls(ses=ses))

         mission_data = []
         for url in mission_urls:
             try:
                 mission_data.append(get_mission_data(url=url, ses=ses))
             except Exception as err:
                 # Best effort: one bad page must not abort the whole scrape.
                 print(f"Failed to get mission data for {url}: {err}")

     print(f"Writing mission data to {OUT_FILE}")
     # Explicit encoding so the output does not depend on the platform default.
     with open(OUT_FILE, "w", encoding="utf-8") as hdl:
         json.dump(mission_data, hdl, indent=2)


 if __name__ == "__main__":
     main()
	"""Simple script to grab all the RocketLab mission info

	There is plenty of room for improvement, but this works.

	Requirements defined below using inline script metadata (formerly PEP-723), and the file
	can be run standalone with `pipx`, `uv run`, etc.
	"""
	# /// script
	# requires-python = ">=3.9"
	# dependencies = [
	# "requests<3",
	# "parsel",
	# ]
	# ///

	import json
	from typing import Dict, Iterable, Optional

	import requests
	from parsel.selector import Selector

	# Site root; get_mission_urls() builds the listing URL and mission URLs from it.
	URL_ROOT = "https://www.rocketlabusa.com"
	# Path of the JSON file that main() writes the collected mission data to.
	OUT_FILE = "mission_info.json"


	def get_mission_urls(
	    ses: Optional[requests.Session] = None, *, timeout: float = 30.0
	) -> Iterable[str]:
	    """Fetch the URLs of all RocketLab completed-mission pages.

	    Downloads the "missions launched" listing page and collects every link
	    inside a table whose href contains "missions-launched".

	    Args:
	        ses: Session to issue the request with; a new one is created when
	            omitted.
	        timeout: Seconds to wait for the server before giving up. Without
	            a timeout, requests may block indefinitely on a stalled
	            connection.

	    Returns:
	        A set of de-duplicated, absolute mission page URLs.

	    Raises:
	        requests.HTTPError: If the listing page returns an error status.
	        requests.Timeout: If the server does not respond within ``timeout``.
	    """
	    # Function-scope import keeps this block self-contained.
	    from urllib.parse import urljoin

	    if ses is None:
	        ses = requests.Session()
	    r_missions = ses.get(
	        f"{URL_ROOT}/missions/missions-launched/", timeout=timeout
	    )
	    r_missions.raise_for_status()
	    selector = Selector(text=r_missions.text)
	    # There's only one table so don't need anything complicated
	    links = selector.xpath(
	        '//table//a[contains(@href, "missions-launched")]/@href'
	    ).getall()
	    # urljoin resolves root-relative, relative and already-absolute hrefs;
	    # plain string concatenation is only correct for root-relative ones.
	    # The set comprehension also removes duplicate links.
	    return {urljoin(URL_ROOT + "/", link.strip()) for link in links}


	def get_mission_data(
	    url: str, ses: Optional[requests.Session] = None, *, timeout: float = 30.0
	) -> Dict[str, str]:
	    """Access the mission page, extract mission data from the infobox.

	    Each ``div`` whose class contains "details-item" contributes one entry:
	    the text of its ``<span>`` is the key (trailing colon removed) and the
	    last text node of the ``div`` is the value.

	    Args:
	        url: Absolute URL of a mission page.
	        ses: Session to issue the request with; a new one is created when
	            omitted.
	        timeout: Seconds to wait for the server before giving up. Without
	            a timeout, requests may block indefinitely on a stalled
	            connection.

	    Returns:
	        Mapping of infobox label to value. Rows lacking a label or a value
	        are skipped, so the mapping may be empty.

	    Raises:
	        requests.HTTPError: If the mission page returns an error status.
	        requests.Timeout: If the server does not respond within ``timeout``.
	    """
	    if ses is None:
	        ses = requests.Session()
	    r_msn = ses.get(url=url, timeout=timeout)
	    r_msn.raise_for_status()
	    selector = Selector(text=r_msn.text)

	    data = {}
	    for item in selector.xpath('//div[contains(@class, "details-item")]'):
	        label = item.css("span::text").get()
	        texts = item.css("div::text").getall()
	        if label is None or not texts:
	            # A malformed row should not discard the rest of the infobox
	            # via an AttributeError/IndexError.
	            continue
	        # Strip whitespace first so a trailing space cannot shield the colon.
	        key = label.strip().rstrip(":").rstrip()
	        # Need to select the data _after_ the <span>, hence the last text node
	        data[key] = texts[-1].strip()

	    return data


	def main():
	    """Scrape every launched mission's infobox and dump the results as JSON.

	    Fetches the list of mission URLs, downloads each mission page with a
	    shared session, and writes the list of per-mission dictionaries to
	    ``OUT_FILE``. A mission whose page cannot be fetched or parsed is
	    reported on stdout and left out of the output.
	    """
	    # The context manager closes the session's pooled connections on exit.
	    with requests.Session() as ses:
	        # Sort the URLs: they arrive as a set, whose iteration order changes
	        # between runs, which would reorder the output file each time.
	        mission_urls = sorted(get_mission_urls(ses=ses))

	        mission_data = []
	        for url in mission_urls:
	            try:
	                mission_data.append(get_mission_data(url=url, ses=ses))
	            except Exception as err:
	                # Best effort: one bad page must not abort the whole scrape.
	                print(f"Failed to get mission data for {url}: {err}")

	    print(f"Writing mission data to {OUT_FILE}")
	    # Explicit encoding so the output does not depend on the platform default.
	    with open(OUT_FILE, "w", encoding="utf-8") as hdl:
	        json.dump(mission_data, hdl, indent=2)


	if __name__ == "__main__":
	    main()
No results found