Skip to content

Instantly share code, notes, and snippets.

@sparkiegeek
Created March 3, 2022 07:28
Show Gist options
  • Select an option

  • Save sparkiegeek/9398b8ae4adba199311aaba5d7a264f2 to your computer and use it in GitHub Desktop.

Select an option

Save sparkiegeek/9398b8ae4adba199311aaba5d7a264f2 to your computer and use it in GitHub Desktop.
Redirect lint for Discourse-based docs
#!/usr/bin/fades -q
"""
Script for identifying redirects in Discourse that are already present in a redirects.yaml file
We also need some dependencies installed for speed, installed by fades:
lxml
"""
import bs4 # fades beautifulsoup4
import requests # fades
import yaml # fades PyYAML
import argparse
import dataclasses
import json
import sys
from typing import Iterable
@dataclasses.dataclass
class Redirect:
    """A single redirect rule: requests for ``source`` go to ``destination``."""

    # Path being redirected. The Discourse parser in this script stores it
    # without its leading "/".
    source: str
    # Where the redirect points. Other code in this script treats it as
    # starting with "/" (presumably a site-relative path; confirm).
    destination: str
def grab_discourse_content(url: str, timeout: float = 30.0) -> str:
    """Fetch the body of the first post of a Discourse topic.

    Requests ``<url>.json`` and returns the ``cooked`` field of the first
    entry in ``post_stream.posts``.

    Args:
        url: URL of the Discourse topic, without the ``.json`` suffix.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``cooked`` value of the first post, converted to ``str``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    # A trailing slash would otherwise produce ".../.json".
    response = requests.get(url.rstrip("/") + ".json", timeout=timeout)
    # Surface the HTTP status instead of failing later while decoding JSON or
    # indexing into an error payload.
    response.raise_for_status()
    return str(response.json()["post_stream"]["posts"][0]["cooked"])
def get_website_redirects(fh) -> list[Redirect]:
    """Load redirects from an open ``redirects.yaml`` stream.

    The YAML document is expected to be a mapping of source to destination.

    Args:
        fh: Readable stream (or string) containing the YAML document.

    Returns:
        One ``Redirect`` per mapping entry, in document order; an empty list
        when the document is empty.
    """
    # safe_load returns None for an empty document; treat that as "no
    # redirects" rather than failing on None.items().
    data = yaml.safe_load(fh) or {}
    return [Redirect(source, dest) for source, dest in data.items()]
def generate_redirects_from_discourse(str) -> Iterable[Redirect]:
    """Yield the redirects listed in the table under the "Redirects" heading.

    Args:
        str: HTML to parse. The name shadows the builtin ``str``; it is kept
            unchanged so existing callers keep working.

    Yields:
        One ``Redirect`` per table row, with a leading "/" removed from the
        source. Nothing is yielded when the heading is not directly followed
        by a table body.

    Raises:
        ValueError: If there is no "Redirects" ``<h2>``, or a row does not
            contain exactly two non-empty text cells.
    """
    # Only <h2> and <tbody> elements are kept by the strainer, so a table body
    # shows up as the next sibling of the heading that precedes it.
    soup = bs4.BeautifulSoup(
        str, "lxml", parse_only=bs4.SoupStrainer(["h2", "tbody"])
    )
    # Match on the heading's visible text so that headings wrapping extra
    # markup (e.g. an anchor link) are still found.
    header = soup.find(
        lambda tag: tag.name == "h2" and tag.get_text(strip=True) == "Redirects"
    )
    if header is None:
        raise ValueError('No "Redirects" heading found in the Discourse content')

    tbody = header.find_next_sibling()
    if tbody is None or tbody.name != "tbody":
        # The heading exists but no table follows it: nothing to report.
        return

    for row in tbody("tr", recursive=False):
        cells = list(row.stripped_strings)
        if len(cells) != 2:
            raise ValueError(
                f"Expected a source and a destination in redirect row, got {cells!r}"
            )
        source, destination = cells
        # Redirects in Discourse have a leading /, let's strip it (and only
        # it: a source without the slash is left intact).
        yield Redirect(source.removeprefix("/"), destination)
def check_for_redundant_redirects(
    website_redirects: Iterable[Redirect],
    discourse_redirects: Iterable[Redirect],
) -> None:
    """Do we have any redirects in Discourse that are already in the website?

    Prints one line for every Discourse redirect whose source is also the
    source of a website redirect.

    Args:
        website_redirects: Redirects defined by the website.
        discourse_redirects: Redirects defined in Discourse.
    """
    # Only membership is needed, so a set of sources is enough.
    website_sources = {redirect.source for redirect in website_redirects}
    for redirect in discourse_redirects:
        if redirect.source in website_sources:
            print(
                f"[Discourse] redirect for {redirect.source} to {redirect.destination}"
                " is already in the website, and is redundant in Discourse"
            )
def check_for_non_terminals(redirects: Iterable[Redirect], prefix: str) -> None:
    """Do we have any redirects with destinations that themselves are redirects?

    Prints one line for every redirect whose destination (ignoring a leading
    "/") is the source of another redirect in the same collection.

    Args:
        redirects: Redirects to inspect; any iterable, including a generator.
        prefix: Text placed at the start of every reported line.
    """
    # The collection is walked twice (index, then scan); materialize it so a
    # one-shot iterator is not already exhausted by the second pass.
    redirects = list(redirects)
    by_source = {redirect.source: redirect for redirect in redirects}
    for redirect in redirects:
        # Sources are stored without the leading "/", destinations with it.
        # removeprefix leaves a destination without the slash untouched.
        next_redirect = by_source.get(redirect.destination.removeprefix("/"))
        if next_redirect is not None:
            print(
                f"{prefix} non-terminal redirect found for {redirect.source}"
                f" to {redirect.destination} on to {next_redirect.destination}"
            )
def check_for_valid_destination(
    redirects: Iterable[Redirect],
    base_url: str = "https://maas.io/",
    timeout: float = 10.0,
) -> None:
    """Report redirects whose destination does not answer a HEAD request with 200.

    Args:
        redirects: Redirects whose destinations should be probed.
        base_url: Site that relative destinations are resolved against.
        timeout: Seconds to wait for each response.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # Function-scope import: nothing else in this script needs urllib.
    from urllib.parse import urljoin

    for redirect in redirects:
        # urljoin avoids the "//" that plain concatenation produces for
        # destinations starting with "/", and leaves absolute URLs alone.
        url = urljoin(base_url, redirect.destination)
        # NOTE: requests.head does not follow redirects by default, so a
        # destination that itself redirects is reported with its 3xx status.
        response = requests.head(url, timeout=timeout)
        if response.status_code != 200:
            print(f"Got {response.status_code} for {url} from {redirect.source}")
def main(argv: list[str]) -> int:
    """Run the redirect lint checks and print their findings.

    Args:
        argv: Command-line arguments without the program name: the Discourse
            topic URL followed by the path to ``redirects.yaml``.

    Returns:
        ``0`` once all checks have run; findings are only printed and do not
        change the exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "discourse_url",
        help="URL for Discourse topic containing redirects",
        metavar="URL",
    )
    parser.add_argument(
        "redirects_yaml",
        help="Path to redirects.yaml file",
        metavar="redirects.yaml",
    )
    args = parser.parse_args(argv)

    # Read the local file first and close it deterministically; an unreadable
    # path becomes a normal usage error instead of a traceback.
    try:
        with open(args.redirects_yaml, encoding="utf-8") as fh:
            website_redirects = get_website_redirects(fh)
    except OSError as err:
        parser.error(f"cannot read {args.redirects_yaml}: {err}")

    discourse_contents = grab_discourse_content(args.discourse_url)
    # Materialized because the redirects are consumed by more than one check.
    discourse_redirects = list(generate_redirects_from_discourse(discourse_contents))

    check_for_redundant_redirects(website_redirects, discourse_redirects)
    check_for_non_terminals(website_redirects, prefix="[Website]")
    check_for_non_terminals(discourse_redirects, prefix="[Discourse]")
    # Destination probing is disabled; uncomment to issue one HTTP request
    # per redirect.
    # check_for_valid_destination(website_redirects)
    # check_for_valid_destination(discourse_redirects)
    return 0
# Script entry point: forward the CLI arguments (minus the program name) and
# use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment