Created
November 2, 2023 18:21
-
-
Save Realiserad/a4b039b400812d1405559ea96e948adf to your computer and use it in GitHub Desktop.
Try to resolve a list of links extracted using find and grep
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # Provide input over stdin, one file and one link per line separated by a colon. | |
| # | |
| # For example: | |
| # | |
| # > echo "./README.md:https://helix.stormhub.org" | python3 check_links.py | |
| # > find . -name "*.md" -type f -print0 | xargs -0 grep -o "https://[^ )\`\"<]*" | python3 check_links.py | jq . | |
| from fileinput import input | |
| import requests | |
| import json | |
| import sys | |
| from concurrent.futures import ThreadPoolExecutor | |
# Shared result list: process_line appends one dict per broken link.
broken_links = []


def process_line(line, timeout=10):
    """Check one ``file:link`` input line and record the link if it is broken.

    The text before the first colon is taken as the file name; everything
    after it (stripped of surrounding whitespace) is the link. The link is
    fetched with an HTTP GET, and an entry is appended to the module-level
    ``broken_links`` list when the response status is not 200 or when the
    request raises.

    Args:
        line: One input line, e.g. ``"./README.md:https://example.org\n"``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            host that never answers cannot block a worker indefinitely.

    Returns:
        None. Results are reported through ``broken_links``.
    """
    # Blank lines carry neither a file nor a link; there is nothing to check.
    if not line.strip():
        return

    # Split on the first colon only: the link itself contains colons
    # (e.g. "https://").
    file, _, link = line.partition(':')
    link = link.strip()

    try:
        response = requests.get(link, timeout=timeout)
    except Exception as e:
        # Deliberately broad: any failure to fetch (including a timeout or a
        # malformed URL) counts as a broken link. The exception class name is
        # reported rather than the full message.
        broken_links.append(
            {
                'file': file,
                'link': link,
                'error': type(e).__name__,
            })
        return

    # Only an exact 200 counts as a working link.
    if response.status_code != 200:
        broken_links.append(
            {
                'file': file,
                'link': link,
                'status_code': response.status_code,
            })
# Check the links concurrently with at most 20 worker threads.
with ThreadPoolExecutor(max_workers=20) as executor:
    # `input` here is fileinput.input (imported at the top of the file), which
    # yields the input lines one by one. Leaving the `with` block waits for
    # every submitted call to finish before the results are inspected.
    executor.map(process_line, input())

if broken_links:
    print(json.dumps(broken_links))
    # sys.exit with a string writes it to stderr and exits with status 1.
    # The bare exit() helper comes from the site module and is not guaranteed
    # to be available in every interpreter configuration.
    sys.exit("found broken links")

# No broken links: emit an empty JSON array so the output is always valid JSON.
print('[]')
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
One-liner to run it for a quick and dirty check of some markdown files in the current working directory: