Last active
November 25, 2025 10:17
-
-
Save severo/e6099ab29a5ec17214145f834afbd1a7 to your computer and use it in GitHub Desktop.
Save URLs to the Wayback Machine (from https://foxrow.com/til-api-for-saving-webpages-in-the-wayback-machine)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| *.jsonl | |
| *.txt |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| set -euo pipefail | |
# Print the help text on stderr and terminate the script with status 2.
# Takes no arguments and never returns to the caller.
usage() {
  # The heredoc delimiter is left unquoted so that ${0##*/} (the name the
  # script was invoked as, without its directory) is expanded. The text
  # contains no other '$', backtick or backslash, so nothing else changes.
  cat <<USAGE >&2
Usage:
  ${0##*/} <file>
Examples:
  ${0##*/} urls.txt
<file> holds one URL per line; blank lines and lines starting with '#' are skipped.
Pass the keys as AIS3_ACCESS and AIS3_SECRET environment variables.
Create them at https://archive.org/account/s3.php.
USAGE
  exit 2
}
# Command line: save.sh <file>
# Without any argument there is nothing to process, so show the help
# (usage exits with status 2).
[[ "$#" -ne 0 ]] || usage

# The first positional argument is the path of the URL list; it is shifted
# off afterwards and any further arguments are left untouched.
file="$1"
shift

# The list has to be an existing regular file; otherwise report on stderr
# and stop with status 2.
[[ -f "$file" ]] || {
  echo "File not found: $file" >&2
  exit 2
}
# Submit every URL listed in "$file" to the Wayback Machine save endpoint,
# appending each raw response (one per line) to $OUTPUT_FILE.
#
# Reads:   file         - path of the URL list (set by the argument handling)
#          AIS3_ACCESS  - archive.org S3-style access key (environment)
#          AIS3_SECRET  - archive.org S3-style secret key (environment)
# Writes:  $OUTPUT_FILE in the current directory (appended, never truncated)
# Exit:    non-zero if a key is missing or at least one request failed

# Fail fast with a clear message when a key is missing or empty, instead of
# aborting on an "unbound variable" error in the middle of the loop.
: "${AIS3_ACCESS:?must be set; create keys at https://archive.org/account/s3.php}"
: "${AIS3_SECRET:?must be set; create keys at https://archive.org/account/s3.php}"

OUTPUT_FILE=saved_responses.jsonl
failures=0

# `|| [ -n "$line" ]` keeps a last line that has no trailing newline.
while IFS= read -r line || [ -n "$line" ]; do
  # Trim leading, then trailing whitespace. [:space:] includes '\r', so
  # files with CRLF line endings are handled as well.
  trimmed="${line#"${line%%[![:space:]]*}"}"
  trimmed="${trimmed%"${trimmed##*[![:space:]]}"}"

  # Skip blank lines and '#' comment lines.
  case "$trimmed" in
    '' | \#*) continue ;;
  esac

  echo "Processing URL: $trimmed"

  # Send a request to save the URL, appending the response to the output
  # file. --data-urlencode percent-encodes the URL so that '&', '+', '%' or
  # '#' inside it cannot corrupt the form body; curl joins the individual
  # data options with '&'. -sS hides the progress meter but keeps errors.
  # NOTE(review): the keys are passed on curl's command line and are
  # therefore visible to other local users via the process list.
  if ! curl -sS -X POST \
    -H "Accept: application/json" \
    -H "Authorization: LOW ${AIS3_ACCESS}:${AIS3_SECRET}" \
    --data-urlencode "url=${trimmed}" \
    --data "delay_wb_availability=1" \
    --data "skip_first_archive=1" \
    "https://web.archive.org/save" >> "$OUTPUT_FILE"; then
    # A transport-level failure must not abort the rest of the batch under
    # `set -e`; record it and carry on with the next URL.
    echo "Request failed for URL: $trimmed" >&2
    failures=$((failures + 1))
  fi

  # Terminate the record so the output file stays one response per line.
  echo >> "$OUTPUT_FILE"

  # Wait for 20 seconds between requests to avoid rate limiting.
  sleep 20
done < "$file"

# Report overall failure only after every URL has been attempted.
if [ "$failures" -gt 0 ]; then
  echo "$failures request(s) failed" >&2
  exit 1
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment