Skip to content

Instantly share code, notes, and snippets.

@severo
Last active November 25, 2025 10:17
Show Gist options
  • Select an option

  • Save severo/e6099ab29a5ec17214145f834afbd1a7 to your computer and use it in GitHub Desktop.

Select an option

Save severo/e6099ab29a5ec17214145f834afbd1a7 to your computer and use it in GitHub Desktop.
*.jsonl
*.txt
#!/usr/bin/env bash
#
# Submit every URL listed in a text file to the Wayback Machine
# (https://web.archive.org/save) and append each response to
# saved_responses.jsonl.
#
# Usage:
#   save.sh <file>
#
# Input: one URL per line; blank lines and lines starting with '#' are skipped.
# Environment: AIS3_ACCESS and AIS3_SECRET carry the archive.org S3-style keys
# (created at https://archive.org/account/s3.php).

# Strict mode: abort on unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
#######################################
# Print the help text and terminate the script.
# Arguments: none
# Outputs:   writes the usage message to stderr
# Returns:   never returns; exits with status 2
#######################################
usage() {
  printf '%s\n' \
    'Usage:' \
    'save.sh <file>' \
    'Examples:' \
    'save.sh urls.txt' \
    'Pass the keys as AIS3_ACCESS and AIS3_SECRET environment variables.' \
    'Create them at https://archive.org/account/s3.php.' >&2
  exit 2
}
# At least one positional argument is required; otherwise show the help text
# (usage exits with status 2).
[[ $# -ge 1 ]] || usage

# Expected invocation: save.sh <file>
# The first argument is the path of the file listing the URLs to save.
file=$1
shift

# Refuse to continue unless the argument names an existing regular file.
[[ -f $file ]] || {
  echo "File not found: $file" >&2
  exit 2
}
# Responses are appended to this file, one per processed URL.
readonly OUTPUT_FILE=saved_responses.jsonl

# Fail fast when the credentials are missing, instead of tripping over an
# unset variable (or sending an empty Authorization value) in the middle of
# the run.
if [[ -z "${AIS3_ACCESS:-}" || -z "${AIS3_SECRET:-}" ]]; then
  echo "AIS3_ACCESS and AIS3_SECRET must be set; create them at https://archive.org/account/s3.php." >&2
  exit 2
fi

first_request=1

# `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  # Trim leading, then trailing, whitespace. [:space:] includes the carriage
  # return, so lines from CRLF files are handled as well.
  trimmed="${line#"${line%%[![:space:]]*}"}"
  trimmed="${trimmed%"${trimmed##*[![:space:]]}"}"

  # Skip blank lines and comment lines starting with '#'.
  [[ -z "$trimmed" || "$trimmed" == \#* ]] && continue

  # Wait 20 seconds between requests to avoid rate limiting. Sleeping before
  # every request except the first avoids a pointless pause after the last one.
  if (( first_request )); then
    first_request=0
  else
    sleep 20
  fi

  echo "Processing URL: $trimmed"

  # Ask the Wayback Machine to save the URL and append the response to the
  # output file. The URL is sent with --data-urlencode so that characters
  # such as '&', '+' or '%' inside it are not mistaken for form-field
  # separators or escapes; curl joins the data options with '&'.
  # NOTE(review): the credentials are passed on curl's command line, so they
  # are visible in the process list while curl runs.
  curl -X POST \
    -H "Accept: application/json" \
    -H "Authorization: LOW ${AIS3_ACCESS}:${AIS3_SECRET}" \
    --data-urlencode "url=$trimmed" \
    -d "delay_wb_availability=1" \
    -d "skip_first_archive=1" \
    "https://web.archive.org/save" >> "$OUTPUT_FILE" || {
    # Report which URL failed, then stop with curl's own exit code.
    status=$?
    echo "Request failed for: $trimmed (curl exit code $status)" >&2
    exit "$status"
  }

  # Terminate the response with a newline so each one sits on its own line.
  echo >> "$OUTPUT_FILE"
done < "$file"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment