Usage:
```
$ mkdir RHEL9Doc
$ cd RHEL9Doc
$ fetchdoc.sh https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/9
```
Prerequisites: curl, GNU Parallel, and GNU grep (the script uses `grep -P`)
#!/bin/bash
#
# fetchdoc.sh - download every PDF linked from the guides listed on a product
# documentation index page. Files are written to the current directory.
#
# Usage: fetchdoc.sh <URL>
#   URL  product document index page, e.g.
#        https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/9
#
# Requires: curl, GNU Parallel, and a grep that supports -P (PCRE).

# Number of guide pages processed concurrently by GNU Parallel.
readonly JOBS=10

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the usage text to stderr.
usage() {
  printf 'Usage: %s <URL>\n' "$0" >&2
  printf 'URL: product document index page, e.g. %s\n' \
    'https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/9' >&2
}

# Read HTML on stdin and print the value of every href="..." attribute,
# one per line.
extract_hrefs() {
  grep -oP '(?<=href=")[^"]*'
}

# Print an absolute URL for the link given as $1.
# Links that already carry a scheme are passed through unchanged; anything
# else is resolved against BASE_URL (scheme + host of the index page).
# Globals: BASE_URL (read)
absolute_url() {
  local link=$1
  case "$link" in
    http://* | https://*) printf '%s\n' "$link" ;;
    *) printf '%s/%s\n' "$BASE_URL" "${link#/}" ;;
  esac
}

# Download every PDF linked from the first page of one guide.
# Arguments: $1 - href of the guide, as found on the index page
# Globals:   BASE_URL (read, via absolute_url)
# Returns:   0 always; individual download failures are reported on stderr
#            so that one broken guide does not hide the others.
fetch_pdf() {
  local page_url pdf_links link pdf_url file

  page_url=$(absolute_url "$1")
  page_url="${page_url%/}/index"

  # A page may link the same PDF more than once; sort -u collapses those.
  pdf_links=$(curl -fsSL "$page_url" | extract_hrefs | grep '\.pdf$' | sort -u)
  [[ -n "$pdf_links" ]] || return 0

  while IFS= read -r link; do
    [[ -n "$link" ]] || continue
    pdf_url=$(absolute_url "$link")
    file=${pdf_url##*/}
    # Download to a temporary name first so an interrupted transfer never
    # leaves a truncated file that looks like a finished PDF.
    if curl -fsSL -o "${file}.part" "$pdf_url"; then
      mv -f -- "${file}.part" "$file"
    else
      rm -f -- "${file}.part"
      printf 'warning: failed to download %s\n' "$pdf_url" >&2
    fi
  done <<<"$pdf_links"
  return 0
}

main() {
  if [[ -z "${1:-}" ]]; then
    usage
    exit 1
  fi

  local tool
  for tool in curl parallel; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
  done

  local url=$1

  # BASE_URL is "<scheme>//<host>", e.g. https://docs.redhat.com
  BASE_URL=$(awk -F/ '{print $1"//"$3}' <<<"$url")

  # The jobs started by GNU Parallel run in fresh bash processes, so the
  # helpers and BASE_URL must be exported to be visible there.
  export BASE_URL
  export -f extract_hrefs absolute_url fetch_pdf

  # Download the index page; fail loudly instead of silently doing nothing.
  local index_html
  index_html=$(curl -fsSL "$url") || die "failed to download index page: $url"

  # Each guide is linked via a path containing /html/. De-duplicate so the
  # same guide is not fetched (and its PDF written) by several jobs at once.
  extract_hrefs <<<"$index_html" \
    | grep '/html/' \
    | sort -u \
    | parallel -j "$JOBS" fetch_pdf {}
}

main "$@"