Last active
June 17, 2025 21:27
-
-
Save rvtr/1b471e5f5215c368fd78d9aba05f8dc2 to your computer and use it in GitHub Desktop.
Scrapes agenda data from london.ca
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Hey folks, please do not run this script more than necessary.
# Too many search requests will temporarily block searches for everyone, not just you.
# I do not want to DDOS London. I just want to allow for personal backups. Cheers!
echo -e "\n-========================================================================-"
echo -e "-=-                                                                    -=-"
echo -e "-=-   SCRAPE_MEETINGS.SH: Downloads committee videos and agendas       -=-"
echo -e "-=-                                                                    -=-"
echo -e "-=-   https://gist.github.com/rvtr/1b471e5f5215c368fd78d9aba05f8dc2    -=-"
echo -e "-=-   Lillian Skinner (2025)                                           -=-"
echo -e "-=-                                                                    -=-"
echo -e "-========================================================================-"
echo "Starting job: SCRAPE_MEETINGS: $(date)"
# Warning to all who read this script:
# It is badly written. I know it is bad, but I am tired okay, and sometimes sloppy just works.
# London seems to have recently blocked unusual user agents. Can't use wget or even ping.
# Thankfully we can pretend to be a real browser with a spoofed UA string.
WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"

# Scratch locations: the search results page and the agenda currently being worked on.
TEMP_DIR="./tmp/"
SEARCH_PAGE="./tmp/index.html"
AGENDA_HTML="./tmp/work.html"
#VIDEO_TIMESTAMP_JSON="./tmp/time.json"

# Start every run from a clean scratch directory.
# Quoted expansions + 'rm -rf --' so an odd path can never glob or word-split,
# and 'mkdir -p' so a leftover directory does not abort the script.
if [ -d "$TEMP_DIR" ]; then
  rm -rf -- "$TEMP_DIR"
fi
rm -f -- "$SEARCH_PAGE" "$AGENDA_HTML"
mkdir -p "$TEMP_DIR"

SEARCH_URL="https://london.ca/government/council-civic-administration/council-committee-meetings/meetings"
# Need to confirm. When stacking params does the type need to be f[1]?
SEARCH_FORMAT_COMMITTEE="f[1]=meeting_type%3A"
SEARCH_FORMAT_DATE="f[0]=meeting_date%3A"
SEARCH_FORMAT_QUERY="search=query&sort_by=field_meeting_date"

# Year range to scrape: [i, x). As far as I'm aware there are no meetings prior to 2011.
# x = i + 1 deliberately limits the run to a single year; widen x to scrape more.
i=2011
x=$((i + 1))
echo "$x" # debug: show the (exclusive) end year
SEARCH_END="FALSE"
# Decode the HTML entities that appear in scraped href values so wget receives real URLs.
# NOTE(review): the original sed pair here was garbled in transit ("s/&/\&/g" and a
# broken-quoted apostrophe substitution); this reconstruction assumes the intent was to
# decode &amp; and &#039; — confirm against the live search-result markup.
decode_url() {
  sed -e 's/&amp;/\&/g' -e "s/&#039;/'/g"
}

# Outer loop: each year from $i up to (but not including) $x.
# Inner loop: walk the paginated search results until the site reports no more pages.
while (( i < x )); do
  j=0
  SEARCH_END="FALSE"
  while [[ "$SEARCH_END" == "FALSE" ]]; do
    echo "SCRAPE_MEETINGS: Downloading search results... Page $j of $i"
    wget --user-agent="$WGET_UA" "$SEARCH_URL?$SEARCH_FORMAT_DATE$i&page=$j" -O "$SEARCH_PAGE" -q #--show-progress
    WGET_RC=$?
    # wget exits 8 on a server error response — that marks the end of pagination.
    if [ "$WGET_RC" -ne 8 ]; then
      FOUNDMEETING="FALSE"
      if ! grep -q "No results found." "$SEARCH_PAGE"; then
        while IFS= read -r LINE; do
          # All meeting items in the search results are formatted like so:
          #   - One line with the name
          #   - Second line with all other info including links
          #
          # The first line is recognised by the class "views-field-field-meeting-notes".
          # FOUNDMEETING=TRUE records that the first line was seen, so the NEXT line read
          # is "confirmed" as line 2 (the links line) of the meeting info.
          if [[ "TRUE" == "$FOUNDMEETING" ]]; then
            FOUNDMEETING="FALSE"
            # BUGFIX: ERROR was never initialised (and never reset between meetings);
            # the later check was also a tautology. Reset it per meeting here.
            ERROR="FALSE"
            echo "SCRAPE_MEETINGS: -========================================================================-"
            echo "SCRAPE_MEETINGS: Working on $MEETING_NAME ($MEETING_YEAR/$MEETING_MONTH/$MEETING_DAY)"
            echo "SCRAPE_MEETINGS: All files to be saved as $MEETING_NAME/$MEETING_YEAR/$MEETING_MONTH-$MEETING_DAY/"
            echo "SCRAPE_MEETINGS: -========================================================================-"
            echo "SCRAPE_MEETINGS: Task starting on: $(date)"
            #echo "CANCEL NOW!!!"
            #sleep 5
            # Grab meeting item links (one URL per line)...
            echo "$LINE" | sed 's/href=./\nhref="/g' | grep 'href="https' | sed 's/.*href="\([^"]*\)".*/\1/p' | uniq > ./tmp/meeting_urls
            # ...and the matching item labels ("types"), in the same order.
            echo "$LINE" | sed 's/rel=.noreferrer.>/\nrel="noreferrer">/g' | grep 'rel="noreferrer">' | sed 's/.*rel="noreferrer">\([^<]*\)<.*/\1/p' | uniq > ./tmp/meeting_types
            AGENDA_HTML_URL=""
            AGENDA_PDF_URL=""
            AGENDA_REVISE_HTML_URL=""
            AGENDA_REVISE_PDF_URL=""
            MINUTES_HTML_URL=""
            MINUTES_PDF_URL=""
            MINUTES_ATTACH_PDF_URL=""
            echo "SCRAPE_MEETINGS: Found the following documents:"
            # Read URL (fd 0) and label (fd 3) in lockstep; the labels carry a trailing space.
            while IFS= read -r DOC_URL && IFS= read -r DOC_LABEL <&3; do
              echo "SCRAPE_MEETINGS: - $DOC_LABEL"
              case "$DOC_LABEL" in
                "Agenda (HTML) ")
                  AGENDA_HTML_URL=$(printf '%s\n' "$DOC_URL" | decode_url) ;;
                "Agenda (PDF) ")
                  AGENDA_PDF_URL=$(printf '%s\n' "$DOC_URL" | decode_url) ;;
                "Revised Agenda (HTML) ")
                  AGENDA_REVISE_HTML_URL=$(printf '%s\n' "$DOC_URL" | decode_url) ;;
                "Revised Agenda (PDF) ")
                  AGENDA_REVISE_PDF_URL=$(printf '%s\n' "$DOC_URL" | decode_url) ;;
                "Minutes (HTML) ")
                  MINUTES_HTML_URL=$(printf '%s\n' "$DOC_URL" | decode_url) ;;
                "Minutes (PDF) ")
                  MINUTES_PDF_URL=$(printf '%s\n' "$DOC_URL" | decode_url) ;;
                "Minutes with Attachments (PDF) ")
                  MINUTES_ATTACH_PDF_URL=$(printf '%s\n' "$DOC_URL" | decode_url) ;;
              esac
            done < ./tmp/meeting_urls 3< ./tmp/meeting_types
            # Always prefer Revised Agendas
            echo "SCRAPE_MEETINGS: Downloading agenda HTML..."
            if [[ "$AGENDA_REVISE_HTML_URL" != "" ]]; then
              wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "$AGENDA_HTML" -q #--show-progress
            elif [[ "$AGENDA_HTML_URL" != "" ]]; then
              wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "$AGENDA_HTML" -q #--show-progress
            else
              # No agenda HTML at all — skip the whole save section for this meeting.
              ERROR="TRUE"
            fi
            # BUGFIX: original read `[[ ERROR="FALSE" ]]`, a non-empty literal that is
            # ALWAYS true, so the save section ran even when no agenda was found.
            if [[ "$ERROR" == "FALSE" ]]; then
              # mkdir -p replaces the unconditional mkdir chain (which errored on reruns).
              MEETING_DIR="./LondonArchive/Meetings/$MEETING_NAME/$MEETING_YEAR/$MEETING_MONTH-$MEETING_DAY"
              mkdir -p "$MEETING_DIR/Attachments"
              # Direct video links are always "video.isilive.ca/<REGION>/<NAME>".
              # There are some eScribe ones, but those are in m3u8s and are really
              # annoying to work with ...not as annoying as more sed though.
              VIDEO_URL=$(grep 'id="isi_player"' "$AGENDA_HTML" | sed -n 's/.*data-stream_name="\([^"]*\)".*/\1/p' | sed 's/ /%20/g')
              if [[ "$VIDEO_URL" != "" ]]; then
                echo "SCRAPE_MEETINGS: Saving recording URL..."
                echo "https://video.isilive.ca/london/$VIDEO_URL" > "$MEETING_DIR/RecordingLink.txt"
              fi
              # Get attachment links (de-duplicated with awk while preserving order)...
              grep "AgendaItemAttachment AgendaItemAttachmentNotSelected" "$AGENDA_HTML" | sed 's/href=.filestream\.ashx/\nhref="filestream\.ashx/g' | grep 'filestream.ashx' | sed 's/. data-toggle/\" data-toggle/p' | sed 's/href=.\([^/]*\)".*/\1/p' | awk '!x[$0]++' > ./tmp/attachment_urls
              # ...and the matching attachment display names.
              grep "AgendaItemAttachment AgendaItemAttachmentNotSelected" "$AGENDA_HTML" | sed 's/data-original-title=./\ndata-original-title='\''/g' | grep 'data-original-title' | sed 's/data-original-title=.//p' | sed 's/.pdf['\'':"].*/.pdf/g' | awk '!x[$0]++' > ./tmp/attachment_names
              # Download each attachment under the name grabbed above.
              echo "SCRAPE_MEETINGS: Found the following agenda attachments:"
              while IFS= read -r ATTACH_URL && IFS= read -r ATTACH_NAME <&3; do
                echo "SCRAPE_MEETINGS: - $ATTACH_NAME"
                wget --user-agent="$WGET_UA" "https://pub-london.escribemeetings.com/$ATTACH_URL" -O "$MEETING_DIR/Attachments/$ATTACH_NAME" -q #--show-progress
              done < ./tmp/attachment_urls 3< ./tmp/attachment_names
              echo "SCRAPE_MEETINGS: All attachments saved."
              # Save the agenda: prefer PDF copies; fall back to HTML when no PDF exists.
              if [[ "$AGENDA_REVISE_PDF_URL" != "" ]] || [[ "$AGENDA_PDF_URL" != "" ]]; then
                if [[ "$AGENDA_REVISE_PDF_URL" != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving revised agenda as PDF..."
                  wget --user-agent="$WGET_UA" "$AGENDA_REVISE_PDF_URL" -O "$MEETING_DIR/Agenda_Revised.pdf" -q #--show-progress
                fi
                if [[ "$AGENDA_PDF_URL" != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving regular agenda as PDF..."
                  wget --user-agent="$WGET_UA" "$AGENDA_PDF_URL" -O "$MEETING_DIR/Agenda.pdf" -q #--show-progress
                fi
              else
                if [[ "$AGENDA_REVISE_HTML_URL" != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving revised agenda as HTML... (no PDF found!)"
                  wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "$MEETING_DIR/Agenda_Revised.html" -q #--show-progress
                fi
                if [[ "$AGENDA_HTML_URL" != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving regular agenda as HTML... (no PDF found!)"
                  wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "$MEETING_DIR/Agenda.html" -q #--show-progress
                fi
              fi
              # Save the minutes with the same PDF-first preference.
              if [[ "$MINUTES_ATTACH_PDF_URL" != "" ]] || [[ "$MINUTES_PDF_URL" != "" ]]; then
                if [[ "$MINUTES_ATTACH_PDF_URL" != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving minutes with attachments as PDF..."
                  wget --user-agent="$WGET_UA" "$MINUTES_ATTACH_PDF_URL" -O "$MEETING_DIR/Minutes_With_Attachments.pdf" -q #--show-progress
                fi
                if [[ "$MINUTES_PDF_URL" != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving minutes as PDF..."
                  wget --user-agent="$WGET_UA" "$MINUTES_PDF_URL" -O "$MEETING_DIR/Minutes.pdf" -q #--show-progress
                fi
              else
                if [[ "$MINUTES_HTML_URL" != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving minutes as HTML... (no PDF found!)"
                  wget --user-agent="$WGET_UA" "$MINUTES_HTML_URL" -O "$MEETING_DIR/Minutes.html" -q #--show-progress
                fi
              fi
            fi
            echo "SCRAPE_MEETINGS: All files from this meeting have been saved."
          fi
          # Does this line start a new meeting entry? If so, parse its header now;
          # the links line that follows is handled on the next iteration (above).
          if echo "$LINE" | grep -q "views-field-field-meeting-notes"; then
            # Header text looks like: "January 2, 2025 - Committee Name"
            MEETING_INFO=$(echo "$LINE" | sed -n 's/.*<div class="meeting__date">\([^<]*\)<\/div>.*/\1/p')
            MEETING_MONTH_WORD=$(echo "$MEETING_INFO" | sed -E 's/^([A-Za-z]+) .*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
            MEETING_DAY_SHORT=$(echo "$MEETING_INFO" | sed -E 's/^[A-Za-z]+ ([0-9]+),.*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
            MEETING_DAY=$(printf "%02d" "$MEETING_DAY_SHORT")
            MEETING_YEAR=$(echo "$MEETING_INFO" | sed -E 's/^[A-Za-z]+ [0-9]+, ([0-9]+).*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
            MEETING_NAME=$(echo "$MEETING_INFO" | sed -E 's/^[A-Za-z]+ [0-9]+, [0-9]+ - (.*)/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
            # Map the month word to a zero-padded number; "--" flags an unparsed month.
            case "$MEETING_MONTH_WORD" in
              January)   MEETING_MONTH="01" ;;
              February)  MEETING_MONTH="02" ;;
              March)     MEETING_MONTH="03" ;;
              April)     MEETING_MONTH="04" ;;
              May)       MEETING_MONTH="05" ;;
              June)      MEETING_MONTH="06" ;;
              July)      MEETING_MONTH="07" ;;
              August)    MEETING_MONTH="08" ;;
              September) MEETING_MONTH="09" ;;
              October)   MEETING_MONTH="10" ;;
              November)  MEETING_MONTH="11" ;;
              December)  MEETING_MONTH="12" ;;
              *)         MEETING_MONTH="--" ;;
            esac
            FOUNDMEETING="TRUE"
          fi
        done < "$SEARCH_PAGE"
      else
        SEARCH_END="TRUE"
        echo "SCRAPE_MEETINGS: No more pages!"
      fi
    else
      SEARCH_END="TRUE"
      echo "SCRAPE_MEETINGS: No more pages!"
    fi
    j=$((j + 1))
  done
  i=$((i + 1))
done
echo "Done job: SCRAPE_MEETINGS: $(date)"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment