#!/bin/bash
# User-Agent header value handed to wget (via --user-agent) by test_link.
# It mimics a desktop Chrome build; presumably so target servers treat the
# checker like an ordinary browser -- TODO confirm the reason for the
# trailing "Viewer/..." token.
WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
WGET_UA+=" AppleWebKit/537.36 (KHTML, like Gecko)"
WGET_UA+=" Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"

# Start-of-run banner.
printf '%s\n' "Checking link rot..."

#######################################
# Probe a single URL with `wget --spider` (existence check, no download).
# Globals:   WGET_UA (read) - value sent as the User-Agent header
# Arguments: $1 - URL to probe
# Outputs:   whatever wget itself prints
# Returns:   0 if wget succeeded, 1 for any wget failure
#######################################
test_link() {
	local -a spider_args=(
		--spider
		"$1"
		--no-check-certificate
		--user-agent="$WGET_UA"
	)

	# wget has several distinct non-zero exit codes; callers only need
	# pass/fail, so every failure is collapsed to 1.
	wget "${spider_args[@]}" || return 1
	return 0
}

#######################################
# Walk a master link list and spider-check every URL not yet recorded as
# finished, sorting results into per-category good/bad files.
#
# Input format (one entry per line):
#   "Link: >>> <location>"  opens a section and records <location>
#   "Link: http..."         a URL to check; only honoured inside a section
#   anything else           closes the current section
#
# Categories (matched on the URL text):
#   pa - contains "business-development/planning-development-applications/"
#   es - contains "pub-london.escribemeetings.com"
#   ot - everything else
# A 10 second pause precedes every pa and ot check; es checks are not delayed.
#
# Globals:   none written; calls test_link (defined elsewhere in this file)
# Arguments: $1 - link list to read          (default: masterlinks.txt)
#            $2 - file of finished URLs      (default: masterlinks_finished.txt)
#            $3 - directory for result files (default: tmp)
# Outputs:   appends "<url>" to <dir>/<cat>-good.txt or "<url>,<location>" to
#            <dir>/<cat>-bad.txt, and "<url>" to the finished file
# Returns:   0 on completion, 1 if the list is unreadable or <dir> cannot be
#            created
#######################################
check_master_links() {
	local list_file=${1:-masterlinks.txt}
	local done_file=${2:-masterlinks_finished.txt}
	local out_dir=${3:-tmp}
	local in_section=0 location="" raw url bucket delay

	touch -- "$done_file"

	if [[ ! -r "$list_file" ]]; then
		printf 'cannot read link list: %s\n' "$list_file" >&2
		return 1
	fi
	if ! mkdir -p -- "$out_dir"; then
		printf 'cannot create result directory: %s\n' "$out_dir" >&2
		return 1
	fi

	# "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
	while IFS= read -r raw || [[ -n "$raw" ]]; do
		if [[ "$raw" == "Link: >>>"* ]]; then
			in_section=1
			# Parameter expansion instead of unquoted echo | sed, so the
			# location text is neither word-split nor glob-expanded.
			location=${raw#"Link: >>> "}
			continue
		fi

		if [[ "$raw" != "Link: http"* ]]; then
			in_section=0
			continue
		fi

		# URL lines outside a ">>>" section are ignored.
		(( in_section )) || continue

		url=${raw#"Link: "}

		# Whole-line, fixed-string match: a URL must not count as finished
		# merely because it is a prefix/substring of another finished URL,
		# and URL punctuation must not be interpreted as a regex.
		if grep -qxF -- "$url" "$done_file"; then
			continue
		fi

		case "$url" in
			*"business-development/planning-development-applications/"*)
				bucket=pa
				delay=10
				;;
			*"pub-london.escribemeetings.com"*)
				bucket=es
				delay=0
				;;
			*)
				bucket=ot
				delay=10
				;;
		esac

		if (( delay > 0 )); then
			sleep "$delay"
		fi

		if test_link "$url"; then
			printf '%s\n' "$url" >> "${out_dir}/${bucket}-good.txt"
		else
			printf '%s\n' "$url,$location" >> "${out_dir}/${bucket}-bad.txt"
		fi

		# Recorded only for URLs actually checked, so the finished file
		# does not accumulate duplicates across runs.
		printf '%s\n' "$url" >> "$done_file"
	done < "$list_file"

	return 0
}

check_master_links masterlinks.txt masterlinks_finished.txt tmp

#echo "================GOOD================"
#cat tmp/pa-good.txt
#cat tmp/es-good.txt
#cat tmp/ot-good.txt
#echo "================BAD================="
#cat tmp/pa-bad.txt
#cat tmp/es-bad.txt
#cat tmp/ot-bad.txt

#######################################
# Print the number of lines in a file, treating a missing or unreadable
# file as empty.
# Arguments: $1 - path to count
# Outputs:   the line count (a bare integer) on stdout
#######################################
count_lines() {
	local file=$1 n=0
	if [[ -f "$file" && -r "$file" ]]; then
		n=$(wc -l < "$file") || n=0
	fi
	# Arithmetic expansion strips the padding some wc builds emit and turns
	# an empty value into 0.
	printf '%s\n' "$(( n ))"
}

#######################################
# Print the good / bad / total link counts for each category (pa, es, ot).
# A result file that does not exist counts as 0 rather than breaking the
# arithmetic with an empty operand.
# Arguments: $1 - directory holding the *-good.txt / *-bad.txt files
#                 (default: tmp)
# Outputs:   the summary table on stdout
#######################################
print_link_summary() {
	local dir=${1:-tmp}
	local pa_good es_good ot_good pa_bad es_bad ot_bad

	pa_good=$(count_lines "${dir}/pa-good.txt")
	es_good=$(count_lines "${dir}/es-good.txt")
	ot_good=$(count_lines "${dir}/ot-good.txt")
	pa_bad=$(count_lines "${dir}/pa-bad.txt")
	es_bad=$(count_lines "${dir}/es-bad.txt")
	ot_bad=$(count_lines "${dir}/ot-bad.txt")

	echo "Type, good links, error links, total links"
	echo "PA*: $pa_good / $pa_bad / $(( pa_good + pa_bad ))"
	echo "ES : $es_good / $es_bad / $(( es_good + es_bad ))"
	echo "OT : $ot_good / $ot_bad / $(( ot_good + ot_bad ))"
	echo "*Planning applications will always expire eventually. 'Good' links are temporary and at risk."
}

print_link_summary tmp
