Skip to content

Instantly share code, notes, and snippets.

@lukapaunovic
Created October 27, 2025 15:46
Show Gist options
  • Select an option

  • Save lukapaunovic/06fd6879de179f17876a70531eb0c906 to your computer and use it in GitHub Desktop.

Select an option

Save lukapaunovic/06fd6879de179f17876a70531eb0c906 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
set -euo pipefail
# =============================================================================
# Nginx log → IP agregacija → Team Cymru IP→ASN (bulk, netcat) → izveštaj
# - IPv4 i IPv6 podrška (posebne bulk sesije)
# - Brz join (AWK mapa) umesto grep u petlji
# - MIN_REQS filter za TOP IP ispis
# - Keš CSV sa flock zaključavanjem (+ opcioni age keš)
# - Retry + exponential backoff za bulk upite
# - Subnet analiza: IPv4 /24 i IPv6 /48
# =============================================================================
# Parametri (ENV):
# LOG = putanja do access loga (default: /var/log/nginx/access.log)
# SUBNET_MIN = prag za listanje /24 (IPv4) [po defaultu 3]
# SUBNET6_MIN = prag za listanje /48 (IPv6) [po defaultu 3]
# MIN_REQS = min broj zahteva za prikaz u "TOP IP ADRESE" [default 100]
# CACHE = CSV keš "ip,ASN,CC,Org" [default /var/cache/ip_asn_cache.csv]
# CACHE_TTL_DAYS = TTL u danima za refresh zapisa (0=isključeno) [default 0]
# BATCH_SIZE = broj IP-ova po bulk upitu [default 1000]
# CYMRU_HOST = whois.cymru.com
# CYMRU_PORT = 43
# TIMEOUT = timeout za nc (sek) [default 10]
# RETRIES = broj pokušaja po batch-u [default 3]
# SLEEP_BASE = početni backoff (sek) [default 0.4]
# =============================================================================
# --- Tunables: the environment wins; LOG may also be given as the first argument.
: "${LOG:=${1:-/var/log/nginx/access.log}}"
: "${SUBNET_MIN:=3}"
: "${SUBNET6_MIN:=3}"
: "${MIN_REQS:=100}"
: "${CACHE:=/var/cache/ip_asn_cache.csv}"
: "${CACHE_TTL_DAYS:=0}"          # 0 = no TTL-based refresh
CACHE_AGE="${CACHE}.age"          # optional side file: ip,YYYY-MM-DD
: "${BATCH_SIZE:=1000}"
: "${CYMRU_HOST:=whois.cymru.com}"
: "${CYMRU_PORT:=43}"
: "${TIMEOUT:=10}"
: "${RETRIES:=3}"
: "${SLEEP_BASE:=0.4}"

# --- Make sure the cache files exist before anything reads them.
mkdir -p "$(dirname "$CACHE")"
touch "$CACHE" "$CACHE_AGE"

# --- Private scratch directory, removed on every exit path.
TMPDIR="$(mktemp -d)"
remove_scratch_dir() {
  rm -rf "$TMPDIR"
}
trap remove_scratch_dir EXIT
IP_COUNTS="$TMPDIR/ip_counts.txt"

# --- File descriptor 9 is the handle later used with flock to lock the cache.
exec 9>"$CACHE.lock"
# -----------------------------------------------------------------------------
# 1) Izvući IP (cfip=... ili $1) i prebrojati
# -----------------------------------------------------------------------------
# Count requests per client address and write "<count> <ip>" lines, busiest
# first, to $IP_COUNTS.
#   - A "cfip=<addr>" token anywhere on the line is preferred when it holds a
#     valid-looking address; otherwise the first field is used.
#   - An IPv6 candidate must contain at least one ":"; a bare hex/decimal token
#     such as "200" or "dead" is not an address and is ignored.
awk '
  function is_ipv4(s) { return (s ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) }
  function is_ipv6(s) { return ((s ~ /:/) && (s ~ /^[0-9a-fA-F:]+$/)) }
  {
    ip = ""
    if (match($0, /cfip=[^ ]+/)) {
      # skip the 5 characters of "cfip="
      ip = substr($0, RSTART + 5, RLENGTH - 5)
      if (!is_ipv4(ip) && !is_ipv6(ip)) ip = ""
    }
    if (ip == "" && (is_ipv4($1) || is_ipv6($1))) ip = $1
    if (ip != "") count[ip]++
  }
  END { for (i in count) print count[i], i }
' "$LOG" | sort -nr > "$IP_COUNTS"

# Nothing to report on: say so and stop successfully.
if ! [ -s "$IP_COUNTS" ]; then
  echo "⚠️ Nema IP adresa u $LOG"
  exit 0
fi
# Split the counted addresses by family.
#   ips4.txt     - IPv4 addresses, in $IP_COUNTS order
#   ips6_all.txt - IPv6 addresses, sorted and unique
#   ips6.txt     - the IPv6 list handed to the lookup stage
# $IP_COUNTS holds one line per distinct address, so the line counts of the
# per-family files are the per-family totals.
TOTAL=$(wc -l < "$IP_COUNTS")
awk '$2 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ { print $2 }' "$IP_COUNTS" \
  > "$TMPDIR/ips4.txt"
# An IPv6 address must contain ":"; the hex-and-colon class alone would also
# accept plain numbers or hex words.
awk '($2 ~ /:/) && ($2 ~ /^[0-9a-fA-F:]+$/) { print $2 }' "$IP_COUNTS" \
  | sort -u > "$TMPDIR/ips6_all.txt"
IPV4=$(wc -l < "$TMPDIR/ips4.txt")
IPV6=$(wc -l < "$TMPDIR/ips6_all.txt")

# The addresses are passed through unchanged. Without brackets a trailing
# ":<digits>" cannot be told apart from a genuine last group (2001:db8::1 ends
# in ":1"), so stripping it as a "port" would corrupt real addresses. The
# lookup keys must also stay identical to the keys in $IP_COUNTS, which the
# report uses to join against the cache.
cp -- "$TMPDIR/ips6_all.txt" "$TMPDIR/ips6.txt"
# -----------------------------------------------------------------------------
# 2) Očisti keš CSV (ip,ASN,CC,Org) i učitaj u TEMP
# -----------------------------------------------------------------------------
# Normalize the on-disk cache ("ip,ASN,CC,Org") into $TMPDIR/cache_clean.csv:
# trim surrounding blanks, drop rows with fewer than four fields or an empty
# IP, and de-duplicate. Everything from the fourth field onward is the org
# name, so commas inside the name are kept rather than cut at the first one.
awk -F',' '
  function trim(s) { gsub(/^[ \t]+|[ \t]+$/, "", s); return s }
  NF >= 4 {
    ip = trim($1)
    if (ip == "") next
    org = $4
    for (i = 5; i <= NF; i++) org = org "," $i
    print ip "," trim($2) "," trim($3) "," trim(org)
  }
' "$CACHE" | sort -u > "$TMPDIR/cache_clean.csv"
# -----------------------------------------------------------------------------
# 3) Odredi IP-ove koji fale u kešu i/ili su istekli po TTL
# -----------------------------------------------------------------------------
# Decide which addresses need a lookup: those absent from the cache and, when
# CACHE_TTL_DAYS > 0, those whose newest recorded lookup date is older than
# the TTL (or missing or malformed). Results go to $miss4 and $miss6.
today="$(date +%F)"
miss4="$TMPDIR/miss4.txt"
miss6="$TMPDIR/miss6.txt"
: > "$miss4"; : > "$miss6"

age_source=/dev/null   # no age data is consulted when the TTL is disabled
stale_before=""        # entries dated before this YYYY-MM-DD are stale
if [ "$CACHE_TTL_DAYS" -gt 0 ]; then
  # Cleaned age map: ip,date
  awk -F',' 'NF>=2{gsub(/^[ \t]+|[ \t]+$/, "", $1); gsub(/^[ \t]+|[ \t]+$/, "", $2); if($1!="") print $1","$2}' "$CACHE_AGE" \
    | sort -u > "$TMPDIR/age_clean.csv"
  age_source="$TMPDIR/age_clean.csv"
  # GNU date; ISO dates compare correctly as plain strings, so no per-IP
  # epoch conversion is needed.
  stale_before="$(date -d "${CACHE_TTL_DAYS} days ago" +%F)"
fi

# select_lookup_ips <ip-list-file>
# Prints, sorted and unique, every address from the list that is missing from
# $TMPDIR/cache_clean.csv or is stale. One awk pass joins the three inputs in
# memory. When an address has several age rows, the newest date decides.
select_lookup_ips() {
  awk -F',' -v cutoff="$stale_before" '
    function is_date(s) {
      return (s ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/)
    }
    src == "cache" { cached[$1] = 1; next }
    src == "age" {
      if (!($1 in newest) || ($2 "") > (newest[$1] "")) newest[$1] = $2
      next
    }
    $0 == "" { next }
    !($0 in cached) { print; next }
    cutoff != "" && (!($0 in newest) || !is_date(newest[$0]) || (newest[$0] "") < (cutoff "")) { print }
  ' src=cache "$TMPDIR/cache_clean.csv" src=age "$age_source" src=ip "$1" \
    | sort -u
}

select_lookup_ips "$TMPDIR/ips4.txt" > "$miss4"
select_lookup_ips "$TMPDIR/ips6.txt" > "$miss6"
# -----------------------------------------------------------------------------
# 4) Bulk lookup funkcija (odvojeno za IPv4 i IPv6)
# -----------------------------------------------------------------------------
#######################################
# Resolve a list of IPs to ASN data through the Team Cymru bulk whois
# interface and append the result as "ip,ASN,CC,Org" rows.
# Globals (read): TMPDIR, BATCH_SIZE, CYMRU_HOST, CYMRU_PORT, TIMEOUT,
#                 RETRIES, SLEEP_BASE
# Arguments:
#   $1 - input file, one IP per line
#   $2 - output CSV (appended to, then sorted and de-duplicated)
#   $3 - family tag ("4" or "6"), used only to name scratch files and in logs
# Outputs:  warnings about failed attempts or batches on stderr
# Returns:  always 0. A batch that fails every retry is skipped, so an
#           unreachable whois server cannot abort a caller running under
#           "set -e".
#######################################
cymru_bulk_lookup() {
  local in="$1" out_csv="$2" fam="$3"
  local batch output ok try sleep_s

  [ -s "$in" ] || return 0

  # Clear leftovers from an earlier call with the same family tag so they
  # are not queried again.
  rm -f -- "$TMPDIR"/batch_"${fam}"_*.batch
  split -l "$BATCH_SIZE" -d --additional-suffix=.batch "$in" "$TMPDIR/batch_${fam}_"

  for batch in "$TMPDIR"/batch_"${fam}"_*.batch; do
    # Bulk protocol: "begin" / "verbose" / one IP per line / "end".
    {
      echo "begin"
      echo "verbose"
      cat "$batch"
      echo "end"
    } > "$TMPDIR/query_${fam}.txt"

    ok=0
    sleep_s="$SLEEP_BASE"
    for ((try = 1; try <= RETRIES; try++)); do
      if output=$(timeout "$TIMEOUT" nc "$CYMRU_HOST" "$CYMRU_PORT" < "$TMPDIR/query_${fam}.txt" 2>/dev/null); then
        ok=1
        break
      fi
      echo "⚠️ Batch fam=$fam try=$try/$RETRIES timeout/greška; backoff ${sleep_s}s" >&2
      sleep "$sleep_s"
      # roughly exponential backoff
      sleep_s=$(awk -v s="$sleep_s" 'BEGIN{printf "%.3f", (s*1.8)+0.1}')
    done
    if [ "$ok" -ne 1 ]; then
      echo "❌ Odustajem od ovog batch-a (fam=$fam) nakon $RETRIES pokušaja." >&2
      continue
    fi

    # Reply rows: "AS | IP | BGP Prefix | CC | Registry | Allocated | AS Name".
    # The first line is the server banner; rows with fewer than seven fields
    # (errors, blank lines) are ignored.
    awk -F'|' 'NR>1 && NF>=7 {
      for (i=1;i<=NF;i++) gsub(/^[ \t]+|[ \t]+$/, "", $i)
      asn=$1; ip=$2; cc=$4; org=$7;
      # drop a trailing ", XX" country suffix from the AS name
      sub(/, [A-Z][A-Z]$/, "", org)
      if (asn!="") {
        if (asn !~ /^AS/) asn="AS"asn
        if (cc=="") cc="??"
        if (org=="") org="Unknown"
        print ip","asn","cc","org
      }
    }' <<< "$output" >> "$out_csv"

    # be gentle with the server between batches
    sleep 0.2
  done

  if [ -s "$out_csv" ]; then
    sort -u "$out_csv" -o "$out_csv"
  fi
  return 0
}
# -----------------------------------------------------------------------------
# 5) Lookup za IPv4 i IPv6 koji fale (ili su stale po TTL)
# -----------------------------------------------------------------------------
# Query Team Cymru once per address family, but only when that family has
# addresses still waiting for a lookup (missing from the cache or stale).
for fam in 4 6; do
  case "$fam" in
    4) pending="$miss4" ;;
    6) pending="$miss6" ;;
  esac
  [ -s "$pending" ] || continue
  printf '%s\n' "🔍 Team Cymru bulk lookup (IPv${fam}) za $(wc -l < "$pending") IP-ova (batch=$BATCH_SIZE) ..."
  cymru_bulk_lookup "$pending" "$TMPDIR/new${fam}.csv" "$fam"
done
# Merge fresh lookup results into the cache under the fd-9 lock and, when the
# TTL is enabled, stamp the refreshed addresses with today's date.
# A refreshed address replaces its previous cache row and its previous age
# row. Keeping the old rows would let duplicates pile up and would leave the
# old date in place, so the address would look stale on every run.
new_any=0
if [ -s "$TMPDIR/new4.csv" ] || [ -s "$TMPDIR/new6.csv" ]; then
  new_any=1
  new_files=()
  for candidate in "$TMPDIR/new4.csv" "$TMPDIR/new6.csv"; do
    if [ -s "$candidate" ]; then
      new_files+=("$candidate")
    fi
  done

  flock 9

  # New rows first; then every old row whose IP was not refreshed.
  awk -F',' '
    src == "new" { fresh[$1] = 1; print; next }
    $0 != "" && !($1 in fresh) { print }
  ' src=new "${new_files[@]}" src=old "$CACHE" \
    | sort -u > "$TMPDIR/cache_merged.csv"
  # Overwrite in place (not mv) so the cache keeps its inode and permissions.
  cat "$TMPDIR/cache_merged.csv" > "$CACHE"

  if [ "$CACHE_TTL_DAYS" -gt 0 ]; then
    today="$(date +%F)"
    # One age row per IP: the newest old date, overridden by today for every
    # address that was just looked up.
    awk -F',' -v d="$today" '
      src == "old" {
        if (NF >= 2 && $1 != "" && $2 != "" && (!($1 in stamp) || ($2 "") > (stamp[$1] ""))) stamp[$1] = $2
        next
      }
      $1 != "" { stamp[$1] = d }
      END { for (ip in stamp) print ip "," stamp[ip] }
    ' src=old "$CACHE_AGE" src=new "${new_files[@]}" \
      | sort -u > "$TMPDIR/age_merged.csv"
    cat "$TMPDIR/age_merged.csv" > "$CACHE_AGE"
  fi

  flock -u 9
fi
# Ponovo očisti i učitaj keš CSV (posle eventualnog dopune)
# Rebuild $TMPDIR/cache_clean.csv from the (possibly extended) cache: trim
# blanks, drop rows with fewer than four fields or an empty IP, de-duplicate.
# Fields four and onward together form the org name, so names that contain
# commas survive intact.
awk -F',' '
  function trim(s) { gsub(/^[ \t]+|[ \t]+$/, "", s); return s }
  NF >= 4 {
    ip = trim($1)
    if (ip == "") next
    org = $4
    for (i = 5; i <= NF; i++) org = org "," $i
    print ip "," trim($2) "," trim($3) "," trim(org)
  }
' "$CACHE" | sort -u > "$TMPDIR/cache_clean.csv"
# -----------------------------------------------------------------------------
# 6) Izveštaj
# -----------------------------------------------------------------------------
echo "═══════════════════════════════════════════════════════════════════════════"
echo "📊 Analiza: $TOTAL jedinstvenih IP adresa ($IPV4 IPv4, $IPV6 IPv6)"
echo "═══════════════════════════════════════════════════════════════════════════"
echo ""
echo "TOP IP ADRESE (≥ ${MIN_REQS} req):"
echo "───────────────────────────────────────────────────────────────────────────"
printf "%-7s %-39s %-12s %-4s %s\n" "Req" "IP Address" "ASN" "CC" "Provider"
echo "───────────────────────────────────────────────────────────────────────────"
# Join $IP_COUNTS against the cleaned cache in a single awk pass: the cache is
# loaded into per-IP maps in BEGIN, then each "<count> <ip>" line with at
# least MIN requests is printed with its ASN, country and provider.
awk -v MIN="$MIN_REQS" -v cachefile="$TMPDIR/cache_clean.csv" '
  BEGIN {
    while ((getline line < cachefile) > 0) {
      if (split(line, a, ",") < 4) continue
      # The org is whatever follows the third comma. Cutting by position keeps
      # commas inside the name and cannot be fooled by org text that also
      # occurs earlier in the row.
      org = line
      sub(/^[^,]*,[^,]*,[^,]*,/, "", org)
      asn_of[a[1]] = a[2]
      cc_of[a[1]]  = a[3]
      org_of[a[1]] = org
    }
    close(cachefile)
  }
  {
    count = $1; ip = $2
    if (count < MIN) next
    if (ip ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ || ip ~ /^[0-9a-fA-F:]+$/) {
      if (ip in org_of) {
        printf "%-7s %-39s %-12s %-4s %s\n", count, ip, asn_of[ip], cc_of[ip], org_of[ip]
      } else {
        printf "%-7s %-39s %-12s %-4s %s\n", count, ip, "-", "-", "No info"
      }
    }
  }
' "$IP_COUNTS"
echo "═══════════════════════════════════════════════════════════════════════════"
# -----------------------------------------------------------------------------
# 7) Subnet analiza
# - IPv4: /24
# - IPv6: /48 (jednostavan rez, prva 3 hexteta)
# -----------------------------------------------------------------------------
# IPv4 /24
# IPv4 /24 analysis: count distinct client addresses per /24 and list every
# subnet with at least SUBNET_MIN of them, busiest subnet first.
awk '$2 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ {
  split($2, o, ".")
  printf "%d.%d.%d.0/24\n", o[1], o[2], o[3]
}' "$IP_COUNTS" \
  | sort | uniq -c | awk -v m="$SUBNET_MIN" '$1>=m {printf "%s %s\n",$1,$2}' \
  | sort -nr > "$TMPDIR/subnets4.txt"

# list_subnet4_members <subnet> <max>
# Prints up to <max> "<count> <ip>" lines from $IP_COUNTS whose address lies
# in <subnet>. Membership is decided by recomputing the /24 key exactly as
# above, so 10.0.1.0/24 can never pick up 10.0.10.x. The row limit is applied
# inside awk while it reads a regular file, so no writer is ever cut off by
# SIGPIPE (which would abort the script under "set -o pipefail").
list_subnet4_members() {
  awk -v s="$1" -v max="$2" '
    $2 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ {
      split($2, o, ".")
      if (sprintf("%d.%d.%d.0/24", o[1], o[2], o[3]) != s) next
      print $1, $2
      if (++shown >= max) exit
    }
  ' "$IP_COUNTS"
}

if [ -s "$TMPDIR/subnets4.txt" ]; then
  echo ""
  echo "🚨 SUMNJIVI IPv4 SUBNET-I (/24 sa ≥${SUBNET_MIN} različitih IP-ova):"
  echo "───────────────────────────────────────────────────────────────────────────"
  while read -r count subnet; do
    # The busiest address of the subnet stands in for the whole subnet when
    # looking up the provider.
    first_ip=$(list_subnet4_members "$subnet" 1 | awk '{print $2}')
    provider=$(awk -F',' -v ip="$first_ip" '
      $1 == ip { sub(/^[^,]*,[^,]*,[^,]*,/, ""); print; exit }
    ' "$TMPDIR/cache_clean.csv")
    [ -n "$provider" ] || provider="Unknown"
    printf "%-6s %-20s %s\n" "$count" "$subnet" "$provider"
    list_subnet4_members "$subnet" 8 | awk '{printf " ├─ %s %s\n", $1, $2}'
  done < "$TMPDIR/subnets4.txt"
  echo "═══════════════════════════════════════════════════════════════════════════"
fi
# IPv6 /48 (prva 3 hexteta → xxxx:xxxx:xxxx::/48)
# IPv6 /48 analysis: count distinct client addresses per /48 and list every
# prefix with at least SUBNET6_MIN of them, busiest prefix first.

# awk helper shared by the grouping pass and the membership pass.
# net48(addr) returns "h1:h2:h3::/48" for the first three 16-bit groups of
# addr. A "::" run is expanded to the zero groups it stands for before the
# groups are taken; groups are lower-cased and stripped of leading zeros, so
# different spellings of one prefix fall into the same bucket.
net48_awk='
function net48(addr,    pos, nh, nt, hp, tp, g, i, out) {
  addr = tolower(addr)
  pos = index(addr, "::")
  if (pos > 0) {
    nh = split(substr(addr, 1, pos - 1), hp, ":")
    nt = split(substr(addr, pos + 2), tp, ":")
    for (i = 1; i <= 8; i++) g[i] = "0"
    for (i = 1; i <= nh; i++) g[i] = hp[i]
    for (i = 1; i <= nt; i++) g[8 - nt + i] = tp[i]
  } else {
    split(addr, g, ":")
  }
  out = ""
  for (i = 1; i <= 3; i++) {
    sub(/^0+/, "", g[i])
    if (g[i] == "") g[i] = "0"
    out = out g[i] ":"
  }
  return out ":/48"
}
'

awk "$net48_awk"'
  ($2 ~ /:/) && ($2 ~ /^[0-9a-fA-F:]+$/) { print net48($2) }
' "$IP_COUNTS" \
  | sort | uniq -c | awk -v m="$SUBNET6_MIN" '$1>=m {printf "%s %s\n",$1,$2}' \
  | sort -nr > "$TMPDIR/subnets6.txt"

# list_subnet6_members <prefix> <max>
# Prints up to <max> "<count> <ip>" lines from $IP_COUNTS whose address maps
# to <prefix> under net48(). The row limit is applied inside awk while it
# reads a regular file, so no writer is ever cut off by SIGPIPE.
list_subnet6_members() {
  awk -v s="$1" -v max="$2" "$net48_awk"'
    ($2 ~ /:/) && ($2 ~ /^[0-9a-fA-F:]+$/) && net48($2) == s {
      print $1, $2
      if (++shown >= max) exit
    }
  ' "$IP_COUNTS"
}

if [ -s "$TMPDIR/subnets6.txt" ]; then
  echo ""
  echo "🚨 SUMNJIVI IPv6 SUBNET-I (/48 sa ≥${SUBNET6_MIN} različitih IP-ova):"
  echo "───────────────────────────────────────────────────────────────────────────"
  while read -r count subnet; do
    # The busiest address of the prefix stands in for the whole prefix when
    # looking up the provider.
    first_ip=$(list_subnet6_members "$subnet" 1 | awk '{print $2}')
    provider=$(awk -F',' -v ip="$first_ip" '
      $1 == ip { sub(/^[^,]*,[^,]*,[^,]*,/, ""); print; exit }
    ' "$TMPDIR/cache_clean.csv")
    [ -n "$provider" ] || provider="Unknown"
    printf "%-6s %-22s %s\n" "$count" "$subnet" "$provider"
    list_subnet6_members "$subnet" 8 | awk '{printf " ├─ %s %s\n", $1, $2}'
  done < "$TMPDIR/subnets6.txt"
  echo "═══════════════════════════════════════════════════════════════════════════"
fi
# -----------------------------------------------------------------------------
# 8) TOP Provideri (zbir req po IP)
# -----------------------------------------------------------------------------
echo ""
echo "🌐 TOP PROVIDER-I:"
echo "───────────────────────────────────────────────────────────────────────────"
# Sum request counts per provider (the org column of the cleaned cache) and
# print the ten largest. Addresses without a cache entry, and entries whose
# provider is empty or "Unknown", are left out.
# The ten-row limit is applied by the final awk, which reads all of its input.
# Closing the pipe early would kill sort with SIGPIPE on large outputs and,
# under "set -o pipefail", abort the script.
awk -v cachefile="$TMPDIR/cache_clean.csv" '
  BEGIN {
    while ((getline line < cachefile) > 0) {
      if (split(line, a, ",") < 4) continue
      # org = everything after the third comma
      org = line
      sub(/^[^,]*,[^,]*,[^,]*,/, "", org)
      org_of[a[1]] = org
    }
    close(cachefile)
  }
  ($2 in org_of) {
    prov = org_of[$2]
    if (prov != "" && prov != "Unknown") sum[prov] += $1
  }
  END {
    for (p in sum) printf "%d\t%s\n", sum[p], p
  }
' "$IP_COUNTS" | sort -nr | awk -F'\t' 'NR <= 10 {printf "%-8s %s\n", $1, $2}'
echo "═══════════════════════════════════════════════════════════════════════════"
# -----------------------------------------------------------------------------
# 9) TOP Botovi
# -----------------------------------------------------------------------------
echo ""
echo "🤖 TOP BOTOVI:"
echo "───────────────────────────────────────────────────────────────────────────"
# Count requests per bot. A line counts when its ua="..." value mentions
# bot, crawl or spider. The bot name is the run of name characters ending in
# that keyword (tried in that order), or the whole user agent if none is found.
# The ten-row limit lives in the final awk, which drains its input, so sort is
# never cut off by SIGPIPE. The name is taken from the raw line so that spaces
# inside a user-agent-derived name are printed unchanged.
awk '
{
  if (match($0, /ua="[^"]+"/)) {
    # skip the 4 characters of ua=" and drop the closing quote
    ua=substr($0, RSTART+4, RLENGTH-5)
    if (ua ~ /[Bb]ot|[Cc]rawl|[Ss]pider/) {
      if (match(ua, /[A-Za-z0-9._-]*[Bb]ot/)) bot=substr(ua, RSTART, RLENGTH)
      else if (match(ua, /[A-Za-z0-9._-]*[Cc]rawl/)) bot=substr(ua, RSTART, RLENGTH)
      else if (match(ua, /[A-Za-z0-9._-]*[Ss]pider/)) bot=substr(ua, RSTART, RLENGTH)
      else bot=ua
      bots[bot]++
    }
  }
}
END { for (b in bots) print bots[b], b }' "$LOG" \
  | sort -nr \
  | awk 'NR <= 10 {
      cnt = $1
      name = $0
      sub(/^[^ ]+ /, "", name)
      printf "%-8s %s\n", cnt, name
    }'
echo "═══════════════════════════════════════════════════════════════════════════"
# -----------------------------------------------------------------------------
# 10) Status kodovi
# -----------------------------------------------------------------------------
echo
printf '%s\n' "📊 STATUS KODOVI:"
printf '%s\n' "───────────────────────────────────────────────────────────────────────────"
# Tally the numeric value of every "status=<digits>" token and show the ten
# most frequent codes.
awk '
  match($0, /status=[0-9]+/) {
    # skip the 7 characters of "status="
    seen[substr($0, RSTART + 7, RLENGTH - 7)]++
  }
  END {
    for (code in seen) print seen[code], code
  }
' "$LOG" \
  | sort -nr | head -10 | awk '{ printf "%-8s %s\n", $1, $2 }'
printf '%s\n' "═══════════════════════════════════════════════════════════════════════════"
# Footer: cache location and size, plus the age file when this run refreshed
# entries while the TTL was enabled.
printf '%s\n' "✓ Keš: $CACHE ($(wc -l < "$CACHE") unosa)"
if [[ "$new_any" -eq 1 && "$CACHE_TTL_DAYS" -gt 0 ]]; then
  printf '%s\n' "✓ Age: $CACHE_AGE (TTL ${CACHE_TTL_DAYS}d aktivan)"
fi
echo
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment