Last active
September 8, 2025 22:04
-
-
Save zahin-mohammad/750bd6ba83aee7dc49c56171535d5e70 to your computer and use it in GitHub Desktop.
Scan your file system to see if you have installed malware for the npm ecosystem. Run from the root of your file system. Requires `jq` to be installed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| ############################################################################### | |
| # Malware npm dependency scanner using GitHub Advisory API | |
| # | |
| # REQUIREMENTS (runtime tools): | |
| # - bash 3.2+ | |
| # - curl : HTTP requests to GitHub | |
| # - jq : JSON parsing | |
| # - npm : Build dependency tree (npm ls --json --all) | |
| # - Standard POSIX utilities: find, xargs, awk, sort, paste, stat (or perl fallback) | |
| # | |
| # OPTIONAL ENV VARS: | |
| # ROOT_DIR Directory to scan (also arg1) (default: .) | |
| # JOBS Parallel workers (default: 100) | |
| # CACHE_FILE Advisory cache file (default: /tmp/github_advisories_malware.json) | |
| # GHA_CACHE_TTL Cache TTL seconds (default: 3600) | |
| # OUTPUT_JSON If set, produce JSON findings array (default: unset) | |
| # EXCLUDE_PATHS Comma-separated substrings to skip paths (default: '/.config/yarn/global,/Library') | |
| # FIND_SUPPRESS_ERRORS Suppress find permission errors (default: 1) | |
| # GITHUB_TOKEN / GH_TOKEN Auth token to raise GitHub API rate limits | |
| # | |
| # EXIT CODES: | |
| # 0 Success (even if no findings) | |
| # 1 Failed to fetch advisories | |
| # 2 Missing required tooling | |
| # | |
| # FEATURES: | |
| # - Caching + token auth to minimize rate limits | |
| # - Parallel scanning of package.json files (node_modules excluded) | |
| # - Approximate version range detection (presence-based) | |
| # - Portable mtime detection across Linux/macOS | |
| # - Optional machine-readable JSON output | |
| # | |
| # FUTURE ENHANCEMENTS (not implemented yet): | |
| # - Pagination if >100 malware advisories | |
| # - Config file for advanced allow/deny filtering | |
| # - Direct parsing of yarn/pnpm lockfiles to avoid npm ls cost | |
| ############################################################################### | |
# Strict mode: abort on errors, on unset variables and on failed pipeline stages.
set -euo pipefail
# Split words on newline/tab only, so values containing spaces are not broken up.
IFS=$'\n\t'

# Directory to scan. Precedence: first positional argument, then the ROOT_DIR
# environment variable (documented in the header), then the current directory.
ROOT_DIR=${1:-${ROOT_DIR:-.}}
# Number of parallel workers passed to `xargs -P`.
JOBS=${JOBS:-100}
# Location of the cached advisory payload and its maximum age in seconds.
CACHE_FILE=${CACHE_FILE:-/tmp/github_advisories_malware.json}
GHA_CACHE_TTL=${GHA_CACHE_TTL:-3600}
# When non-empty: path that additionally receives a JSON array of findings.
OUTPUT_JSON=${OUTPUT_JSON:-}
# Comma-separated substrings; a project whose directory contains one is skipped.
EXCLUDE_PATHS=${EXCLUDE_PATHS:-"/.config/yarn/global,/Library"}
# 1 = discard find's stderr (permission errors etc.) during the file walk.
FIND_SUPPRESS_ERRORS=${FIND_SUPPRESS_ERRORS:-1}
# Abort early with exit code 2 when a required external tool is not on PATH.
for required_tool in curl jq npm; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "$required_tool required" >&2
    exit 2
  fi
done
unset required_tool
# Print the Authorization header used for GitHub API requests (no trailing
# newline). GITHUB_TOKEN takes precedence over GH_TOKEN; when neither holds a
# non-empty value nothing is printed.
auth_header() {
  local token=${GITHUB_TOKEN:-${GH_TOKEN:-}}
  if [[ -n $token ]]; then
    printf 'Authorization: Bearer %s' "$token"
  fi
}
# Download the first page (up to 100 entries) of GitHub malware advisories and
# write the raw JSON response to stdout. Pagination is not implemented here.
# The Authorization header is added only when auth_header prints one.
# Returns curl's exit status (-f turns HTTP errors into failures).
fetch_advisories() {
  local endpoint="https://api.github.com/advisories?type=malware&per_page=100"
  local -a request=(-sSf -H 'Accept: application/vnd.github+json')
  local bearer
  bearer=$(auth_header || true)
  [ -z "$bearer" ] || request+=(-H "$bearer")
  curl "${request[@]}" "$endpoint"
}
| # ----------------------------- | |
| # Minimal SemVer helpers (no Node dependency) | |
| # Supports operators: =, <, <=, >, >= and logical AND (space or comma separated) | |
| # and OR groups separated by '||'. Pre-release/build metadata ignored. | |
| # Example ranges handled: | |
| # "= 4.4.2" | |
| # ">=1.0.0 <2.0.0" | |
| # "< 1.2.3, >=1.0.0" | |
| # ">=1.0.0 || =0.9.5" | |
| # Any unrecognized token makes its whole group fail (best-effort, safe default = NOT match). | |
| # ----------------------------- | |
# semver_cmp v1 v2 -> prints -1, 0 or 1 (v1 lower than / equal to / higher than v2)
#
# Compares only the numeric major.minor.patch triple:
#   - pre-release ("-beta.1") and build ("+sha") suffixes are stripped first,
#     so they are ignored instead of reaching the integer comparison;
#   - missing or non-numeric components count as 0;
#   - leading zeros are accepted ("08" is read as decimal 8, not bad octal).
# Always returns 0; the result is communicated on stdout.
semver_cmp() { # v1 v2 -> echo -1 / 0 / 1
  local a=${1%%[-+]*} b=${2%%[-+]*}
  local IFS=.
  local -a A B
  read -r -a A <<<"$a"
  read -r -a B <<<"$b"
  local i ai bi
  for i in 0 1 2; do
    ai=${A[$i]:-0}
    bi=${B[$i]:-0}
    [[ $ai =~ ^[0-9]+$ ]] || ai=0
    [[ $bi =~ ^[0-9]+$ ]] || bi=0
    # Force base 10 so components with leading zeros are not parsed as octal.
    ai=$((10#$ai))
    bi=$((10#$bi))
    if ((ai < bi)); then
      echo -1
      return 0
    fi
    if ((ai > bi)); then
      echo 1
      return 0
    fi
  done
  echo 0
}
# semver_check version op target -> returns 0 when "version op target" holds.
# Supported operators: =, ==, >=, >, <=, <. Any other operator returns 1.
# The ordering itself comes from semver_cmp (-1 / 0 / 1 on stdout).
semver_check() { # version op target -> returns 0 if satisfies
  local candidate=$1 operator=$2 bound=$3
  local ordering relation
  ordering=$(semver_cmp "$candidate" "$bound")
  # Translate the range operator into the matching integer test against 0.
  case $operator in
    '=' | '==') relation=-eq ;;
    '>=') relation=-ge ;;
    '>') relation=-gt ;;
    '<=') relation=-le ;;
    '<') relation=-lt ;;
    *) return 1 ;;
  esac
  [ "$ordering" "$relation" 0 ]
}
# semver_satisfies version range_string -> returns 0 when version is in range.
#
# range_string is a list of OR-groups separated by '||'. Inside a group the
# comparators are separated by spaces and/or commas and must ALL hold. A
# comparator is "<op><version>", "<op> <version>" or a bare "<version>"
# (treated as '='), with <op> one of =, ==, <, <=, >, >=.
# An empty range or "*" matches everything. A group containing any token that
# is not recognized fails as a whole (safe default = NOT match).
#
# Tokenizing sets IFS explicitly for each `read`, so the result does not depend
# on the caller's IFS, and no unquoted expansion is subject to globbing.
semver_satisfies() { # version range_string
  local ver=$1 range=$2
  local nl=$'\n'
  local op_re='^([<>]=?|==?)$'
  local ver_re='^[0-9]+(\.[0-9]+){0,2}$'
  local opver_re='^([<>]=?|==?)([0-9]+(\.[0-9]+){0,2})$'
  local part tok op tgt i ok
  local -a tokens

  # Trim surrounding whitespace.
  range=${range#"${range%%[![:space:]]*}"}
  range=${range%"${range##*[![:space:]]}"}
  [ -z "$range" ] && return 0
  # Fast path: wildcard '*'
  [ "$range" = "*" ] && return 0

  # One OR-group per line: every '|' becomes a newline, so '||' yields an
  # empty line in between, which is skipped below.
  while IFS= read -r part; do
    part=${part//,/ }
    tokens=()
    IFS=$' \t' read -r -a tokens <<<"$part"
    [ "${#tokens[@]}" -gt 0 ] || continue

    ok=1
    i=0
    while [ "$i" -lt "${#tokens[@]}" ]; do
      tok=${tokens[$i]}
      if [[ $tok =~ $opver_re ]]; then
        op=${BASH_REMATCH[1]}
        tgt=${BASH_REMATCH[2]}
      elif [[ $tok =~ $op_re ]]; then
        # Bare operator: the version must be the next token.
        i=$((i + 1))
        if [ "$i" -lt "${#tokens[@]}" ] && [[ ${tokens[$i]} =~ $ver_re ]]; then
          op=$tok
          tgt=${tokens[$i]}
        else
          ok=0
          break
        fi
      elif [[ $tok =~ $ver_re ]]; then
        op='='
        tgt=$tok
      else
        ok=0
        break
      fi
      if ! semver_check "$ver" "$op" "$tgt"; then
        ok=0
        break
      fi
      i=$((i + 1))
    done

    if [ "$ok" -eq 1 ]; then return 0; fi
  done <<<"${range//|/$nl}"
  return 1
}
# file_mtime path -> prints the file's modification time as epoch seconds.
# Probes GNU stat (-c %Y) first, then BSD stat (-f %m), and finally falls back
# to perl; prints 0 only when the perl fallback itself fails to run.
file_mtime() {
  local target=$1 stamp
  if stamp=$(stat -c %Y "$target" 2>/dev/null); then
    printf '%s\n' "$stamp"
    return 0
  fi
  if stamp=$(stat -f %m "$target" 2>/dev/null); then
    printf '%s\n' "$stamp"
    return 0
  fi
  perl -e 'print((stat shift)[9])' "$target" 2>/dev/null || echo 0
}
# Load the advisory list, refreshing the on-disk cache when it is missing or
# older than GHA_CACHE_TTL seconds. Exits with status 1 when a refresh fails.
need_refresh=true
if [ -f "$CACHE_FILE" ]; then
  now=$(date +%s)
  mod=$(file_mtime "$CACHE_FILE")
  # file_mtime may print nothing usable; treat that as "very old" rather than
  # feeding an empty operand to the arithmetic below.
  case "$mod" in
    '' | *[!0-9]*) mod=0 ;;
  esac
  age=$((now - mod))
  if [ "$age" -lt "$GHA_CACHE_TTL" ]; then
    need_refresh=false
  fi
fi
if $need_refresh; then
  echo "Fetching latest advisories from GitHub..." >&2
  # stderr is left untouched so curl's own message (rate limit, DNS, HTTP
  # status) explains why the fetch failed.
  if ! fetch_advisories >"$CACHE_FILE".tmp; then
    rm -f "$CACHE_FILE".tmp
    echo "Failed to fetch advisories" >&2
    exit 1
  fi
  # Never replace a good cache with a truncated or non-array payload; the
  # extraction step iterates over the top-level array.
  if ! jq -e 'type == "array"' "$CACHE_FILE".tmp >/dev/null 2>&1; then
    rm -f "$CACHE_FILE".tmp
    echo "Failed to fetch advisories (response was not a JSON array)" >&2
    exit 1
  fi
  mv "$CACHE_FILE".tmp "$CACHE_FILE"
else
  echo "Using cached advisories ($CACHE_FILE)" >&2
fi
ADVISORIES=$(<"$CACHE_FILE")
# Reduce the raw advisory payload to one record per affected npm package,
# carrying over the parent advisory's summary, URL and severity. Duplicate
# records are removed by jq's `unique`.
MALWARE_JSON=$(jq '[ .[] as $adv
  | select($adv.type=="malware")
  | $adv.vulnerabilities[]?
  | select(.package.ecosystem=="npm")
  | {package: .package.name,
     range: .vulnerable_version_range,
     advisory: ($adv.summary // "(no summary)"),
     url: ($adv.html_url // ""),
     severity: (.severity // $adv.severity // "unknown")}
  ] | unique' <<<"$ADVISORIES")

# Nothing to look for: finish successfully without scanning.
COUNT=$(jq 'length' <<<"$MALWARE_JSON")
if [ "$COUNT" -eq 0 ]; then
  echo "No npm malware advisories found." >&2
  exit 0
fi
ADVISORY_COUNT=$COUNT

# Print the advisory list (stdout) framed by progress messages (stderr).
echo "Malware advisories (npm only):" >&2
jq -r '.[] | "- [" + .advisory + "](" + .url + ") -> " + .package + " (" + .range + ") (severity: " + .severity + ")"' <<<"$MALWARE_JSON"
echo "Scanning for package.json files under $ROOT_DIR (parallel: $JOBS)..." >&2
#######################################
# Scan one project for installed dependencies that match a malware advisory.
# Globals:
#   EXCLUDE_PATHS    (read) comma-separated path substrings; a project whose
#                    directory contains one of them is skipped
#   MALWARE_JSON     (read) JSON array of {package, range, advisory, url, ...}
#   OUTPUT_JSON      (read) when non-empty, each finding is appended as one
#                    JSON object per line to "$OUTPUT_JSON.tmp"
#   FINDINGS_MARKER  (read) when non-empty, one line is appended per finding
# Arguments:
#   $1 - path to a package.json file
# Outputs:
#   One human-readable line per finding on stdout.
# Returns:
#   Always 0, so a problem in one project never aborts the overall scan.
#######################################
scan_pkg() {
  local pkgjson=$1
  local pkgdir
  pkgdir=$(dirname "$pkgjson")

  # Skip noisy global or extension directories (configurable).
  local -a excludes=()
  local pat
  IFS=',' read -r -a excludes <<<"${EXCLUDE_PATHS:-}"
  for pat in ${excludes[@]+"${excludes[@]}"}; do
    [ -z "$pat" ] && continue
    case "$pkgdir" in *"$pat"*) return 0 ;; esac
  done

  # `npm ls` exits non-zero for trees with missing/extraneous/invalid entries
  # but still prints the tree as JSON. Keep whatever it printed instead of
  # silently skipping such projects; npm's own diagnostics stay suppressed.
  local npm_json
  npm_json=$(npm ls --all --json --prefix "$pkgdir" 2>/dev/null) || true
  [ -n "$npm_json" ] || return 0

  # Flatten the dependency tree to unique "name|version" lines. Entries
  # without a version (e.g. not installed) are dropped.
  local deps
  deps=$(printf '%s\n' "$npm_json" | jq -r '
    def walkdeps: to_entries[]?
      | select(.value.version != null)
      | "\(.key)|\(.value.version)" , ( .value.dependencies? // {} | walkdeps );
    (.dependencies? // {}) | walkdeps
  ' | sort -u) || true
  [ -n "$deps" ] || return 0

  local mname mrange murl msum found_ver
  # One "package|range|url|summary" line per advisory entry. The summary is
  # last so any '|' inside it stays part of the summary. A null range is
  # emitted as empty, which means "any version" below.
  while IFS='|' read -r mname mrange murl msum; do
    [ -n "$mname" ] || continue
    # The same package can be installed in several versions; check them all.
    while IFS= read -r found_ver; do
      [ -n "$found_ver" ] || continue
      if [ -n "$mrange" ] && ! semver_satisfies "$found_ver" "$mrange" 2>/dev/null; then
        continue
      fi
      # Record the finding for the final summary (append is parallel-safe).
      if [ -n "${FINDINGS_MARKER:-}" ]; then
        echo 1 >>"$FINDINGS_MARKER" 2>/dev/null || true
      fi
      printf '⚠️ %s@%s in %s (advisory: %s | range: %s)\n' \
        "$mname" "$found_ver" "$pkgdir" "$msum" "$mrange"
      if [ -n "${OUTPUT_JSON:-}" ]; then
        # Let jq do the escaping; paths and summaries may contain quotes.
        jq -cn \
          --arg package "$mname" \
          --arg foundVersion "$found_ver" \
          --arg range "$mrange" \
          --arg advisory "$msum" \
          --arg url "$murl" \
          --arg path "$pkgdir" \
          '{package: $package, foundVersion: $foundVersion, range: $range,
            advisory: $advisory, url: $url, path: $path}' >>"$OUTPUT_JSON".tmp
      fi
    done < <(printf '%s\n' "$deps" | awk -F'|' -v n="$mname" '$1 == n { print $2 }')
  done < <(printf '%s\n' "${MALWARE_JSON:-[]}" \
    | jq -r '.[] | "\(.package)|\(.range // "")|\(.url)|\(.advisory)"')
  return 0
}
# scan_pkg runs inside fresh `bash -c` workers started by xargs. Those workers
# only see exported functions and variables, so everything scan_pkg relies on
# must be exported: the semver helpers it calls and the settings it reads.
export -f scan_pkg semver_satisfies semver_check semver_cmp
export MALWARE_JSON OUTPUT_JSON EXCLUDE_PATHS
# Marker file to detect if any findings were produced (parallel processes append)
FINDINGS_MARKER=${FINDINGS_MARKER:-$(mktemp -t malware_findings_XXXXXX)}
export FINDINGS_MARKER
# Optionally initialize JSON output: start from an empty line-delimited file.
if [ -n "$OUTPUT_JSON" ]; then
  : >"$OUTPUT_JSON".tmp
fi
# Print every package.json under ROOT_DIR, NUL-delimited. node_modules
# directories are pruned, so find does not descend into them at all.
# With FIND_SUPPRESS_ERRORS=1 the walk is best-effort: find's messages are
# discarded and its non-zero exit (e.g. unreadable directories) is ignored.
find_package_json_files() {
  if [ "$FIND_SUPPRESS_ERRORS" -eq 1 ]; then
    find "$ROOT_DIR" -name node_modules -prune -o \
      -type f -name package.json -print0 2>/dev/null || true
  else
    find "$ROOT_DIR" -name node_modules -prune -o \
      -type f -name package.json -print0 \
      || echo "Warning: find reported errors; some directories were not scanned." >&2
  fi
}

# Scan in parallel, one worker per package.json. Failures are reported but do
# not abort the script: under `set -e` with pipefail a single permission error
# from find would otherwise skip the JSON report and the summary.
# The path is passed as $1 (with a placeholder $0), and the worker does nothing
# when xargs invokes it without an argument, as GNU xargs does on empty input.
find_package_json_files \
  | xargs -0 -n1 -P "$JOBS" bash -c 'if [ "$#" -gt 0 ]; then scan_pkg "$1"; fi' scan_pkg_worker \
  || echo "Warning: one or more scan workers failed (xargs exit status $?)." >&2
# When a JSON report was requested, turn the line-delimited findings collected
# in "$OUTPUT_JSON.tmp" into a single JSON array and drop the temporary file.
if [[ -n $OUTPUT_JSON ]]; then
  {
    printf '%s\n' '['
    # Join all finding lines with commas on a single line.
    paste -s -d ',' "$OUTPUT_JSON".tmp
    printf '%s\n' ']'
  } >"$OUTPUT_JSON"
  rm -f "$OUTPUT_JSON".tmp
  echo "JSON report written to $OUTPUT_JSON" >&2
fi
# Summarize results. The marker file holds one line per finding (workers
# append to it), so its line count is the number of findings across all
# scanned projects, not the number of distinct package names.
if [ ! -s "$FINDINGS_MARKER" ]; then
  echo "No impacted packages detected (0 matched out of $ADVISORY_COUNT malware advisories)." >&2
else
  MATCHED=$(wc -l <"$FINDINGS_MARKER" 2>/dev/null || echo 0)
  # BSD wc pads its output with spaces; keep only the number.
  MATCHED=${MATCHED//[[:space:]]/}
  echo "Detected $MATCHED malware finding(s) while checking $ADVISORY_COUNT npm advisory entries." >&2
fi
rm -f "$FINDINGS_MARKER" 2>/dev/null || true
echo "Scan complete." >&2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment