file read tool
read_file_slice() {
    local path="$1"
    local start_byte="${2:-0}"
    local length="${3:-32768}"
    local show_hex="${4:-false}"

    # Safety caps
    (( length > 131072 )) && length=131072
    (( start_byte < 0 )) && start_byte=0

    # Resolve path
    path=$(realpath "$path" 2>/dev/null || printf '%s\n' "$path")

    # Header
    printf '[read_file_slice] %s\n' "$path"
    printf '[Requested bytes: %d–%d (%d bytes)]\n\n' \
        "$start_byte" "$((start_byte + length - 1))" "$length"

    # ── Extract the exact byte range ─────────────────────────────────────
    if [[ "$path" == *.gz ]]; then
        # gzip streams cannot be seeked in the compressed domain, so decompress
        # from the start and skip the requested number of uncompressed bytes
        gzip -dc "$path" 2>/dev/null |
            dd bs=1 skip="$start_byte" count="$length" status=none 2>/dev/null
    else
        dd if="$path" bs=1 skip="$start_byte" count="$length" status=none 2>/dev/null
    fi | {
        # Spool the slice to a temp file so NUL bytes survive
        # (bash variables cannot hold NUL, so reading into a variable would
        # truncate binary data)
        tmp=$(mktemp)
        cat > "$tmp"

        if [ "$show_hex" = true ]; then
            printf '[show_hex forced → hexdump]\n'
            hexdump -v -e '16/1 "%02X "' -e '"  " 16/1 "%_p" "\n"' "$tmp"
        else
            # Detect binary: >15% non-printable/control chars in the first 32KB
            sample=$(head -c 32768 "$tmp" | wc -c)
            nonprint=$(head -c 32768 "$tmp" |
                tr -d -c '\000-\010\016-\037\177-\377' | wc -c)
            if (( sample == 0 || nonprint * 100 <= sample * 15 )); then
                # Text → raw output
                cat "$tmp"
            else
                # Binary → hexdump of the requested range
                printf '[BINARY DETECTED → hexdump of requested range]\n'
                hexdump -v -e '16/1 "%02X "' -e '"  " 16/1 "%_p" "\n"' "$tmp"
            fi
        fi
        rm -f "$tmp"
    }
}
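
If an agent harness needs to call this slicer from Python, a minimal sketch could look like the following. The ./read_file_slice.sh filename and the wrapper itself are assumptions; only the function name and argument order come from the definition above.

    # Hypothetical wrapper: assumes the function above is saved as ./read_file_slice.sh
    import shlex
    import subprocess

    def read_file_slice(path, start_byte=0, length=32768, show_hex=False):
        cmd = (
            "source ./read_file_slice.sh && "
            f"read_file_slice {shlex.quote(path)} {start_byte} {length} "
            f"{'true' if show_hex else 'false'}"
        )
        out = subprocess.run(["bash", "-c", cmd], capture_output=True)
        # stdout is either raw text or a hexdump, depending on the binary detection
        return out.stdout.decode("utf-8", errors="replace")

    print(read_file_slice("/var/log/syslog", start_byte=0, length=4096))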
jpic (Author) commented on Nov 20, 2025:
import textwrap
from typing import Optional

import httpx

# Tool, Parameter and get_thanos_url_for_hostname come from the surrounding
# agent framework and are not defined in this snippet.


class ListPrometheusInstances(Tool):
    description = textwrap.dedent('''
        List currently scraped instances in the correct environment.
        Your setup:
        • job="node" → node_exporter (instance = primary-hostname:9100)
        • job="nodetool" → centralized custom exporter (instance is useless, use hostname= label instead)
    ''')

    hostname_hint = Parameter(
        type='string',
        description='Any hostname you know in the target environment (e.g. web01, cass07, db-stg-03)',
        required=True,
    )
    job = Parameter(
        type='string',
        description="Filter by job: 'node' or 'nodetool'",
        required=False,
    )

    async def run(self, conversation, hostname_hint: str, job: Optional[str] = None) -> str:
        try:
            thanos_base = await get_thanos_url_for_hostname(hostname_hint)
        except Exception as e:
            return f"[list_prometheus_instances] ERROR: {e}"

        env_name = 'PRODUCTION' if 'prod' in thanos_base.lower() else 'STAGING'

        # Build the correct query for each job
        if job == 'node':
            promql = 'up{job="node"}'
        elif job == 'nodetool':
            promql = 'up{job="nodetool"}'
        else:
            promql = 'up{job=~"node|nodetool"}'

        async with httpx.AsyncClient(timeout=20.0) as client:
            try:
                resp = await client.get(
                    f"{thanos_base}/api/v1/query",
                    params={'query': promql},
                    timeout=20,
                )
                resp.raise_for_status()
                results = resp.json()['data']['result']
            except Exception as e:
                return f"[list_prometheus_instances] {env_name} — Query failed: {e}"

        if not results:
            return f"[list_prometheus_instances] {env_name} — No instances found."

        lines = [
            f"[list_prometheus_instances] {env_name} (via {hostname_hint})",
            f" Found {len(results)} instances\n",
        ]
        for r in sorted(results, key=lambda x: x['metric'].get('instance', '') or x['metric'].get('hostname', '')):
            m = r['metric']
            job_name = m.get('job', 'unknown')
            if job_name == 'node':
                instance = m.get('instance', '?')
                clean_host = instance.split(':')[0]  # removes :9100
                display = f"{clean_host:<45} node_exporter"
            else:  # job="nodetool"
                hostname_label = m.get('hostname', '?')
                useless_instance = m.get('instance', '?')
                display = f"{hostname_label:<30} nodetool_exporter (instance={useless_instance} → ignore)"
            lines.append(f" {display}")

        return '\n'.join(lines)
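
A rough driver for the class above, assuming the Tool base class can be instantiated without arguments and that conversation=None is acceptable; neither assumption is confirmed by this gist, and the hostname hint is a placeholder.

    # Hypothetical driver; only the run() signature comes from the class above.
    import asyncio

    async def main():
        tool = ListPrometheusInstances()  # assumes a no-argument constructor
        listing = await tool.run(conversation=None, hostname_hint="cass-prod-07", job="nodetool")
        print(listing)

    asyncio.run(main())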
### METRICS RULES — OBEY OR FAIL

You have exactly two exporters:

1. Node exporter
   - job="node"
   - instance = "primary-hostname:9100" → use this exactly
2. Centralized nodetool exporter
   - job="nodetool"
   - instance = useless (always the same) → IGNORE IT
   - hostname = "cass-prod-07" (real Cassandra node) → use this instead (see the sketch below)
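
To make the label rules concrete, here is a sketch of picking the right identifier out of an up{} result. THANOS_BASE is a placeholder, and the /api/v1/query call mirrors the tool code above.

    # Sketch only: THANOS_BASE is hypothetical; label names come from the rules above.
    import httpx

    THANOS_BASE = "https://thanos.example.internal"

    resp = httpx.get(f"{THANOS_BASE}/api/v1/query",
                     params={"query": 'up{job=~"node|nodetool"}'}, timeout=20)
    for r in resp.json()["data"]["result"]:
        m = r["metric"]
        if m.get("job") == "node":
            target = m["instance"]           # e.g. "web-prod-01.example.com:9100"
        else:                                # job="nodetool"
            target = m.get("hostname", "?")  # ignore m["instance"] here
        print(m.get("job"), "→", target)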
### WORKFLOW (never skip a step)

1. Always start with:

       list_prometheus_instances hostname_hint=<any-host> job=<node|nodetool>

2. Copy the exact instance (for node) or hostname label (for nodetool) from the output.
3. Write queries exactly like this (an end-to-end sketch follows at the end of these rules):

       # System / host
       node_memory_MemAvailable_bytes{job="node", instance="web-prod-01.example.com:9100"}
       100 * (1 - node_memory_MemAvailable_bytes{job="node", instance="db01:9100"} / node_memory_MemTotal_bytes{job="node", instance="db01:9100"})

       # Cassandra
       cassandra_heap_used_bytes{job="nodetool", hostname="cass-prod-07"}
       rate(cassandra_gc_duration_seconds_count{job="nodetool", hostname="cass-stg-03"}[5m])
Never guess labels. Never use the wrong job.
If unsure → run list_prometheus_instances first.
Do it right → you’re faster than any human.
Do it wrong → you’re useless.
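
Putting the workflow together, here is a sketch that runs the documented example queries against the same query API the tool above uses. THANOS_BASE and the instance/hostname values are placeholders copied from the examples, not real targets.

    # Sketch under assumptions: THANOS_BASE is hypothetical, queries come from the rules above.
    import httpx

    THANOS_BASE = "https://thanos.example.internal"

    QUERIES = [
        # System / host: exact instance label copied from list_prometheus_instances
        'node_memory_MemAvailable_bytes{job="node", instance="web-prod-01.example.com:9100"}',
        # Cassandra: exact hostname label, instance ignored
        'rate(cassandra_gc_duration_seconds_count{job="nodetool", hostname="cass-stg-03"}[5m])',
    ]

    for promql in QUERIES:
        resp = httpx.get(f"{THANOS_BASE}/api/v1/query", params={"query": promql}, timeout=20)
        resp.raise_for_status()
        for r in resp.json()["data"]["result"]:
            ts, value = r["value"]  # instant query result: [unix timestamp, value as string]
            print(promql, "→", value)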