Skip to content

Instantly share code, notes, and snippets.

@papamoose
Last active January 29, 2026 06:29
Show Gist options
  • Select an option

  • Save papamoose/51c28536d30f8e4f2c249cba8bfb0749 to your computer and use it in GitHub Desktop.

Select an option

Save papamoose/51c28536d30f8e4f2c249cba8bfb0749 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Slurm Job Timeline Visualizer
Generates a Gantt chart from sacct output showing job execution and idle periods.
"""
import sys
import argparse
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.dates import DateFormatter
import pandas as pd
def parse_sacct_line(line):
    """Parse one pipe-delimited line of sacct output into a job dictionary.

    The line must carry at least seven ``|``-separated fields, in this order:
    jobid, account, state, start, end, flags, nnodes. Any further fields are
    ignored.

    Args:
        line: A single line of sacct output (trailing newline allowed).

    Returns:
        A dict with the keys ``jobid``, ``state``, ``start``, ``end``,
        ``nnodes``, ``sched_method`` and ``account``, or ``None`` when the
        line has fewer than seven fields, when start/end is ``None`` or
        ``Unknown``, when a timestamp does not match
        ``%Y-%m-%dT%H:%M:%S``, or when the node count is not an integer.
    """
    parts = line.strip().split('|')
    if len(parts) < 7:
        return None
    jobid, account, state, start, end, flags, nnodes = parts[:7]

    # Jobs that never started or have not finished cannot be drawn.
    missing = ('None', 'Unknown')
    if start in missing or end in missing:
        return None

    # The first matching flag wins, in this priority order.
    sched_method = 'Unknown'
    for candidate in ('SchedMain', 'SchedBackfill', 'SchedSubmit'):
        if candidate in flags:
            sched_method = candidate
            break

    # Only the conversions can raise; keep the try body limited to them.
    try:
        start_dt = datetime.strptime(start, '%Y-%m-%dT%H:%M:%S')
        end_dt = datetime.strptime(end, '%Y-%m-%dT%H:%M:%S')
        node_count = int(nnodes)
    except (ValueError, TypeError):
        return None

    # Keep only the first word: "CANCELLED by 5000" -> "CANCELLED".
    # An empty state field would make split()[0] raise IndexError, so fall
    # back to a placeholder instead of aborting the whole run.
    state_words = state.split()
    job_state = state_words[0] if state_words else 'UNKNOWN'

    return {
        'jobid': jobid,
        'state': job_state,
        'start': start_dt,
        'end': end_dt,
        'nnodes': node_count,
        'sched_method': sched_method,
        'account': account,
    }
def read_sacct_data(input_file=None):
    """Load sacct records from a file (or stdin) and return them by start time.

    Args:
        input_file: Path of the file to read. When falsy, lines are read
            from standard input instead.

    Returns:
        A list of the job dictionaries produced by ``parse_sacct_line`` for
        every line it accepted, ordered by ascending ``start``.
    """
    if input_file:
        with open(input_file, 'r') as handle:
            raw_lines = handle.readlines()
    else:
        raw_lines = sys.stdin.readlines()

    # Lines the parser rejects come back falsy and are dropped here.
    parsed = (parse_sacct_line(raw) for raw in raw_lines)
    valid_jobs = [record for record in parsed if record]
    valid_jobs.sort(key=lambda record: record['start'])
    return valid_jobs
def find_gaps(jobs):
    """Identify the periods during which no job at all was running.

    Jobs are processed in start order while tracking the latest end time
    seen so far. Comparing only adjacent jobs is not enough: a long job that
    overlaps several later, shorter jobs would otherwise produce false gaps
    between those shorter jobs.

    Args:
        jobs: Iterable of job dictionaries with comparable ``start`` and
            ``end`` values. It does not need to be pre-sorted.

    Returns:
        A list of ``{'start': ..., 'end': ...}`` dictionaries, in
        chronological order, one per uncovered interval between the first
        start and the last end. Empty when there are fewer than two jobs or
        the jobs leave no uncovered time.
    """
    gaps = []
    covered_until = None
    for job in sorted(jobs, key=lambda j: j['start']):
        if covered_until is not None and job['start'] > covered_until:
            gaps.append({
                'start': covered_until,
                'end': job['start'],
            })
        # Extend coverage only forward; a job nested inside an earlier,
        # longer job must not pull the covered horizon back.
        if covered_until is None or job['end'] > covered_until:
            covered_until = job['end']
    return gaps
def _job_color_scheme(jobs, color_by):
    """Build the job-coloring function and legend entries for one color mode.

    Returns a tuple ``(color_for_job, legend_title, legend_items)`` where
    ``legend_items`` is a list of ``(label, hex_color)`` pairs.
    """
    if color_by == 'account':
        import matplotlib.colors as mcolors

        unique_accounts = sorted(set(job['account'] for job in jobs))
        # 'tab20' offers 20 distinct colors; beyond that use 'hsv', which
        # has more colors but less contrast between neighbours.
        cmap_name = 'hsv' if len(unique_accounts) > 20 else 'tab20'
        try:
            cmap = plt.colormaps[cmap_name]
        except (KeyError, AttributeError):
            # Fallback for older matplotlib versions
            import matplotlib.cm as cm
            cmap = cm.get_cmap(cmap_name)

        account_colors = {
            account: mcolors.rgb2hex(cmap(index / len(unique_accounts)))
            for index, account in enumerate(unique_accounts)
        }

        def color_for_job(job):
            return account_colors.get(job['account'], '#95a5a6')

        legend_items = [(account, account_colors[account])
                        for account in unique_accounts]
        return color_for_job, 'Accounts', legend_items

    # color_by == 'scheduler'
    sched_colors = {
        'SchedMain': '#3498db',      # Blue
        'SchedSubmit': '#2ecc71',    # Green
        'SchedBackfill': '#e74c3c',  # Red
        'Unknown': '#95a5a6',        # Gray
    }

    def color_for_job(job):
        return sched_colors.get(job['sched_method'], sched_colors['Unknown'])

    # Only list methods that actually occur in the data.
    used_methods = set(job['sched_method'] for job in jobs)
    legend_items = [(method, color) for method, color in sched_colors.items()
                    if method in used_methods]
    return color_for_job, 'Scheduling Methods', legend_items


def _idle_intervals(jobs):
    """Return ``(start, end)`` pairs for the time not covered by any of *jobs*."""
    intervals = []
    covered_until = None
    for job in sorted(jobs, key=lambda j: j['start']):
        if covered_until is not None and job['start'] > covered_until:
            intervals.append((covered_until, job['start']))
        # Never move the covered horizon backwards: a short job nested in a
        # longer one must not create a false idle period after it.
        if covered_until is None or job['end'] > covered_until:
            covered_until = job['end']
    return intervals


def create_gantt_chart(jobs, gaps, output_file='slurm_timeline.svg', figsize=(14, 8),
                       color_by='scheduler', show_idle_labels=True):
    """Create a Gantt chart visualization and print summary statistics.

    The input carries only a node *count* per job, so rows are synthetic: a
    job with N nodes is drawn on rows ``Node 0`` .. ``Node N-1``. Idle bars
    are computed per row from the jobs drawn on it; ``gaps`` is used only for
    the idle-time and gap-count statistics.

    Args:
        jobs: List of job dictionaries (keys ``start``, ``end``, ``nnodes``,
            ``state``, ``sched_method``, ``account``).
        gaps: List of gap dictionaries with ``start`` and ``end``.
        output_file: Output filename. The image format follows the file
            extension when matplotlib supports it, otherwise SVG is written.
        figsize: Tuple of (width, height) in inches. The height is a
            minimum; it grows with the number of node rows.
        color_by: How to color jobs - 'account', or anything else for the
            scheduling method.
        show_idle_labels: Whether to show "IDLE" labels on gaps.

    Returns:
        None. When ``jobs`` is empty a message is printed and nothing is
        drawn or written.
    """
    if not jobs:
        print("No valid jobs to visualize.")
        return

    import os

    # Date axes measure distances in days, so bar widths must be given in
    # days; passing hours would draw every bar 24x too long.
    seconds_per_day = 86400.0

    # One row per node index; at least a single row when no job reports nodes.
    max_nodes = max(job['nnodes'] for job in jobs)
    node_list = list(range(max(max_nodes, 1)))
    num_nodes = len(node_list)

    # Honour the requested height, but grow it when there are many rows.
    adjusted_height = max(figsize[1], num_nodes * 0.8)
    fig, ax = plt.subplots(figsize=(figsize[0], adjusted_height))

    get_job_color, legend_title, legend_items = _job_color_scheme(jobs, color_by)

    # Edge styling highlights jobs that did not finish normally.
    state_edges = {
        'FAILED': ('#c0392b', 2),     # Dark red
        'CANCELLED': ('#d68910', 2),  # Dark orange
    }

    # Track which jobs land on each row so idle periods can be derived.
    node_jobs = {node: [] for node in node_list}
    for job in jobs:
        width_days = (job['end'] - job['start']).total_seconds() / seconds_per_day
        color = get_job_color(job)
        edge_color, line_width = state_edges.get(job['state'], ('black', 0.5))

        # Jobs reporting no nodes still get one row.
        nodes_used = min(job['nnodes'], num_nodes) if job['nnodes'] > 0 else 1
        for node in node_list[:nodes_used]:
            node_jobs[node].append(job)
            ax.barh(node, width_days, left=job['start'], height=0.8,
                    color=color, edgecolor=edge_color, linewidth=line_width)

    # Draw idle periods for each row
    for node in node_list:
        for idle_start, idle_end in _idle_intervals(node_jobs[node]):
            idle_span = idle_end - idle_start
            ax.barh(node, idle_span.total_seconds() / seconds_per_day,
                    left=idle_start, height=0.8,
                    color='lightgray', edgecolor='gray', linewidth=0.5,
                    linestyle='--', alpha=0.3)
            # Only label gaps longer than 0.2 h (12 minutes)
            if show_idle_labels and idle_span.total_seconds() / 3600 > 0.2:
                ax.text(idle_start + idle_span / 2, node, 'IDLE',
                        ha='center', va='center', fontsize=6,
                        style='italic', color='gray', alpha=0.7)

    # Format the plot
    ax.set_yticks(node_list)
    ax.set_yticklabels([f'Node {node}' for node in node_list])
    ax.set_ylabel('Nodes', fontsize=12)
    ax.set_xlabel('Time', fontsize=12)
    ax.set_title('Slurm Job Timeline by Node', fontsize=14, weight='bold')

    # Scope x-axis to earliest start and latest end time
    earliest_start = min(job['start'] for job in jobs)
    latest_end = max(job['end'] for job in jobs)
    ax.set_xlim(earliest_start, latest_end)

    ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d\n%H:%M'))
    plt.xticks(rotation=0, ha='center')

    # Legend: color coding first, then the state/idle indicators
    legend_elements = [mpatches.Patch(color=color, label=label)
                       for label, color in legend_items]
    legend_elements.extend([
        mpatches.Patch(facecolor='lightgray', edgecolor='gray',
                       linestyle='--', label='Node Idle', alpha=0.3),
        mpatches.Patch(facecolor='white', edgecolor='#c0392b',
                       linewidth=2, label='Failed Job'),
        mpatches.Patch(facecolor='white', edgecolor='#d68910',
                       linewidth=2, label='Cancelled Job'),
    ])
    ax.legend(handles=legend_elements, loc='upper right', title=legend_title)

    ax.grid(axis='x', alpha=0.3, linestyle=':')
    ax.grid(axis='y', alpha=0.2, linestyle=':')

    # Statistics. The span uses the true earliest start / latest end: the
    # last job in start order is not necessarily the one that ends last.
    total_time = (latest_end - earliest_start).total_seconds() / 3600
    # Sum of individual job durations; overlapping jobs each count in full,
    # so this (and the utilization derived from it) can exceed the span.
    job_time = sum((j['end'] - j['start']).total_seconds() for j in jobs) / 3600
    idle_time = sum((g['end'] - g['start']).total_seconds() for g in gaps) / 3600
    utilization = (job_time / total_time * 100) if total_time > 0 else 0

    stats_text = f"Total Jobs: {len(jobs)} | Nodes: {num_nodes} | Timeline: {total_time:.1f}h | " \
                 f"Active: {job_time:.1f}h | Idle: {idle_time:.1f}h | " \
                 f"Utilization: {utilization:.1f}%"
    fig.text(0.5, 0.02, stats_text, ha='center', fontsize=10,
             bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    fig.tight_layout(rect=[0, 0.04, 1, 1])

    # Write the format implied by the file extension so that e.g. a ".png"
    # name really contains PNG data; unknown or missing extensions keep SVG.
    suffix = os.path.splitext(str(output_file))[1][1:].lower()
    out_format = suffix if suffix in fig.canvas.get_supported_filetypes() else 'svg'
    fig.savefig(output_file, format=out_format, bbox_inches='tight')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f"Gantt chart saved to: {output_file}")

    # Print statistics to console
    print(f"\n{'='*60}")
    print(f"Timeline Statistics:")
    print(f"{'='*60}")
    print(f"Total jobs: {len(jobs)}")
    print(f"Total nodes: {num_nodes}")
    print(f"Completed: {sum(1 for j in jobs if j['state'] == 'COMPLETED')}")
    print(f"Failed: {sum(1 for j in jobs if j['state'] == 'FAILED')}")
    print(f"Cancelled: {sum(1 for j in jobs if j['state'] == 'CANCELLED')}")

    # Break the job count down by whatever the chart is colored by.
    if color_by == 'account':
        count_key = 'account'
        print(f"\nAccounts:")
    else:
        count_key = 'sched_method'
        print(f"\nScheduling Methods:")
    counts = {}
    for job in jobs:
        counts[job[count_key]] = counts.get(job[count_key], 0) + 1
    for name, count in sorted(counts.items()):
        print(f" {name:20s} {count}")

    print(f"\nTimeline span: {total_time:.2f} hours")
    print(f"Active time: {job_time:.2f} hours")
    print(f"Idle time: {idle_time:.2f} hours")
    print(f"Utilization: {utilization:.2f}%")
    print(f"Number of gaps: {len(gaps)}")
    print(f"{'='*60}\n")
def _build_arg_parser():
    """Assemble the command-line parser for the timeline visualizer."""
    cli = argparse.ArgumentParser(
        description='Visualize Slurm job timeline from sacct output',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
# Read from file
%(prog)s -i sacct_output.txt -o timeline.svg
# Pipe from sacct command (note: account field is REQUIRED)
sacct -a -X --starttime=2020-01-01 -P -n -o jobid,account,state,start,end,flags,nnodes | %(prog)s
# Color by account instead of scheduler
sacct -a -X --starttime=2020-01-01 -P -n -o jobid,account,state,start,end,flags,nnodes | %(prog)s --color-by account
# Custom figure size without idle labels
%(prog)s -i sacct_output.txt --width 20 --height 10 --no-idle-labels
'''
    )
    cli.add_argument('-i', '--input',
                     help='Input file with sacct output (uses stdin if not specified)')
    cli.add_argument('-o', '--output', default='slurm_timeline.svg',
                     help='Output image file (default: slurm_timeline.svg)')
    cli.add_argument('--width', type=float, default=14,
                     help='Figure width in inches (default: 14)')
    cli.add_argument('--height', type=float, default=8,
                     help='Figure height in inches (default: 8)')
    cli.add_argument('--color-by', choices=['scheduler', 'account'], default='scheduler',
                     help='Color jobs by scheduler method or account (default: scheduler)')
    cli.add_argument('--no-idle-labels', action='store_true',
                     help='Hide "IDLE" labels on gaps between jobs')
    return cli


def main():
    """Command-line entry point: read sacct data, find gaps, draw the chart.

    Exits with status 1 when the input contains no usable job records.
    """
    options = _build_arg_parser().parse_args()

    # Load the records first; without any there is nothing to draw.
    print("Reading sacct data...")
    job_records = read_sacct_data(options.input)
    if not job_records:
        print("No valid jobs found in input.")
        sys.exit(1)
    print(f"Found {len(job_records)} valid jobs.")

    idle_periods = find_gaps(job_records)
    print(f"Found {len(idle_periods)} idle periods.")

    # The CLI flag is negative ("--no-idle-labels"); the chart option is positive.
    want_idle_labels = not options.no_idle_labels
    chart_size = (options.width, options.height)
    create_gantt_chart(job_records, idle_periods, options.output,
                       figsize=chart_size,
                       color_by=options.color_by,
                       show_idle_labels=want_idle_labels)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment