A minimal Kubernetes-like orchestrator for UI automation agents, built on hcs-sandbox.
┌─────────────────────────────────────────────────────────────────────────────┐
│ Kubernetes → HCS-Kube Mapping │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ Kubernetes HCS-Kube Purpose │
│ ────────── ──────── ─────── │
│ Pod → Sandbox Isolated Windows desktop │
│ Container → Agent UI automation process │
│ Deployment → SandboxPool N replicas of sandbox │
│ Service → Display endpoint VNC/RDP access point │
│ Node → Host machine Physical Windows host │
│ Scheduler → SandboxScheduler Place agents on sandboxes │
│ kubelet → SandboxAgent Per-sandbox daemon │
│ etcd → StateStore Desired vs actual state │
│ Ingress → DisplayGateway Route to sandbox displays │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────┐
│ API Server │
│ ┌─────────────────────────────┐ │
┌────────────────────────│ │ POST /sandboxes │ │
│ │ │ POST /agents │ │
│ HTTP/gRPC │ │ GET /sandboxes/:id/screen │ │
│ │ │ POST /sandboxes/:id/input │ │
│ │ └─────────────────────────────┘ │
│ └──────────────┬──────────────────────┘
│ │
▼ ▼
┌─────────────────────┐ ┌─────────────────────┐
│ hcskubectl │ │ State Store │
│ (CLI client) │ │ (SQLite/Redis) │
│ │ │ │
│ hcskube apply -f │ │ - Desired state │
│ hcskube get sb │ │ - Actual state │
│ hcskube logs │ │ - Agent registry │
│ hcskube screen │ │ - Lease/heartbeat │
└─────────────────────┘ └──────────┬──────────┘
│
┌────────────────────┼────────────────────┐
│ │ │
▼ ▼ ▼
┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐
│ Controller │ │ Scheduler │ │ Display Gateway │
│ Manager │ │ │ │ │
│ │ │ - Find sandbox │ │ - VNC proxy │
│ - Reconcile │ │ - Check capacity│ │ - Screenshot │
│ - Scale up/down │ │ - Affinity │ │ - Input inject │
│ - Health check │ │ - GPU matching │ │ - WebSocket │
└────────┬─────────┘ └────────┬─────────┘ └────────┬─────────┘
│ │ │
└────────────────────┼────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────┐
│ Sandbox Pool │
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ Sandbox 0 │ │ Sandbox 1 │ │ Sandbox 2 │ ... │
│ │ │ │ │ │ │ │
│ │ ┌─────────┐ │ │ ┌─────────┐ │ │ ┌─────────┐ │ │
│ │ │ Agent │ │ │ │ Agent │ │ │ │ (empty) │ │ │
│ │ │ browser │ │ │ │ desktop │ │ │ │ pending │ │ │
│ │ └─────────┘ │ │ └─────────┘ │ │ └─────────┘ │ │
│ │ │ │ │ │ │ │
│ │ Display: │ │ Display: │ │ Display: │ │
│ │ :5900 │ │ :5901 │ │ :5902 │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
│ │
│ hcs-sandbox (HCS APIs) │
└─────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────┐
│ Hyper-V / HCS │
└─────────────────────────────────────────────────────────────┘
# sandbox.yaml
apiVersion: hcskube/v1
kind: Sandbox
metadata:
name: browser-agent-1
labels:
app: browser-automation
tier: worker
spec:
# Resource allocation
resources:
cpu: 2
memoryMB: 4096
gpu: true
# Display configuration
display:
resolution: 1920x1080
colorDepth: 32
protocol: vnc # or rdp, enhanced-mode
port: 5900 # auto-assigned if not specified
# Folder mappings (like volumes)
volumes:
- name: workflows
hostPath: C:\workflows
sandboxPath: C:\Users\agent\workflows
readOnly: true
- name: output
hostPath: C:\output\browser-agent-1
sandboxPath: C:\Users\agent\output
readOnly: false
# Startup commands (like entrypoint)
startup:
- command: powershell
args: ["-File", "C:\\Users\\agent\\workflows\\init.ps1"]
# Health check
healthCheck:
type: process
processName: explorer.exe
intervalSeconds: 10
# Lifecycle
lifecycle:
ephemeral: true # destroy on agent completion
timeoutMinutes: 30 # max lifetime
idleTimeoutMinutes: 5 # destroy if no agent activity
# sandboxpool.yaml
apiVersion: hcskube/v1
kind: SandboxPool
metadata:
name: browser-workers
spec:
replicas: 5
minReady: 2 # keep 2 warm sandboxes ready
maxPerHost: 10 # limit per physical host
template:
metadata:
labels:
pool: browser-workers
spec:
resources:
cpu: 2
memoryMB: 4096
gpu: true
display:
resolution: 1920x1080
protocol: vnc
volumes:
- name: shared-workflows
hostPath: C:\workflows
sandboxPath: C:\workflows
readOnly: true
# Scaling policy
scaling:
type: queue-based
metric: pending-agents
scaleUpThreshold: 3 # add sandbox if 3+ agents waiting
scaleDownDelay: 300 # wait 5min before removing idle
# agent.yaml
apiVersion: hcskube/v1
kind: Agent
metadata:
name: book-flight-task-123
labels:
task: book-flight
customer: acme-corp
spec:
# Target sandbox pool
sandboxSelector:
matchLabels:
pool: browser-workers
# Or specific sandbox
# sandboxName: browser-agent-1
# Agent type
type: mcp-agent # or playwright, selenium, custom
# Task definition
task:
workflow: book-flight
input:
destination: "New York"
date: "2024-12-25"
airline: "any"
# MCP endpoint (if using mcp-agent)
mcp:
endpoint: http://localhost:8080/mcp
tools:
- browser_navigate
- browser_click
- browser_type
- screenshot
# Completion criteria
completion:
type: mcp-done # agent signals completion
maxRetries: 3
timeoutMinutes: 15
# Output handling
output:
screenshots: true
screenRecording: false
logs: true
artifacts:
- C:\Users\agent\output\*
# displayservice.yaml
apiVersion: hcskube/v1
kind: DisplayService
metadata:
name: browser-display
spec:
selector:
pool: browser-workers
# Expose displays
ports:
- name: vnc
protocol: vnc
port: 5900
targetPort: auto # maps to sandbox display port
# Load balancing for display access
type: LoadBalancer # or ClusterIP, NodePort
# WebSocket gateway for browser access
gateway:
enabled: true
path: /display/{sandbox-id}
auth: jwt
-- sandboxes table
-- One row per HCS sandbox instance: desired spec plus observed runtime state.
CREATE TABLE sandboxes (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    pool_id TEXT REFERENCES sandbox_pools(id),  -- NULL for standalone (non-pool) sandboxes
    -- Closed state-machine vocabulary; CHECK rejects typos from writers.
    status TEXT NOT NULL CHECK (status IN ('pending', 'creating', 'running', 'paused', 'terminated')),
    spec JSON NOT NULL,
    hcs_id TEXT, -- HCS compute system ID; set once the VM is created
    display_port INTEGER,
    ip_address TEXT,
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    started_at TIMESTAMP,
    terminated_at TIMESTAMP,
    last_heartbeat TIMESTAMP
);
-- The pool controller lists sandboxes by pool and by status on every
-- reconcile pass; index both access paths.
CREATE INDEX sandboxes_pool_id_idx ON sandboxes (pool_id);
CREATE INDEX sandboxes_status_idx ON sandboxes (status);
-- agents table: one row per submitted agent task (like a Kubernetes Job).
CREATE TABLE agents (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    sandbox_id TEXT REFERENCES sandboxes(id), -- NULL until the scheduler binds it
    -- Closed lifecycle vocabulary, enforced at the storage layer.
    status TEXT NOT NULL CHECK (status IN ('pending', 'scheduled', 'running', 'completed', 'failed')),
    spec JSON NOT NULL,
    result JSON, -- populated on completion; NULL while pending/running
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    scheduled_at TIMESTAMP,
    started_at TIMESTAMP,
    completed_at TIMESTAMP
);
-- The scheduler polls for pending agents once per second.
CREATE INDEX agents_status_idx ON agents (status);
-- sandbox_pools table: desired vs ready replica counts per pool
-- (like a Kubernetes Deployment).
CREATE TABLE sandbox_pools (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    spec JSON NOT NULL,
    -- Counters default to 0 so a freshly created pool never reports NULL.
    desired_replicas INTEGER NOT NULL DEFAULT 0,
    ready_replicas INTEGER NOT NULL DEFAULT 0,
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- events table: append-only audit log of resource lifecycle transitions.
CREATE TABLE events (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    resource_type TEXT NOT NULL, -- e.g. 'Sandbox', 'Agent', 'SandboxPool'
    resource_id TEXT NOT NULL,
    event_type TEXT NOT NULL,    -- e.g. 'Created', 'Scheduled', 'Failed'
    message TEXT,                -- optional human-readable detail
    timestamp TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- The API serves events filtered per resource.
CREATE INDEX events_resource_idx ON events (resource_type, resource_id);
// controller.rs - Reconciliation loop
/// Reconciles each `SandboxPool`'s desired replica count against the
/// sandboxes actually recorded in the state store, creating and
/// terminating sandboxes through HCS as needed.
pub struct SandboxPoolController {
    state: Arc<StateStore>, // desired + actual state: pools, sandboxes, events
    hcs: Arc<HcsSandbox>,   // handle to the HCS sandbox backend
    interval: Duration,     // pause between reconcile passes
}
impl SandboxPoolController {
    /// Drive the control loop forever: reconcile every known pool, then
    /// sleep for the configured interval. Reconcile errors are logged and
    /// retried on the next tick, never fatal.
    pub async fn run(&self) {
        loop {
            for pool in self.state.list_pools().await {
                if let Err(e) = self.reconcile(&pool).await {
                    tracing::error!(pool = %pool.name, error = %e, "reconcile failed");
                }
            }
            tokio::time::sleep(self.interval).await;
        }
    }

    /// Bring a single pool to its desired state: create sandboxes on a
    /// deficit, retire sufficiently-idle ones on a surplus, and recycle any
    /// running sandbox that fails its health check (the next pass will
    /// create its replacement).
    async fn reconcile(&self, pool: &SandboxPool) -> Result<()> {
        let pool_sandboxes = self.state.list_sandboxes_by_pool(&pool.id).await?;
        let running_count = pool_sandboxes
            .iter()
            .filter(|s| s.status == Status::Running)
            .count();
        let desired_count = pool.spec.replicas as usize;

        // Scale up: one new sandbox per missing replica.
        if running_count < desired_count {
            let deficit = desired_count - running_count;
            tracing::info!(pool = %pool.name, count = deficit, "scaling up");
            for replica_idx in 0..deficit {
                self.create_sandbox(Sandbox::from_pool_template(pool, replica_idx))
                    .await?;
            }
        }

        // Scale down: only sandboxes idle longer than the pool's configured
        // delay are eligible, and never more than the surplus.
        if running_count > desired_count {
            let surplus = running_count - desired_count;
            let removable = pool_sandboxes
                .iter()
                .filter(|s| s.is_idle() && s.idle_duration() > pool.spec.scaling.scale_down_delay)
                .take(surplus);
            for sandbox in removable {
                tracing::info!(sandbox = %sandbox.name, "scaling down");
                self.terminate_sandbox(&sandbox.id).await?;
            }
        }

        // Health: terminate unhealthy running sandboxes here; replacement is
        // deliberately deferred to the next reconcile pass.
        for sandbox in pool_sandboxes.iter().filter(|s| s.status == Status::Running) {
            if !self.health_check(sandbox).await? {
                tracing::warn!(sandbox = %sandbox.name, "health check failed, recreating");
                self.terminate_sandbox(&sandbox.id).await?;
            }
        }
        Ok(())
    }

    /// Create and start one sandbox via HCS, recording each state transition
    /// in the store and emitting a "Created" audit event. Returns the store
    /// id of the new sandbox.
    async fn create_sandbox(&self, spec: Sandbox) -> Result<String> {
        // Record intent first so the sandbox is visible as Pending while the
        // (slower) HCS creation runs.
        let id = self.state.insert_sandbox(&spec, Status::Pending).await?;
        let config = HcsConfig::from_sandbox_spec(&spec.spec);
        let hcs_id = self.hcs.create(config).await?;
        self.hcs.start(&hcs_id).await?;
        self.state
            .update_sandbox_status(&id, Status::Running, Some(&hcs_id))
            .await?;
        self.state
            .emit_event(Event {
                resource_type: "Sandbox",
                resource_id: id.clone(),
                event_type: "Created",
                message: format!("Sandbox {} created and started", spec.name),
            })
            .await?;
        Ok(id)
    }
}
// scheduler.rs - Assign agents to sandboxes
/// Assigns pending agents to candidate sandboxes matching their selectors.
pub struct Scheduler {
    state: Arc<StateStore>, // source of pending agents and candidate sandboxes
}
impl Scheduler {
    /// Poll loop: once per second, attempt to place every pending agent.
    /// Placement failures are logged and retried on the next tick.
    pub async fn run(&self) {
        loop {
            let pending_agents = self.state.list_agents_by_status(Status::Pending).await;
            for agent in pending_agents {
                match self.schedule(&agent).await {
                    Ok(sandbox_id) => {
                        tracing::info!(
                            agent = %agent.name,
                            sandbox = %sandbox_id,
                            "agent scheduled"
                        );
                    }
                    Err(e) => {
                        tracing::warn!(agent = %agent.name, "no sandbox available: {}", e);
                    }
                }
            }
            tokio::time::sleep(Duration::from_secs(1)).await;
        }
    }

    /// Bind the agent to the highest-scoring sandbox that matches its
    /// selector, recording the binding and marking the agent Scheduled.
    /// Errors when no sandbox matches.
    async fn schedule(&self, agent: &Agent) -> Result<String> {
        let matching = self.find_candidates(agent).await?;
        if matching.is_empty() {
            return Err(anyhow!("no sandboxes match selector"));
        }
        // Highest score wins; on ties `max_by_key` keeps the last candidate.
        let winner = matching
            .iter()
            .map(|s| (self.score(s, agent), s))
            .max_by_key(|(score, _)| *score)
            .map(|(_, s)| s)
            .ok_or_else(|| anyhow!("no suitable sandbox"))?;
        self.state.bind_agent(&agent.id, &winner.id).await?;
        self.state.update_agent_status(&agent.id, Status::Scheduled).await?;
        Ok(winner.id.clone())
    }

    /// Heuristic placement score for one (sandbox, agent) pair; larger is
    /// better.
    fn score(&self, sandbox: &Sandbox, agent: &Agent) -> i32 {
        let mut total = 0;
        // A running sandbox needs no boot wait.
        if sandbox.status == Status::Running {
            total += 100;
        }
        // An unoccupied sandbox avoids queueing behind another agent.
        if sandbox.current_agent.is_none() {
            total += 50;
        }
        // Reward GPU-capable sandboxes for GPU-requiring agents.
        if agent.spec.requires_gpu && sandbox.spec.resources.gpu {
            total += 25;
        }
        // Tiebreaker: longer-idle sandboxes are better warmed up.
        total + (sandbox.idle_duration().as_secs() / 10) as i32
    }
}
// display_gateway.rs - WebSocket proxy to sandbox displays
use axum::{
    extract::{Path, State, WebSocketUpgrade},
    http::{header, StatusCode},
    response::{IntoResponse, Response},
    Json,
};
use tokio_tungstenite::connect_async;
pub async fn display_websocket(
Path(sandbox_id): Path<String>,
ws: WebSocketUpgrade,
State(state): State<AppState>,
) -> Response {
// Get sandbox display info
let sandbox = state.store.get_sandbox(&sandbox_id).await
.expect("sandbox not found");
let vnc_url = format!("ws://{}:{}", sandbox.ip_address, sandbox.display_port);
ws.on_upgrade(move |socket| proxy_to_vnc(socket, vnc_url))
}
async fn proxy_to_vnc(client: WebSocket, vnc_url: String) {
let (vnc_stream, _) = connect_async(&vnc_url).await.expect("vnc connect failed");
let (client_write, client_read) = client.split();
let (vnc_write, vnc_read) = vnc_stream.split();
// Bidirectional proxy
tokio::select! {
_ = forward(client_read, vnc_write) => {},
_ = forward(vnc_read, client_write) => {},
}
}
// Screenshot endpoint
pub async fn screenshot(
Path(sandbox_id): Path<String>,
State(state): State<AppState>,
) -> impl IntoResponse {
let sandbox = state.store.get_sandbox(&sandbox_id).await?;
// Capture via HCS/RDP
let png_bytes = state.hcs.capture_screen(&sandbox.hcs_id).await?;
(
[(header::CONTENT_TYPE, "image/png")],
png_bytes
)
}
// Input injection endpoint
pub async fn send_input(
Path(sandbox_id): Path<String>,
State(state): State<AppState>,
Json(input): Json<InputEvent>,
) -> impl IntoResponse {
let sandbox = state.store.get_sandbox(&sandbox_id).await?;
match input {
InputEvent::Click { x, y, button } => {
state.hcs.send_mouse_click(&sandbox.hcs_id, x, y, button).await?;
}
InputEvent::Type { text } => {
state.hcs.send_keyboard_input(&sandbox.hcs_id, &text).await?;
}
InputEvent::KeyPress { key, modifiers } => {
state.hcs.send_key_press(&sandbox.hcs_id, key, modifiers).await?;
}
}
StatusCode::OK
}// main.rs - CLI client
use clap::{Parser, Subcommand};
// Top-level CLI definition for `hcskubectl`, the kubectl-style client for
// the HCS-Kube API server. (Plain `//` comments here on purpose: clap
// surfaces `///` doc comments as user-visible --help text.)
#[derive(Parser)]
#[command(name = "hcskubectl")]
struct Cli {
    #[command(subcommand)]
    command: Commands,
}
// All hcskubectl subcommands. The `///` doc comments below are significant:
// clap shows them verbatim as each command's help text, so they are left
// byte-identical; extra notes use plain `//`.
#[derive(Subcommand)]
enum Commands {
    /// Apply a configuration file
    Apply {
        // Path to a YAML manifest (Sandbox, SandboxPool, Agent, ...).
        #[arg(short, long)]
        file: PathBuf,
    },
    /// Get resources
    Get {
        #[arg(value_enum)]
        resource: ResourceType,
        // Optional: narrow the listing to a single resource by name.
        name: Option<String>,
    },
    /// View sandbox display
    Screen {
        sandbox: String,
        #[arg(long)]
        save: Option<PathBuf>, // save screenshot
    },
    /// Send input to sandbox
    Input {
        sandbox: String,
        #[arg(long)]
        click: Option<String>, // "100,200"
        #[arg(long)]
        type_text: Option<String>,
    },
    /// View logs
    Logs {
        sandbox: String,
        // -f / --follow streams logs continuously, like `kubectl logs -f`.
        #[arg(short, long)]
        follow: bool,
    },
    /// Delete resources
    Delete {
        resource: ResourceType,
        name: String,
    },
    /// Scale a sandbox pool
    Scale {
        pool: String,
        #[arg(long)]
        replicas: u32,
    },
}
// Usage examples:
// hcskubectl apply -f sandbox.yaml
// hcskubectl get sandboxes
// hcskubectl get sandbox browser-agent-1
// hcskubectl screen browser-agent-1
// hcskubectl screen browser-agent-1 --save screenshot.png
// hcskubectl input browser-agent-1 --click "500,300"
// hcskubectl input browser-agent-1 --type "hello world"
// hcskubectl logs browser-agent-1 -f
// hcskubectl scale browser-workers --replicas 10
// hcskubectl delete sandbox browser-agent-1
┌─────────────────────────────────────────────────────────────────────────────┐
│ REST API │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ Sandboxes │
│ ───────── │
│ POST /api/v1/sandboxes Create sandbox │
│ GET /api/v1/sandboxes List sandboxes │
│ GET /api/v1/sandboxes/:id Get sandbox │
│ DELETE /api/v1/sandboxes/:id Terminate sandbox │
│ POST /api/v1/sandboxes/:id/pause Pause sandbox │
│ POST /api/v1/sandboxes/:id/resume Resume sandbox │
│ │
│ Sandbox Pools │
│ ───────────── │
│ POST /api/v1/sandboxpools Create pool │
│ GET /api/v1/sandboxpools List pools │
│ GET /api/v1/sandboxpools/:id Get pool │
│ PATCH /api/v1/sandboxpools/:id/scale Scale pool │
│ DELETE /api/v1/sandboxpools/:id Delete pool │
│ │
│ Agents │
│ ────── │
│ POST /api/v1/agents Submit agent task │
│ GET /api/v1/agents List agents │
│ GET /api/v1/agents/:id Get agent status │
│ GET /api/v1/agents/:id/result Get agent result │
│ DELETE /api/v1/agents/:id Cancel agent │
│ │
│ Display (Computer Use) │
│ ────────────────────── │
│ GET /api/v1/sandboxes/:id/screen Screenshot (PNG) │
│ WS /api/v1/sandboxes/:id/display WebSocket VNC stream │
│ POST /api/v1/sandboxes/:id/input Send keyboard/mouse input │
│ POST /api/v1/sandboxes/:id/clipboard Set clipboard │
│ GET /api/v1/sandboxes/:id/clipboard Get clipboard │
│ │
│ Events & Logs │
│ ───────────── │
│ GET /api/v1/events List events │
│ GET /api/v1/sandboxes/:id/logs Get sandbox logs │
│ WS /api/v1/sandboxes/:id/logs Stream logs │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
┌──────────────────────────────────────────────────────────────────────────────┐
│ Agent Execution Flow │
├──────────────────────────────────────────────────────────────────────────────┤
│ │
│ 1. Submit Agent Task │
│ ───────────────── │
│ POST /api/v1/agents │
│ { │
│ "name": "book-flight-123", │
│ "sandboxSelector": { "pool": "browser-workers" }, │
│ "task": { "workflow": "book-flight", "input": {...} } │
│ } │
│ │ │
│ ▼ │
│ 2. Scheduler Assigns Sandbox │
│ ───────────────────────── │
│ - Find warm sandbox from pool │
│ - Or wait for one to become available │
│ - Bind agent to sandbox │
│ │ │
│ ▼ │
│ 3. Agent Starts in Sandbox │
│ ─────────────────────── │
│ - MCP agent process launched │
│ - Connects to MCP endpoint │
│ - Receives task input │
│ │ │
│ ▼ │
│ 4. Agent Performs UI Automation │
│ ─────────────────────────── │
│ Loop: │
│ - Screenshot → Vision Model → Decide action │
│ - POST /input (click, type) │
│ - Wait for page load │
│ - Repeat until task complete │
│ │ │
│ ▼ │
│ 5. Agent Completes │
│ ─────────────────── │
│ - Signals completion via MCP │
│ - Result stored in state │
│ - Sandbox returned to pool (or destroyed if ephemeral) │
│ │ │
│ ▼ │
│ 6. Retrieve Result │
│ ──────────────── │
│ GET /api/v1/agents/book-flight-123/result │
│ { │
│ "status": "completed", │
│ "output": { "confirmation": "ABC123", "price": "$450" }, │
│ "artifacts": ["screenshot-final.png"], │
│ "duration_seconds": 45 │
│ } │
│ │
└──────────────────────────────────────────────────────────────────────────────┘
┌─────────────────────┬──────────────────────────┬──────────────────────────┐
│ Feature │ Kubernetes │ HCS-Kube │
├─────────────────────┼──────────────────────────┼──────────────────────────┤
│ Unit of work │ Pod (containers) │ Sandbox (Windows VM) │
│ Networking │ CNI plugins, Services │ HNS NAT, simple routing │
│ Storage │ PV, PVC, CSI │ Folder mappings, VHDX │
│ Scheduling │ Complex (affinity, etc) │ Simple (pool + GPU) │
│ Scaling │ HPA, VPA, Cluster Auto │ Pool-based, queue-aware │
│ Multi-node │ Yes (distributed) │ No (single host)* │
│ State store │ etcd (distributed) │ SQLite (embedded) │
│ GUI/Display │ Not native │ First-class (VNC/RDP) │
│ GPU │ Device plugins │ Native HCS GPU-PV │
│ Boot time │ Seconds (containers) │ 2-5 sec (HCS) │
│ Isolation │ cgroups/namespaces │ Hyper-V (hardware) │
└─────────────────────┴──────────────────────────┴──────────────────────────┘
* Multi-node possible with agent on each host reporting to central API
hcs-kube/
├── Cargo.toml
├── src/
│ ├── lib.rs
│ ├── api/
│ │ ├── mod.rs
│ │ ├── server.rs # Axum HTTP server
│ │ ├── handlers.rs # Request handlers
│ │ └── websocket.rs # Display streaming
│ ├── controllers/
│ │ ├── mod.rs
│ │ ├── sandbox.rs # Sandbox controller
│ │ ├── pool.rs # SandboxPool controller
│ │ └── agent.rs # Agent controller
│ ├── scheduler/
│ │ ├── mod.rs
│ │ └── scheduler.rs # Agent scheduler
│ ├── display/
│ │ ├── mod.rs
│ │ ├── gateway.rs # VNC/RDP proxy
│ │ ├── screenshot.rs # Screen capture
│ │ └── input.rs # Input injection
│ ├── state/
│ │ ├── mod.rs
│ │ └── store.rs # SQLite state store
│ ├── hcs/
│ │ └── mod.rs # Re-export from hcs-sandbox
│ └── models/
│ ├── mod.rs
│ ├── sandbox.rs # Sandbox spec/status
│ ├── pool.rs # Pool spec/status
│ └── agent.rs # Agent spec/status
├── hcskubectl/
│ ├── Cargo.toml
│ └── src/
│ └── main.rs # CLI binary
└── examples/
├── sandbox.yaml
├── pool.yaml
└── agent.yaml
# Start the API server
hcs-kube serve --port 8080
# Create a sandbox pool
hcskubectl apply -f pool.yaml
# Check pool status
hcskubectl get pools
# NAME READY DESIRED AGE
# browser-workers 5/5 5 2m
# Submit an agent task
hcskubectl apply -f agent.yaml
# Watch agent progress
hcskubectl get agents -w
# NAME STATUS SANDBOX AGE
# book-flight-123 Running browser-agent-2 30s
# View the sandbox display (opens VNC viewer)
hcskubectl screen browser-agent-2
# Or get a screenshot
hcskubectl screen browser-agent-2 --save current.png
# Get agent result when done
hcskubectl get agent book-flight-123 -o json
HCS-Kube provides a Kubernetes-like experience for orchestrating Windows sandboxes for computer use agents:
| Component | Purpose |
|---|---|
| Sandbox | Isolated Windows desktop (like Pod) |
| SandboxPool | Managed group with autoscaling (like Deployment) |
| Agent | UI automation task (like Job) |
| DisplayService | VNC/RDP access (like Service) |
| Scheduler | Places agents on sandboxes |
| Controller | Reconciles desired vs actual state |
| Display Gateway | WebSocket proxy for screen/input |
Built on hcs-sandbox which uses HCS APIs directly - same tech as Windows Sandbox but with multi-instance support and full programmability.