|
using Akka.Actor; |
|
using Akka.Cluster.Tools.Client; |
|
using Akka.Event; |
|
using Akka.Hosting; |
|
using Akka.Remote; |
|
using Microsoft.Extensions.Diagnostics.HealthChecks; |
|
|
|
namespace ClusterClientHealthCheck; |
|
|
|
/// <summary>
/// Records whether a <see cref="ThisActorSystemQuarantinedEvent"/> has been published on the
/// actor system's event stream.
/// </summary>
/// <remarks>
/// Replies to <see cref="GetQuarantineStatus"/> with a <see cref="QuarantineStatus"/> carrying
/// the current flag. The flag is only ever set to <c>true</c>; nothing in this actor clears it.
/// </remarks>
public class QuarantineDetectorActor : ReceiveActor
{
    // Resolved once per actor instance instead of on every quarantine event.
    private readonly ILoggingAdapter _log = Context.GetLogger();

    private bool _isQuarantined;

    /// <summary>
    /// Installs the handlers for quarantine events and status requests.
    /// </summary>
    public QuarantineDetectorActor()
    {
        Receive<ThisActorSystemQuarantinedEvent>(quarantineEvent =>
        {
            _isQuarantined = true;

            // Format template rather than an interpolated string, so the text is only
            // built when the warning level is actually enabled.
            _log.Warning("System quarantined by {0}", quarantineEvent.RemoteAddress);
        });

        Receive<GetQuarantineStatus>(_ => Sender.Tell(new QuarantineStatus(_isQuarantined)));
    }

    /// <summary>
    /// Subscribes this actor to quarantine events when it starts.
    /// </summary>
    protected override void PreStart()
    {
        Context.System.EventStream.Subscribe(Self, typeof(ThisActorSystemQuarantinedEvent));
    }

    /// <summary>
    /// Removes this actor's event stream subscriptions when it stops.
    /// </summary>
    protected override void PostStop()
    {
        Context.System.EventStream.Unsubscribe(Self);
    }

    /// <summary>
    /// Creates the <see cref="Akka.Actor.Props"/> used to start this actor.
    /// </summary>
    /// <returns>Props for a <see cref="QuarantineDetectorActor"/>.</returns>
    public static Props Props()
    {
        return Akka.Actor.Props.Create<QuarantineDetectorActor>();
    }
}
|
|
|
/// <summary>
/// Watches a cluster client actor and remembers whether it has terminated.
/// </summary>
/// <remarks>
/// Replies to <see cref="GetClusterClientStatus"/> with a <see cref="ClusterClientStatus"/>
/// carrying the current liveness flag. Once the flag is <c>false</c>, nothing in this actor
/// sets it back to <c>true</c>.
/// </remarks>
public class ClusterClientMonitorActor : ReceiveActor
{
    // Resolved once per actor instance instead of on every Terminated message.
    private readonly ILoggingAdapter _log = Context.GetLogger();

    private readonly IActorRef _clusterClient;

    private bool _isAlive;

    /// <summary>
    /// Starts watching <paramref name="clusterClient"/> and installs the message handlers.
    /// </summary>
    /// <param name="clusterClient">
    /// The cluster client actor to monitor. A null or "nobody" reference is reported as not alive.
    /// </param>
    public ClusterClientMonitorActor(IActorRef clusterClient)
    {
        _clusterClient = clusterClient;

        if (clusterClient is null || clusterClient.IsNobody())
        {
            // There is no real actor behind the reference, so claiming it is alive would be
            // a false positive for the health check that reads this flag.
            _isAlive = false;
            _log.Warning("No ClusterClient reference available to monitor; reporting it as not alive");
        }
        else
        {
            _isAlive = true;
            Context.Watch(_clusterClient);
        }

        Receive<Terminated>(terminated =>
        {
            // Ignore Terminated notifications for anything other than the monitored client.
            if (terminated.ActorRef.Equals(_clusterClient))
            {
                _isAlive = false;
                _log.Warning("ClusterClient terminated");
            }
        });

        Receive<GetClusterClientStatus>(_ => Sender.Tell(new ClusterClientStatus(_isAlive)));
    }

    /// <summary>
    /// Creates the <see cref="Akka.Actor.Props"/> used to start this actor.
    /// </summary>
    /// <param name="clusterClient">The cluster client actor to monitor.</param>
    /// <returns>Props for a <see cref="ClusterClientMonitorActor"/>.</returns>
    public static Props Props(IActorRef clusterClient)
    {
        return Akka.Actor.Props.Create(() => new ClusterClientMonitorActor(clusterClient));
    }
}
|
|
|
/// <summary>
/// Request message asking for the current quarantine flag; the reply type used alongside it in
/// this file is <c>QuarantineStatus</c>.
/// </summary>
public sealed class GetQuarantineStatus
{
    // Private so that Instance is the only object of this type that can be created.
    private GetQuarantineStatus() { }

    /// <summary>The single shared instance of this message.</summary>
    public static readonly GetQuarantineStatus Instance = new GetQuarantineStatus();
}
|
|
|
/// <summary>
/// Reply message carrying a quarantine flag.
/// </summary>
public sealed class QuarantineStatus
{
    /// <summary>The flag value supplied when this message was constructed.</summary>
    public bool IsQuarantined { get; }

    /// <summary>
    /// Creates a status message with the given flag.
    /// </summary>
    /// <param name="isQuarantined">Value exposed through <see cref="IsQuarantined"/>.</param>
    public QuarantineStatus(bool isQuarantined) => IsQuarantined = isQuarantined;
}
|
|
|
/// <summary>
/// Request message asking for the current cluster client liveness flag; the reply type used
/// alongside it in this file is <c>ClusterClientStatus</c>.
/// </summary>
public sealed class GetClusterClientStatus
{
    // Private so that Instance is the only object of this type that can be created.
    private GetClusterClientStatus() { }

    /// <summary>The single shared instance of this message.</summary>
    public static readonly GetClusterClientStatus Instance = new GetClusterClientStatus();
}
|
|
|
/// <summary>
/// Reply message carrying a cluster client liveness flag.
/// </summary>
public sealed class ClusterClientStatus
{
    /// <summary>The flag value supplied when this message was constructed.</summary>
    public bool IsAlive { get; }

    /// <summary>
    /// Creates a status message with the given flag.
    /// </summary>
    /// <param name="isAlive">Value exposed through <see cref="IsAlive"/>.</param>
    public ClusterClientStatus(bool isAlive) => IsAlive = isAlive;
}
|
|
|
/// <summary>
/// <see cref="AkkaConfigurationBuilder"/> extensions that start the monitoring actors defined in
/// this file and register a health check backed by each of them.
/// </summary>
public static class AkkaHostingExtensions
{
    // Upper bound on how long a health check waits for a monitoring actor to reply.
    private static readonly TimeSpan StatusAskTimeout = TimeSpan.FromSeconds(5);

    /// <summary>
    /// Starts a <see cref="QuarantineDetectorActor"/> and registers the "akka.quarantine"
    /// health check, which asks that actor whether the system has been quarantined.
    /// </summary>
    /// <param name="builder">The Akka.Hosting builder to extend.</param>
    /// <param name="quarantineDetectorName">Actor name given to the quarantine detector.</param>
    /// <returns>The same <paramref name="builder"/>, for chaining.</returns>
    public static AkkaConfigurationBuilder WithQuarantineHealthCheck(
        this AkkaConfigurationBuilder builder,
        string quarantineDetectorName = "quarantine-detector")
    {
        builder.WithActors((system, registry, _) =>
        {
            // Create the quarantine detector and make it resolvable by its actor type.
            var quarantineDetector = system.ActorOf(
                QuarantineDetectorActor.Props(),
                quarantineDetectorName);
            registry.TryRegister<QuarantineDetectorActor>(quarantineDetector);
        });

        // Register health checks using Akka.Hosting pattern
        builder.WithHealthCheck("akka.quarantine", async (system, registry, cancellationToken) =>
        {
            try
            {
                var detector = await registry.GetAsync<QuarantineDetectorActor>(cancellationToken);
                var result = await detector.Ask<QuarantineStatus>(
                    GetQuarantineStatus.Instance,
                    StatusAskTimeout,
                    cancellationToken);

                if (result.IsQuarantined)
                {
                    return HealthCheckResult.Unhealthy(
                        "ActorSystem is quarantined and requires restart",
                        data: new Dictionary<string, object>
                        {
                            ["quarantined"] = true,
                            ["actorSystem"] = system.Name
                        });
                }

                return HealthCheckResult.Healthy("ActorSystem is not quarantined");
            }
            catch (Exception ex)
            {
                // Any failure to obtain an answer (lookup, timeout, cancellation) is surfaced
                // as an unhealthy result instead of escaping the health check.
                return HealthCheckResult.Unhealthy(
                    "Failed to check quarantine status",
                    ex);
            }
        });

        return builder;
    }

    /// <summary>
    /// Starts a <see cref="ClusterClientMonitorActor"/> for the registered ClusterClient and
    /// registers the "cluster-client.alive" health check, which reports whether that client
    /// has terminated.
    /// </summary>
    /// <remarks>
    /// The monitor is created from whatever the registry returns for the <c>ClusterClient</c>
    /// key when the actor-start callback runs, so the ClusterClient is presumably expected to be
    /// registered before that point - confirm against the hosting setup.
    /// </remarks>
    /// <param name="builder">The Akka.Hosting builder to extend.</param>
    /// <param name="reconnectTimeout">
    /// When set, prepended to the configuration as <c>akka.cluster.client.reconnect-timeout</c>
    /// (truncated to whole milliseconds). When <c>null</c>, no configuration is added.
    /// </param>
    /// <param name="clusterClientMonitorName">Actor name given to the monitor.</param>
    /// <returns>The same <paramref name="builder"/>, for chaining.</returns>
    public static AkkaConfigurationBuilder WithClusterClientHealthCheck(
        this AkkaConfigurationBuilder builder,
        TimeSpan? reconnectTimeout = null,
        string clusterClientMonitorName = "cluster-client-monitor")
    {
        // Configure reconnect timeout if specified
        if (reconnectTimeout.HasValue)
        {
            builder.AddHocon(BuildReconnectTimeoutHocon(reconnectTimeout.Value), HoconAddMode.Prepend);
        }

        // Register monitoring actors
        builder.WithActors((system, registry) =>
        {
            // Get the registered ClusterClient and create monitor
            var clusterClient = registry.Get<ClusterClient>();
            var clientMonitor = system.ActorOf(
                ClusterClientMonitorActor.Props(clusterClient),
                clusterClientMonitorName);
            registry.TryRegister<ClusterClientMonitorActor>(clientMonitor);
        });

        // Register health checks using Akka.Hosting pattern
        builder.WithHealthCheck("cluster-client.alive", async (system, registry, cancellationToken) =>
        {
            try
            {
                var monitor = await registry.GetAsync<ClusterClientMonitorActor>(cancellationToken);
                var result = await monitor.Ask<ClusterClientStatus>(
                    GetClusterClientStatus.Instance,
                    StatusAskTimeout,
                    cancellationToken);

                if (!result.IsAlive)
                {
                    return HealthCheckResult.Unhealthy(
                        "ClusterClient is terminated and requires restart",
                        data: new Dictionary<string, object>
                        {
                            ["clusterClientAlive"] = false,
                            ["actorSystem"] = system.Name
                        });
                }

                return HealthCheckResult.Healthy("ClusterClient is alive and running");
            }
            catch (Exception ex)
            {
                // Any failure to obtain an answer (lookup, timeout, cancellation) is surfaced
                // as an unhealthy result instead of escaping the health check.
                return HealthCheckResult.Unhealthy(
                    "Failed to check ClusterClient status",
                    ex);
            }
        });

        return builder;
    }

    /// <summary>
    /// Adds ClusterClient with both quarantine and liveness health checks
    /// </summary>
    /// <param name="builder">The Akka.Hosting builder to extend.</param>
    /// <param name="reconnectTimeout">Forwarded to <see cref="WithClusterClientHealthCheck"/>.</param>
    /// <param name="clusterClientMonitorName">Forwarded to <see cref="WithClusterClientHealthCheck"/>.</param>
    /// <param name="quarantineDetectorName">Forwarded to <see cref="WithQuarantineHealthCheck"/>.</param>
    /// <returns>The same <paramref name="builder"/>, for chaining.</returns>
    public static AkkaConfigurationBuilder WithClusterClientAndQuarantineHealthCheck(
        this AkkaConfigurationBuilder builder,
        TimeSpan? reconnectTimeout = null,
        string clusterClientMonitorName = "cluster-client-monitor",
        string quarantineDetectorName = "quarantine-detector")
    {
        // Add quarantine health check
        builder.WithQuarantineHealthCheck(quarantineDetectorName);

        // Add cluster client health check
        builder.WithClusterClientHealthCheck(reconnectTimeout, clusterClientMonitorName);

        return builder;
    }

    // Renders the reconnect-timeout override as HOCON. The value is written as whole
    // milliseconds with the invariant culture so that the host's culture (for example one that
    // uses a decimal comma) cannot produce a duration literal the HOCON parser rejects.
    private static string BuildReconnectTimeoutHocon(TimeSpan reconnectTimeout)
    {
        var milliseconds = ((long)reconnectTimeout.TotalMilliseconds)
            .ToString(System.Globalization.CultureInfo.InvariantCulture);

        return $"akka.cluster.client {{ reconnect-timeout = {milliseconds}ms }}";
    }
}