This setup deploys CheckMK (Raw Edition) on Kubernetes using a GitOps approach with ArgoCD. It addresses specific requirements such as persistence, permission handling for mounted volumes, and log forwarding using a sidecar pattern.
- Helm Chart: Uses the `stakater/application` wrapper chart.
- Singleton: Deployed with `replicas: 1` and the `Recreate` strategy.
- Explicit PVC: A separate `pvc.yaml` is used for the `/omd/sites` volume to ensure lifecycle independence.
- Permission Handling: An InitContainer (`busybox`) explicitly runs `chown -R 1000:1000 /omd/sites` to fix permission issues common with some CSI drivers.
- Log Forwarding: A Grafana Alloy sidecar tails logs from the shared volume and forwards them to Loki.
Before using, please replace the following placeholders with your actual values:
- `<YOUR_DOMAIN>`: Your external domain (e.g., `checkmk.example.com`).
- `<YOUR_CLUSTER_ISSUER>`: Your cert-manager ClusterIssuer (e.g., `letsencrypt-prod`).
- `<YOUR_STORAGE_CLASS>`: Your Kubernetes StorageClass (e.g., `standard`, `longhorn`).
- `<LOKI_URL>`: The URL to your Loki instance (e.g., `http://loki.logging.svc.cluster.local:3100/loki/api/v1/push`).
---
# Kustomization: bundles the three manifests below. Each following document
# presumably lives in the file named in `resources` (ns.yaml, checkmk.yaml,
# pvc.yaml) -- confirm against the repository layout.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ns.yaml
  - checkmk.yaml
  - pvc.yaml
---
# Argo CD Application: renders the Stakater `application` Helm chart with the
# CheckMK-specific values under `valuesObject` and syncs the result into the
# `monitoring` namespace.
# NOTE(review): the nesting below was reconstructed; verify the value keys
# against the values schema of the pinned chart version.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: checkmk
  namespace: argocd
spec:
  project: default
  source:
    chart: application
    repoURL: https://stakater.github.io/stakater-charts
    targetRevision: "6.14.0"
    helm:
      releaseName: checkmk
      valuesObject:
        applicationName: checkmk
        deployment:
          # Singleton: one replica with Recreate, so the old pod is stopped
          # before the new one starts (the data volume is ReadWriteOnce).
          replicas: 1
          strategy:
            type: Recreate
          podSecurityContext:
            fsGroup: 1000
          image:
            repository: checkmk/check-mk-raw
            tag: "2.4.0p18"
            pullPolicy: IfNotPresent
          # InitContainer to fix volume permissions on startup
          initContainers:
            - name: fix-permissions
              # NOTE(review): untagged image resolves to `latest`; consider
              # pinning a specific busybox version.
              image: busybox
              command: ["sh", "-c", "chown -R 1000:1000 /omd/sites"]
              volumeMounts:
                - name: tmp
                  mountPath: /omd/sites/cmk/tmp
                - name: monitoring-data
                  mountPath: /omd/sites
          # Main Container Volume Mounts (restored if overwritten)
          volumeMounts:
            - name: tmp
              mountPath: /omd/sites/cmk/tmp
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
            - name: monitoring-data
              mountPath: /omd/sites
          # Probes tuned for CheckMK startup
          readinessProbe:
            enabled: true
            httpGet:
              path: /
              port: 5000
            initialDelaySeconds: 60
            periodSeconds: 20
            timeoutSeconds: 10
            failureThreshold: 5
          livenessProbe:
            enabled: true
            httpGet:
              path: /
              port: 5000
            initialDelaySeconds: 120
            periodSeconds: 300
            timeoutSeconds: 10
            failureThreshold: 5
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              cpu: 2000m
              memory: 4Gi
          containerSecurityContext:
            readOnlyRootFilesystem: false
            runAsNonRoot: false
          # Grafana Alloy Sidecar for Log Forwarding
          additionalContainers:
            - name: alloy-sidecar
              # NOTE(review): `latest` is a moving tag; consider pinning a
              # specific Alloy release.
              image: grafana/alloy:latest
              args:
                - run
                - --server.http.listen-addr=0.0.0.0:12345
                - --storage.path=/var/lib/alloy/data
                - /etc/alloy/config.alloy
              volumeMounts:
                # Same data volume as the main container, mounted read-only
                # so the sidecar can tail the site logs.
                - name: monitoring-data
                  mountPath: /omd/sites
                  readOnly: true
                - name: alloy-config
                  mountPath: /etc/alloy
              ports:
                - name: alloy-http
                  containerPort: 12345
          volumes:
            # RAM-backed scratch space mounted over the site's tmp directory.
            - name: tmp
              emptyDir:
                medium: Memory
                sizeLimit: 1Gi
            # Host timezone file, mounted read-only into the main container.
            - name: localtime
              hostPath:
                path: /etc/localtime
            # Persistent site data; the claim is defined as a separate
            # PersistentVolumeClaim document.
            - name: monitoring-data
              persistentVolumeClaim:
                claimName: checkmk-pvc
            # Alloy configuration; the name is presumably the chart-generated
            # ConfigMap for the `alloy-config` entry under `configMap.files`
            # -- confirm against the rendered manifests.
            - name: alloy-config
              configMap:
                name: checkmk-alloy-config
        configMap:
          enabled: true
          files:
            alloy-config:
              config.alloy: |
                local.file_match "checkmk_logs" {
                  path_targets = [
                    {"__path__" = "/omd/sites/cmk/var/log/*.log", "job" = "checkmk", "app" = "checkmk"},
                    {"__path__" = "/omd/sites/cmk/var/log/**/*.log", "job" = "checkmk", "app" = "checkmk"},
                    {"__path__" = "/omd/sites/cmk/var/log/apache/*_log", "job" = "checkmk", "app" = "checkmk"},
                  ]
                }
                loki.source.file "checkmk_logs" {
                  targets    = local.file_match.checkmk_logs.targets
                  forward_to = [loki.write.default.receiver]
                }
                loki.write "default" {
                  endpoint {
                    url = "<LOKI_URL>"
                  }
                }
        service:
          enabled: true
          type: LoadBalancer
          ports:
            - name: http
              port: 8080
              targetPort: 5000
              protocol: TCP
            - name: agent-tcp
              port: 8000
              targetPort: 8000
              protocol: TCP
        ingress:
          enabled: true
          ingressClassName: nginx
          annotations:
            cert-manager.io/cluster-issuer: <YOUR_CLUSTER_ISSUER>
          hosts:
            - host: <YOUR_DOMAIN>
              paths:
                - path: /
                  pathType: Prefix
          tls:
            - secretName: checkmk-tls
              hosts:
                - <YOUR_DOMAIN>
  destination:
    server: https://kubernetes.default.svc
    namespace: monitoring
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
---
# PersistentVolumeClaim for the CheckMK site data, referenced by the
# Application above as `claimName: checkmk-pvc`.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: checkmk-pvc
  namespace: monitoring
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
  storageClassName: <YOUR_STORAGE_CLASS>
---
# Namespace that holds the PVC and is the Application's destination.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring