From 28dd218590d2926d2e184c8ecea59248b55b1b02 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Mar 2026 19:41:39 +0000 Subject: [PATCH] [ci skip] rybbit: add CronJob to truncate ClickHouse system logs every 6h ClickHouse system log tables (metric_log, trace_log, text_log, etc.) were growing unboundedly on NFS (~10GiB, 1.3B rows) with no TTL, causing continuous background merge operations that burned ~920m CPU. Mounting custom config.d XML files crashes ClickHouse (exit code 36) so instead add a CronJob that truncates the tables via the HTTP API every 6 hours. Also removed the broken ConfigMap/volume mount that was causing crashes. --- stacks/rybbit/main.tf | 75 +++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/stacks/rybbit/main.tf b/stacks/rybbit/main.tf index 6503b787..e78950e9 100644 --- a/stacks/rybbit/main.tf +++ b/stacks/rybbit/main.tf @@ -30,28 +30,6 @@ locals { } -resource "kubernetes_config_map" "clickhouse_config" { - metadata { - name = "clickhouse-config" - namespace = kubernetes_namespace.rybbit.metadata[0].name - } - data = { - "docker_related_config.xml" = <<-XML - - :: - 0.0.0.0 - 1 - - XML - "disable-system-logs.xml" = <<-XML - - 4 - 16 - - XML - } -} - resource "kubernetes_deployment" "clickhouse" { metadata { name = "clickhouse" @@ -95,12 +73,6 @@ resource "kubernetes_deployment" "clickhouse" { name = "data" mount_path = "/var/lib/clickhouse" } - volume_mount { - name = "config" - mount_path = "/etc/clickhouse-server/config.d/zzz-custom.xml" - sub_path = "disable-system-logs.xml" - read_only = true - } resources { requests = { cpu = "100m" @@ -119,12 +91,6 @@ resource "kubernetes_deployment" "clickhouse" { server = var.nfs_server } } - volume { - name = "config" - config_map { - name = kubernetes_config_map.clickhouse_config.metadata[0].name - } - } } } } @@ -152,6 +118,47 @@ resource "kubernetes_service" "clickhouse" { } } +# CronJob to truncate ClickHouse system log tables every 
+# 6 hours. These tables grow unboundedly on NFS and trigger CPU-heavy background
+# merges, so they are emptied on a schedule through ClickHouse's HTTP interface.
+#
+# Notes on the job script:
+#   * `curl --fail` makes HTTP 4xx/5xx responses (bad credentials, rejected
+#     query) exit non-zero; together with `set -e` the job then fails and
+#     restart_policy = "OnFailure" actually applies. Without it curl exits 0 on
+#     an HTTP error and the job reports success without truncating anything.
+#   * The password is handed to the container as an environment variable and
+#     sent in the X-ClickHouse-Key header instead of being spliced into a
+#     single-quoted URL, so characters such as ' & # % + in the password can no
+#     longer break the shell command or the query string.
+#   * The table list lives in one place (the `for` loop) instead of six
+#     copy-pasted curl invocations.
+resource "kubernetes_cron_job_v1" "clickhouse_truncate_logs" {
+  metadata {
+    name      = "clickhouse-truncate-logs"
+    namespace = kubernetes_namespace.rybbit.metadata[0].name
+  }
+  spec {
+    schedule                      = "0 */6 * * *"
+    successful_jobs_history_limit = 1
+    failed_jobs_history_limit     = 1
+    job_template {
+      metadata {}
+      spec {
+        template {
+          metadata {}
+          spec {
+            restart_policy = "OnFailure"
+            container {
+              name  = "truncate"
+              image = "curlimages/curl:8.12.1"
+              # NOTE(review): the value is still stored in the pod spec; if this
+              # stack already has a Secret holding the ClickHouse password,
+              # switch this to value_from { secret_key_ref { ... } }.
+              env {
+                name  = "CLICKHOUSE_PASSWORD"
+                value = var.clickhouse_password
+              }
+              # "$NAME" (no braces) is left untouched by Terraform and expanded
+              # by the shell inside the container at run time.
+              command = [
+                "sh", "-c",
+                <<-EOT
+                  set -eu
+                  for table in metric_log trace_log text_log asynchronous_metric_log query_log part_log; do
+                    curl -sS --fail -H "X-ClickHouse-User: default" -H "X-ClickHouse-Key: $CLICKHOUSE_PASSWORD" -d "TRUNCATE TABLE IF EXISTS system.$table" 'http://clickhouse.rybbit.svc.cluster.local:8123/'
+                  done
+                  echo 'System logs truncated'
+                EOT
+              ]
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
 resource "kubernetes_deployment" "rybbit" {
   metadata {
     name = "rybbit"