diff --git a/main.tf b/main.tf index df3298ab..20f7b171 100644 --- a/main.tf +++ b/main.tf @@ -44,6 +44,7 @@ variable "webhook_handler_git_token" {} variable "webhook_handler_ssh_key" {} variable "monitoring_idrac_username" {} variable "monitoring_idrac_password" {} +variable "alertmanager_slack_api_url" {} variable "ansible_prefix" { default = "ANSIBLE_VAULT_PASSWORD_FILE=~/.ansible/vault_pass.txt ansible-playbook -i playbook/hosts.yaml playbook/linux.yml -t linux/initial_setup" @@ -211,6 +212,7 @@ module "kubernetes_cluster" { bind_named_conf_options = var.bind_named_conf_options alertmanager_account_password = var.alertmanager_account_password + alertmanager_slack_api_url = var.alertmanager_slack_api_url # Drone drone_github_client_id = var.drone_github_client_id diff --git a/modules/kubernetes/main.tf b/modules/kubernetes/main.tf index a8cd555b..c67aa892 100644 --- a/modules/kubernetes/main.tf +++ b/modules/kubernetes/main.tf @@ -32,6 +32,7 @@ variable "webhook_handler_git_token" {} variable "webhook_handler_ssh_key" {} variable "idrac_username" {} variable "idrac_password" {} +variable "alertmanager_slack_api_url" {} resource "null_resource" "core_services" { # List all the core modules that must be provisioned first @@ -143,6 +144,7 @@ module "monitoring" { alertmanager_account_password = var.alertmanager_account_password idrac_username = var.idrac_username idrac_password = var.idrac_password + alertmanager_slack_api_url = var.alertmanager_slack_api_url depends_on = [null_resource.core_services] } diff --git a/modules/kubernetes/monitoring/main.tf b/modules/kubernetes/monitoring/main.tf index afbd98d9..48871fee 100644 --- a/modules/kubernetes/monitoring/main.tf +++ b/modules/kubernetes/monitoring/main.tf @@ -9,6 +9,7 @@ variable "idrac_username" { variable "idrac_password" { default = "calvin" } +variable "alertmanager_slack_api_url" {} module "tls_secret" { source = "../setup_tls_secret" @@ -23,8 +24,9 @@ resource "helm_release" "prometheus" { repository = "https://prometheus-community.github.io/helm-charts" chart = "prometheus" + version = "15.0.2" - values = [templatefile("${path.module}/prometheus_chart_values.tpl", { alertmanager_mail_pass = var.alertmanager_account_password })] + values = [templatefile("${path.module}/prometheus_chart_values.tpl", { alertmanager_mail_pass = var.alertmanager_account_password, alertmanager_slack_api_url = var.alertmanager_slack_api_url })] } # Terraform get angry with the 30k values file :/ use ansible until solved diff --git a/modules/kubernetes/monitoring/prometheus_chart_values.tpl b/modules/kubernetes/monitoring/prometheus_chart_values.tpl index aeff5593..7288a431 100644 --- a/modules/kubernetes/monitoring/prometheus_chart_values.tpl +++ b/modules/kubernetes/monitoring/prometheus_chart_values.tpl @@ -31,6 +31,7 @@ alertmanagerFiles: smtp_auth_username: "alertmanager@viktorbarzin.me" smtp_auth_password: "${alertmanager_mail_pass}" smtp_require_tls: true + slack_api_url: "${alertmanager_slack_api_url}" templates: - "/etc/alertmanager/template/*.tmpl" route: @@ -38,14 +39,17 @@ alertmanagerFiles: group_wait: 3s group_interval: 5s repeat_interval: 1h - receiver: SMTP_STARTTLS + receiver: ALL receivers: - - name: 'SMTP_STARTTLS' + - name: ALL email_configs: - to: "me@viktorbarzin.me" send_resolved: true tls_config: insecure_skip_verify: true + slack_configs: + - send_resolved: true + channel: "#general" server: # Enable me to delete metrics @@ -93,7 +97,7 @@ serverFiles: - name: NodeDown rules: - alert: NodeDown - expr: up{job="kubernetes-nodes"} == 0 + expr: (up{job="kubernetes-nodes"} or on() vector(0)) == 0 for: 1m labels: severity: page @@ -120,7 +124,7 @@ serverFiles: - name: ReadyPodsInDeploymentLessThanSpec rules: - alert: ReadyPodsInDeploymentLessThanSpec - expr: kube_deployment_status_replicas_available - on(namespace, deployment) kube_deployment_spec_replicas < 0 + expr: kube_deployment_status_replicas_available - on(exported_namespace, deployment) kube_deployment_spec_replicas < 0 for: 10m labels: severity: page @@ -174,7 +178,7 @@ serverFiles: - name: Mailserver Down rules: - alert: Mail server has no replicas available - expr: (kube_deployment_status_replicas_available{namespace="mailserver"} or on() vector(0)) < 1 + expr: (kube_deployment_status_replicas_available{exported_namespace="mailserver"} or on() vector(0)) < 1 for: 10m labels: severity: page @@ -183,7 +187,7 @@ serverFiles: - name: Hackmd Down rules: - alert: Hackmd has no replicas available - expr: (kube_deployment_status_replicas_available{namespace="hackmd"} or on() vector(0)) < 1 + expr: (kube_deployment_status_replicas_available{exported_namespace="hackmd"} or on() vector(0)) < 1 for: 1m labels: severity: page @@ -192,7 +196,7 @@ serverFiles: - name: Privatebin Down rules: - alert: Privatebin has no replicas available - expr: (kube_deployment_status_replicas_available{namespace="privatebin"} or on() vector(0)) < 1 + expr: (kube_deployment_status_replicas_available{exported_namespace="privatebin"} or on() vector(0)) < 1 for: 10m labels: severity: page diff --git a/terraform.tfstate b/terraform.tfstate index dacbe7df..a88a592c 100644 Binary files a/terraform.tfstate and b/terraform.tfstate differ diff --git a/terraform.tfvars b/terraform.tfvars index 4c5e3cc1..2274dd97 100644 Binary files a/terraform.tfvars and b/terraform.tfvars differ