add slack to notifications and update alert definitions after upgrade [ci skip]
This commit is contained in:
parent
6870cee492
commit
0b20fc1e73
6 changed files with 18 additions and 8 deletions
2
main.tf
2
main.tf
|
|
@@ -44,6 +44,7 @@ variable "webhook_handler_git_token" {}
|
|||
variable "webhook_handler_ssh_key" {}
|
||||
variable "monitoring_idrac_username" {}
|
||||
variable "monitoring_idrac_password" {}
|
||||
variable "alertmanager_slack_api_url" {}
|
||||
|
||||
variable "ansible_prefix" {
|
||||
default = "ANSIBLE_VAULT_PASSWORD_FILE=~/.ansible/vault_pass.txt ansible-playbook -i playbook/hosts.yaml playbook/linux.yml -t linux/initial_setup"
|
||||
|
|
@@ -211,6 +212,7 @@ module "kubernetes_cluster" {
|
|||
bind_named_conf_options = var.bind_named_conf_options
|
||||
|
||||
alertmanager_account_password = var.alertmanager_account_password
|
||||
alertmanager_slack_api_url = var.alertmanager_slack_api_url
|
||||
|
||||
# Drone
|
||||
drone_github_client_id = var.drone_github_client_id
|
||||
|
|
|
|||
|
|
@@ -32,6 +32,7 @@ variable "webhook_handler_git_token" {}
|
|||
variable "webhook_handler_ssh_key" {}
|
||||
variable "idrac_username" {}
|
||||
variable "idrac_password" {}
|
||||
variable "alertmanager_slack_api_url" {}
|
||||
|
||||
resource "null_resource" "core_services" {
|
||||
# List all the core modules that must be provisioned first
|
||||
|
|
@@ -143,6 +144,7 @@ module "monitoring" {
|
|||
alertmanager_account_password = var.alertmanager_account_password
|
||||
idrac_username = var.idrac_username
|
||||
idrac_password = var.idrac_password
|
||||
alertmanager_slack_api_url = var.alertmanager_slack_api_url
|
||||
|
||||
depends_on = [null_resource.core_services]
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -9,6 +9,7 @@ variable "idrac_username" {
|
|||
variable "idrac_password" {
|
||||
default = "calvin"
|
||||
}
|
||||
variable "alertmanager_slack_api_url" {}
|
||||
|
||||
module "tls_secret" {
|
||||
source = "../setup_tls_secret"
|
||||
|
|
@@ -23,8 +24,9 @@ resource "helm_release" "prometheus" {
|
|||
|
||||
repository = "https://prometheus-community.github.io/helm-charts"
|
||||
chart = "prometheus"
|
||||
version = "15.0.2"
|
||||
|
||||
values = [templatefile("${path.module}/prometheus_chart_values.tpl", { alertmanager_mail_pass = var.alertmanager_account_password })]
|
||||
values = [templatefile("${path.module}/prometheus_chart_values.tpl", { alertmanager_mail_pass = var.alertmanager_account_password, alertmanager_slack_api_url = var.alertmanager_slack_api_url })]
|
||||
}
|
||||
|
||||
# Terraform gets angry with the 30k values file :/ use Ansible until this is solved
|
||||
|
|
|
|||
|
|
@@ -31,6 +31,7 @@ alertmanagerFiles:
|
|||
smtp_auth_username: "alertmanager@viktorbarzin.me"
|
||||
smtp_auth_password: "${alertmanager_mail_pass}"
|
||||
smtp_require_tls: true
|
||||
slack_api_url: "${alertmanager_slack_api_url}"
|
||||
templates:
|
||||
- "/etc/alertmanager/template/*.tmpl"
|
||||
route:
|
||||
|
|
@@ -38,14 +39,17 @@ alertmanagerFiles:
|
|||
group_wait: 3s
|
||||
group_interval: 5s
|
||||
repeat_interval: 1h
|
||||
receiver: SMTP_STARTTLS
|
||||
receiver: ALL
|
||||
receivers:
|
||||
- name: 'SMTP_STARTTLS'
|
||||
- name: ALL
|
||||
email_configs:
|
||||
- to: "me@viktorbarzin.me"
|
||||
send_resolved: true
|
||||
tls_config:
|
||||
insecure_skip_verify: true
|
||||
slack_configs:
|
||||
- send_resolved: true
|
||||
channel: "#general"
|
||||
|
||||
server:
|
||||
# Enable me to delete metrics
|
||||
|
|
@@ -93,7 +97,7 @@ serverFiles:
|
|||
- name: NodeDown
|
||||
rules:
|
||||
- alert: NodeDown
|
||||
expr: up{job="kubernetes-nodes"} == 0
|
||||
expr: (up{job="kubernetes-nodes"} or on() vector(0)) == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: page
|
||||
|
|
@@ -120,7 +124,7 @@ serverFiles:
|
|||
- name: ReadyPodsInDeploymentLessThanSpec
|
||||
rules:
|
||||
- alert: ReadyPodsInDeploymentLessThanSpec
|
||||
expr: kube_deployment_status_replicas_available - on(namespace, deployment) kube_deployment_spec_replicas < 0
|
||||
expr: kube_deployment_status_replicas_available - on(exported_namespace, deployment) kube_deployment_spec_replicas < 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: page
|
||||
|
|
@@ -174,7 +178,7 @@ serverFiles:
|
|||
- name: Mailserver Down
|
||||
rules:
|
||||
- alert: Mail server has no replicas available
|
||||
expr: (kube_deployment_status_replicas_available{namespace="mailserver"} or on() vector(0)) < 1
|
||||
expr: (kube_deployment_status_replicas_available{exported_namespace="mailserver"} or on() vector(0)) < 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: page
|
||||
|
|
@@ -183,7 +187,7 @@ serverFiles:
|
|||
- name: Hackmd Down
|
||||
rules:
|
||||
- alert: Hackmd has no replicas available
|
||||
expr: (kube_deployment_status_replicas_available{namespace="hackmd"} or on() vector(0)) < 1
|
||||
expr: (kube_deployment_status_replicas_available{exported_namespace="hackmd"} or on() vector(0)) < 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: page
|
||||
|
|
@@ -192,7 +196,7 @@ serverFiles:
|
|||
- name: Privatebin Down
|
||||
rules:
|
||||
- alert: Privatebin has no replicas available
|
||||
expr: (kube_deployment_status_replicas_available{namespace="privatebin"} or on() vector(0)) < 1
|
||||
expr: (kube_deployment_status_replicas_available{exported_namespace="privatebin"} or on() vector(0)) < 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: page
|
||||
|
|
|
|||
Binary file not shown.
BIN
terraform.tfvars
BIN
terraform.tfvars
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue