add loki + alloy deployments for logs collection [ci skip]
This commit is contained in:
parent
0d8b6b7480
commit
c49e4d0a86
6 changed files with 2489 additions and 0 deletions
100
modules/kubernetes/monitoring/alloy.yaml
Normal file
100
modules/kubernetes/monitoring/alloy.yaml
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
alloy:
|
||||
configMap:
|
||||
content: |-
|
||||
// Write your Alloy config here:
|
||||
logging {
|
||||
level = "info"
|
||||
format = "logfmt"
|
||||
}
|
||||
loki.write "default" {
|
||||
endpoint {
|
||||
url = "http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push"
|
||||
}
|
||||
}
|
||||
|
||||
// discovery.kubernetes allows you to find scrape targets from Kubernetes resources.
|
||||
// It watches cluster state and ensures targets are continually synced with what is currently running in your cluster.
|
||||
discovery.kubernetes "pod" {
|
||||
role = "pod"
|
||||
}
|
||||
|
||||
// discovery.relabel rewrites the label set of the input targets by applying one or more relabeling rules.
|
||||
// If no rules are defined, then the input targets are exported as-is.
|
||||
discovery.relabel "pod_logs" {
|
||||
targets = discovery.kubernetes.pod.targets
|
||||
|
||||
// Label creation - "namespace" field from "__meta_kubernetes_namespace"
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_namespace"]
|
||||
action = "replace"
|
||||
target_label = "namespace"
|
||||
}
|
||||
|
||||
// Label creation - "pod" field from "__meta_kubernetes_pod_name"
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_name"]
|
||||
action = "replace"
|
||||
target_label = "pod"
|
||||
}
|
||||
|
||||
// Label creation - "container" field from "__meta_kubernetes_pod_container_name"
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_container_name"]
|
||||
action = "replace"
|
||||
target_label = "container"
|
||||
}
|
||||
|
||||
// Label creation - "app" field from "__meta_kubernetes_pod_label_app_kubernetes_io_name"
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
|
||||
action = "replace"
|
||||
target_label = "app"
|
||||
}
|
||||
|
||||
// Label creation - "job" field from "__meta_kubernetes_namespace" and "__meta_kubernetes_pod_container_name"
|
||||
// Concatenate values __meta_kubernetes_namespace/__meta_kubernetes_pod_container_name
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"]
|
||||
action = "replace"
|
||||
target_label = "job"
|
||||
separator = "/"
|
||||
replacement = "$1"
|
||||
}
|
||||
|
||||
// Label creation - "__path__" field from "__meta_kubernetes_pod_uid" and "__meta_kubernetes_pod_container_name"
|
||||
// Concatenate values __meta_kubernetes_pod_uid/__meta_kubernetes_pod_container_name.log
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
|
||||
action = "replace"
|
||||
target_label = "__path__"
|
||||
separator = "/"
|
||||
replacement = "/var/log/pods/*$1/*.log"
|
||||
}
|
||||
|
||||
// Label creation - "container_runtime" field from "__meta_kubernetes_pod_container_id"
|
||||
rule {
|
||||
source_labels = ["__meta_kubernetes_pod_container_id"]
|
||||
action = "replace"
|
||||
target_label = "container_runtime"
|
||||
regex = "^(\\S+):\\/\\/.+$"
|
||||
replacement = "$1"
|
||||
}
|
||||
}
|
||||
|
||||
// loki.source.kubernetes tails logs from Kubernetes containers using the Kubernetes API.
|
||||
loki.source.kubernetes "pod_logs" {
|
||||
targets = discovery.relabel.pod_logs.output
|
||||
forward_to = [loki.process.pod_logs.receiver]
|
||||
}
|
||||
|
||||
// loki.process receives log entries from other Loki components, applies one or more processing stages,
|
||||
// and forwards the results to the list of receivers in the component's arguments.
|
||||
loki.process "pod_logs" {
|
||||
stage.static_labels {
|
||||
values = {
|
||||
cluster = "default",
|
||||
}
|
||||
}
|
||||
|
||||
forward_to = [loki.write.default.receiver]
|
||||
}
|
||||
288
modules/kubernetes/monitoring/dashboards/loki.json
Normal file
288
modules/kubernetes/monitoring/dashboards/loki.json
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Logs collected from Kubernetes, stored in Loki",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 15141,
|
||||
"graphTooltip": 0,
|
||||
"id": 25,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "fejvsai4fvvggf"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "fejvsai4fvvggf"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(count_over_time({namespace=~\"$namespace\", container =~\"$container\"} |= \"$query\" [$__interval]))",
|
||||
"instant": false,
|
||||
"legendFormat": "Log count",
|
||||
"queryType": "range",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "fejvsai4fvvggf"
|
||||
},
|
||||
"description": "Logs from services running in Kubernetes",
|
||||
"gridPos": {
|
||||
"h": 25,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 4
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": false,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": false
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "fejvsai4fvvggf"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "{namespace=~\"$namespace\", container =~\"$container\"} |= \"$query\"",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "5s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"description": "String to search for",
|
||||
"hide": 0,
|
||||
"label": "Search Query",
|
||||
"name": "query",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "textbox"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"dbaas"
|
||||
],
|
||||
"value": [
|
||||
"dbaas"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "fejvsai4fvvggf"
|
||||
},
|
||||
"definition": "label_values(namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [],
|
||||
"query": "label_values(namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "fejvsai4fvvggf"
|
||||
},
|
||||
"definition": "label_values(stream)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "stream",
|
||||
"options": [],
|
||||
"query": "label_values(stream)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "fejvsai4fvvggf"
|
||||
},
|
||||
"definition": "label_values(container)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "container",
|
||||
"options": [],
|
||||
"query": "label_values(container)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Loki Kubernetes Logs",
|
||||
"uid": "o6-BGgnnk",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
1875
modules/kubernetes/monitoring/dashboards/registry.json
Normal file
1875
modules/kubernetes/monitoring/dashboards/registry.json
Normal file
File diff suppressed because it is too large
Load diff
78
modules/kubernetes/monitoring/k8s-monitoring-values.yaml
Normal file
78
modules/kubernetes/monitoring/k8s-monitoring-values.yaml
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
---
|
||||
cluster:
|
||||
name: default
|
||||
|
||||
destinations:
|
||||
- name: loki
|
||||
type: loki
|
||||
url: http://loki-gateway.monitoring.svc.cluster.local/loki/api/v1/push
|
||||
|
||||
clusterEvents:
|
||||
enabled: false
|
||||
collector: alloy-logs
|
||||
namespaces:
|
||||
- dbaas
|
||||
- immich
|
||||
- authentik
|
||||
- mailserver
|
||||
- crowdsec
|
||||
- descheduler
|
||||
- calibre
|
||||
- monitoring
|
||||
- ingress-nginx
|
||||
- vaultwarden
|
||||
|
||||
nodeLogs:
|
||||
enabled: false
|
||||
|
||||
podLogs:
|
||||
enabled: true
|
||||
gatherMethod: kubernetesApi
|
||||
collector: alloy-logs
|
||||
labelsToKeep:
|
||||
[
|
||||
"app_kubernetes_io_name",
|
||||
"container",
|
||||
"instance",
|
||||
"job",
|
||||
"level",
|
||||
"namespace",
|
||||
"service_name",
|
||||
"service_namespace",
|
||||
"deployment_environment",
|
||||
"deployment_environment_name",
|
||||
]
|
||||
structuredMetadata:
|
||||
pod: pod # Set structured metadata "pod" from label "pod"
|
||||
namespaces:
|
||||
- dbaas
|
||||
- immich
|
||||
- authentik
|
||||
- mailserver
|
||||
- crowdsec
|
||||
- descheduler
|
||||
- calibre
|
||||
- monitoring
|
||||
- ingress-nginx
|
||||
- vaultwarden
|
||||
# Collectors
|
||||
alloy-singleton:
|
||||
enabled: false
|
||||
|
||||
alloy-metrics:
|
||||
enabled: false
|
||||
|
||||
alloy-logs:
|
||||
enabled: true
|
||||
# Required when using the Kubernetes API to gather pod logs
|
||||
alloy:
|
||||
mounts:
|
||||
varlog: false
|
||||
clustering:
|
||||
enabled: true
|
||||
|
||||
alloy-profiles:
|
||||
enabled: false
|
||||
|
||||
alloy-receiver:
|
||||
enabled: false
|
||||
64
modules/kubernetes/monitoring/loki.yaml
Normal file
64
modules/kubernetes/monitoring/loki.yaml
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
loki:
|
||||
commonConfig:
|
||||
replication_factor: 1
|
||||
schemaConfig:
|
||||
configs:
|
||||
- from: "2025-04-01"
|
||||
store: tsdb
|
||||
object_store: filesystem
|
||||
schema: v13
|
||||
index:
|
||||
prefix: loki_index_
|
||||
period: 24h
|
||||
pattern_ingester:
|
||||
enabled: true
|
||||
limits_config:
|
||||
allow_structured_metadata: true
|
||||
volume_enabled: true
|
||||
ruler:
|
||||
enable_api: true
|
||||
storage:
|
||||
type: "filesystem"
|
||||
persistence:
|
||||
enabled: true
|
||||
size: 15Gi
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
# Auth requires a reverse proxy providing basic auth
|
||||
# https://grafana.com/docs/loki/latest/operations/authentication/
|
||||
auth_enabled: false
|
||||
|
||||
minio:
|
||||
enabled: false
|
||||
|
||||
deploymentMode: SingleBinary
|
||||
|
||||
singleBinary:
|
||||
replicas: 1
|
||||
|
||||
# Zero out replica counts of other deployment modes
|
||||
backend:
|
||||
replicas: 0
|
||||
read:
|
||||
replicas: 0
|
||||
write:
|
||||
replicas: 0
|
||||
|
||||
ingester:
|
||||
replicas: 0
|
||||
querier:
|
||||
replicas: 0
|
||||
queryFrontend:
|
||||
replicas: 0
|
||||
queryScheduler:
|
||||
replicas: 0
|
||||
distributor:
|
||||
replicas: 0
|
||||
compactor:
|
||||
replicas: 0
|
||||
indexGateway:
|
||||
replicas: 0
|
||||
bloomCompactor:
|
||||
replicas: 0
|
||||
bloomGateway:
|
||||
replicas: 0
|
||||
|
|
@ -526,3 +526,87 @@ resource "kubernetes_service" "snmp-exporter" {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Installs the Grafana Loki chart as release "loki" in the "monitoring"
# namespace, configured from loki.yaml in this module.
# NOTE(review): no chart `version` is set, so the chart version is not pinned.
resource "helm_release" "loki" {
  name       = "loki"
  chart      = "loki"
  repository = "https://grafana.github.io/helm-charts"

  namespace        = "monitoring"
  create_namespace = true

  # Rendered with an empty variable map: no template variables are substituted.
  values = [templatefile("${path.module}/loki.yaml", {})]

  # atomic + timeout: see the Helm provider docs for failure/cleanup semantics;
  # timeout is expressed in seconds.
  timeout = 120
  atomic  = true
}
|
||||
|
||||
# NFS-backed PersistentVolume "loki": 15Gi, ReadWriteOnce, reclaim policy Retain.
# NOTE(review): size and access mode match the persistence settings in loki.yaml,
# so this is presumably the volume for the Loki chart's claim — the binding itself
# is not visible here; confirm.
resource "kubernetes_persistent_volume" "loki" {
  metadata {
    name = "loki"
  }

  spec {
    volume_mode                      = "Filesystem"
    persistent_volume_reclaim_policy = "Retain"
    access_modes                     = ["ReadWriteOnce"]

    capacity = {
      storage = "15Gi"
    }

    persistent_volume_source {
      nfs {
        server = "10.0.10.15"
        path   = "/mnt/main/loki/loki"
      }
    }
  }
}
|
||||
|
||||
# NFS-backed PersistentVolume "loki-minio": 15Gi, ReadWriteMany, reclaim policy Retain.
# NOTE(review): loki.yaml in this module sets `minio.enabled: false`, so this
# volume may currently have no consumer — confirm whether it is still needed.
resource "kubernetes_persistent_volume" "loki-minio" {
  metadata {
    name = "loki-minio"
  }

  spec {
    volume_mode                      = "Filesystem"
    persistent_volume_reclaim_policy = "Retain"
    access_modes                     = ["ReadWriteMany"]

    capacity = {
      storage = "15Gi"
    }

    persistent_volume_source {
      nfs {
        server = "10.0.10.15"
        path   = "/mnt/main/loki/minio"
      }
    }
  }
}
|
||||
|
||||
|
||||
# https://grafana.com/docs/alloy/latest/configure/kubernetes/
|
||||
# Standalone Grafana Alloy release "alloy" in the "monitoring" namespace.
# NOTE(review): no `values` are passed, so the chart runs with its default
# configuration; alloy.yaml in this module does not appear to be wired into this
# release — confirm that is intended (k8s-monitoring enables its own alloy-logs).
resource "helm_release" "alloy" {
  name       = "alloy"
  chart      = "alloy"
  repository = "https://grafana.github.io/helm-charts"

  namespace        = "monitoring"
  create_namespace = true

  atomic = true
}
|
||||
|
||||
# Increase the inotify limits (watches, instances, queued events) on every node, as alloy watches log files:
|
||||
# https://serverfault.com/questions/1137211/failed-to-create-fsnotify-watcher-too-many-open-files
|
||||
|
||||
# Run on all nodes with:
|
||||
# for n in $(kbn | awk '{print $1}'); do echo $n; s wizard@$n 'sudo sysctl -w fs.inotify.max_user_watches=2099999999; sudo sysctl -w fs.inotify.max_user_instances=2099999999;sudo sysctl -w fs.inotify.max_queued_events=2099999999'; done
|
||||
|
||||
# Installs the Grafana k8s-monitoring chart as release "k8s-monitoring" in the
# "monitoring" namespace, configured from k8s-monitoring-values.yaml in this module.
# NOTE(review): no chart `version` is set, so the chart version is not pinned.
resource "helm_release" "k8s-monitoring" {
  name       = "k8s-monitoring"
  chart      = "k8s-monitoring"
  repository = "https://grafana.github.io/helm-charts"

  namespace        = "monitoring"
  create_namespace = true

  # Rendered with an empty variable map: no template variables are substituted.
  values = [templatefile("${path.module}/k8s-monitoring-values.yaml", {})]

  atomic = true
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue