add loki + alloy deployments for logs collection [ci skip]

This commit is contained in:
Viktor Barzin 2025-05-04 11:22:12 +00:00
parent 0d8b6b7480
commit c49e4d0a86
6 changed files with 2489 additions and 0 deletions

View file

@ -0,0 +1,100 @@
# Helm values for the Grafana Alloy chart. The embedded Alloy configuration
# discovers pods, relabels them, tails their logs and pushes them to Loki.
alloy:
  configMap:
    content: |-
      // Alloy's own log output: info level, logfmt encoded.
      logging {
        format = "logfmt"
        level  = "info"
      }

      // Step 1 - discover every pod known to the Kubernetes API.
      discovery.kubernetes "pod" {
        role = "pod"
      }

      // Step 2 - derive the stream labels from the discovery metadata.
      // Each rule copies (or builds) one label from the "__meta_kubernetes_*" inputs.
      discovery.relabel "pod_logs" {
        targets = discovery.kubernetes.pod.targets

        // namespace <- pod namespace
        rule {
          action        = "replace"
          source_labels = ["__meta_kubernetes_namespace"]
          target_label  = "namespace"
        }

        // pod <- pod name
        rule {
          action        = "replace"
          source_labels = ["__meta_kubernetes_pod_name"]
          target_label  = "pod"
        }

        // container <- container name
        rule {
          action        = "replace"
          source_labels = ["__meta_kubernetes_pod_container_name"]
          target_label  = "container"
        }

        // app <- value of the pod's "app.kubernetes.io/name" label
        rule {
          action        = "replace"
          source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
          target_label  = "app"
        }

        // job <- "<namespace>/<container name>" (source values joined with "/")
        rule {
          action        = "replace"
          source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"]
          separator     = "/"
          target_label  = "job"
          replacement   = "$1"
        }

        // __path__ <- "/var/log/pods/*<pod uid>/<container name>/*.log"
        // NOTE(review): this is a file glob; presumably only file-based sources
        // read it - confirm whether loki.source.kubernetes needs this rule.
        rule {
          action        = "replace"
          source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
          separator     = "/"
          target_label  = "__path__"
          replacement   = "/var/log/pods/*$1/*.log"
        }

        // container_runtime <- the scheme in front of "://" in the container id
        rule {
          action        = "replace"
          source_labels = ["__meta_kubernetes_pod_container_id"]
          regex         = "^(\\S+):\\/\\/.+$"
          target_label  = "container_runtime"
          replacement   = "$1"
        }
      }

      // Step 3 - tail the container logs of the relabelled targets through the
      // Kubernetes API and hand every entry to the processing stage.
      loki.source.kubernetes "pod_logs" {
        forward_to = [loki.process.pod_logs.receiver]
        targets    = discovery.relabel.pod_logs.output
      }

      // Step 4 - stamp every entry with a static cluster label, then pass it on.
      loki.process "pod_logs" {
        forward_to = [loki.write.default.receiver]

        stage.static_labels {
          values = {
            cluster = "default",
          }
        }
      }

      // Step 5 - push the entries to the Loki service in the monitoring namespace.
      loki.write "default" {
        endpoint {
          url = "http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push"
        }
      }

View file

@ -0,0 +1,288 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Logs collected from Kubernetes, stored in Loki",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 15141,
"graphTooltip": 0,
"id": 25,
"links": [],
"panels": [
{
"datasource": {
"type": "loki",
"uid": "fejvsai4fvvggf"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "fejvsai4fvvggf"
},
"editorMode": "code",
"expr": "sum(count_over_time({namespace=~\"$namespace\", container =~\"$container\"} |= \"$query\" [$__interval]))",
"instant": false,
"legendFormat": "Log count",
"queryType": "range",
"range": true,
"refId": "A"
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "loki",
"uid": "fejvsai4fvvggf"
},
"description": "Logs from services running in Kubernetes",
"gridPos": {
"h": 25,
"w": 24,
"x": 0,
"y": 4
},
"id": 2,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "fejvsai4fvvggf"
},
"editorMode": "code",
"expr": "{namespace=~\"$namespace\", container =~\"$container\"} |= \"$query\"",
"queryType": "range",
"refId": "A"
}
],
"type": "logs"
}
],
"refresh": "5s",
"schemaVersion": 39,
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "",
"value": ""
},
"description": "String to search for",
"hide": 0,
"label": "Search Query",
"name": "query",
"options": [
{
"selected": true,
"text": "",
"value": ""
}
],
"query": "",
"skipUrlSync": false,
"type": "textbox"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": [
"dbaas"
],
"value": [
"dbaas"
]
},
"datasource": {
"type": "loki",
"uid": "fejvsai4fvvggf"
},
"definition": "label_values(namespace)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "namespace",
"options": [],
"query": "label_values(namespace)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "loki",
"uid": "fejvsai4fvvggf"
},
"definition": "label_values(stream)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "stream",
"options": [],
"query": "label_values(stream)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "loki",
"uid": "fejvsai4fvvggf"
},
"definition": "label_values(container)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "container",
"options": [],
"query": "label_values(container)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Loki Kubernetes Logs",
"uid": "o6-BGgnnk",
"version": 2,
"weekStart": ""
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,78 @@
---
# Helm values for the k8s-monitoring chart: only pod-log collection is enabled,
# and the logs are shipped to the Loki gateway in the monitoring namespace.
cluster:
  name: default

destinations:
  - name: loki
    type: loki
    url: http://loki-gateway.monitoring.svc.cluster.local/loki/api/v1/push

# Cluster events are switched off; the settings below stay in place for when
# the feature is enabled.
clusterEvents:
  enabled: false
  collector: alloy-logs
  namespaces:
    - dbaas
    - immich
    - authentik
    - mailserver
    - crowdsec
    - descheduler
    - calibre
    - monitoring
    - ingress-nginx
    - vaultwarden

nodeLogs:
  enabled: false

# Pod logs are gathered through the Kubernetes API by the alloy-logs collector,
# restricted to the namespaces listed at the end of this section.
podLogs:
  enabled: true
  gatherMethod: kubernetesApi
  collector: alloy-logs
  # Labels kept on each log stream.
  labelsToKeep:
    - app_kubernetes_io_name
    - container
    - instance
    - job
    - level
    - namespace
    - service_name
    - service_namespace
    - deployment_environment
    - deployment_environment_name
  structuredMetadata:
    # Structured metadata "pod" is filled from the label "pod".
    pod: pod
  namespaces:
    - dbaas
    - immich
    - authentik
    - mailserver
    - crowdsec
    - descheduler
    - calibre
    - monitoring
    - ingress-nginx
    - vaultwarden

# Collectors: only alloy-logs is deployed.
alloy-singleton:
  enabled: false

alloy-metrics:
  enabled: false

alloy-logs:
  enabled: true
  # Required when pod logs are gathered through the Kubernetes API.
  alloy:
    mounts:
      varlog: false
    clustering:
      enabled: true

alloy-profiles:
  enabled: false

alloy-receiver:
  enabled: false

View file

@ -0,0 +1,64 @@
# Helm values for the Loki chart: a single SingleBinary replica that keeps its
# index (TSDB) and chunks on the filesystem, with MinIO and auth disabled.
loki:
commonConfig:
# Only one replica is deployed, so every stream is stored once.
replication_factor: 1
schemaConfig:
configs:
# Schema period starting 2025-04-01: TSDB index, filesystem object store, schema v13.
- from: "2025-04-01"
store: tsdb
object_store: filesystem
schema: v13
index:
prefix: loki_index_
period: 24h
pattern_ingester:
enabled: true
limits_config:
allow_structured_metadata: true
volume_enabled: true
ruler:
enable_api: true
storage:
type: "filesystem"
# Persistent storage request: 15Gi, ReadWriteOnce.
# NOTE(review): confirm this block sits under the key the chart actually reads
# for the single-binary volume; otherwise these settings have no effect.
persistence:
enabled: true
size: 15Gi
accessModes:
- ReadWriteOnce
# Auth requires a reverse proxy providing basic auth
# https://grafana.com/docs/loki/latest/operations/authentication/
auth_enabled: false
# The bundled MinIO is not deployed; storage is the filesystem configured above.
minio:
enabled: false
deploymentMode: SingleBinary
singleBinary:
replicas: 1
# Zero out replica counts of other deployment modes
backend:
replicas: 0
read:
replicas: 0
write:
replicas: 0
ingester:
replicas: 0
querier:
replicas: 0
queryFrontend:
replicas: 0
queryScheduler:
replicas: 0
distributor:
replicas: 0
compactor:
replicas: 0
indexGateway:
replicas: 0
bloomCompactor:
replicas: 0
bloomGateway:
replicas: 0

View file

@ -526,3 +526,87 @@ resource "kubernetes_service" "snmp-exporter" {
}
}
}
# Loki, installed from the Grafana Helm repository into the "monitoring"
# namespace with the values rendered from loki.yaml in this module.
resource "helm_release" "loki" {
  name       = "loki"
  repository = "https://grafana.github.io/helm-charts"
  chart      = "loki"

  namespace        = "monitoring"
  create_namespace = true

  # The values file is rendered as a template with no variables.
  values = [templatefile("${path.module}/loki.yaml", {})]

  # Atomic install with a 120 second timeout.
  atomic  = true
  timeout = 120
}
# NFS-backed PersistentVolume "loki": 15Gi, ReadWriteOnce, reclaim policy Retain.
resource "kubernetes_persistent_volume" "loki" {
  metadata {
    name = "loki"
  }

  spec {
    volume_mode                      = "Filesystem"
    access_modes                     = ["ReadWriteOnce"]
    persistent_volume_reclaim_policy = "Retain"

    capacity = {
      storage = "15Gi"
    }

    # Backing export on the NFS server.
    persistent_volume_source {
      nfs {
        server = "10.0.10.15"
        path   = "/mnt/main/loki/loki"
      }
    }
  }
}
# NFS-backed PersistentVolume "loki-minio": 15Gi, ReadWriteMany, reclaim policy Retain.
# NOTE(review): the Loki values in this change set minio.enabled to false -
# confirm this volume is still needed.
resource "kubernetes_persistent_volume" "loki-minio" {
  metadata {
    name = "loki-minio"
  }

  spec {
    volume_mode                      = "Filesystem"
    access_modes                     = ["ReadWriteMany"]
    persistent_volume_reclaim_policy = "Retain"

    capacity = {
      storage = "15Gi"
    }

    # Backing export on the NFS server.
    persistent_volume_source {
      nfs {
        server = "10.0.10.15"
        path   = "/mnt/main/loki/minio"
      }
    }
  }
}
# Grafana Alloy, installed from the Grafana Helm repository into the
# "monitoring" namespace.
# Reference: https://grafana.com/docs/alloy/latest/configure/kubernetes/
# NOTE(review): no `values` are passed, so the chart runs with its default
# configuration - confirm whether a custom Alloy values file should be wired in.
resource "helm_release" "alloy" {
  name       = "alloy"
  repository = "https://grafana.github.io/helm-charts"
  chart      = "alloy"

  namespace        = "monitoring"
  create_namespace = true

  atomic = true
}
# Raise the inotify limits on every node, because Alloy watches log files:
# https://serverfault.com/questions/1137211/failed-to-create-fsnotify-watcher-too-many-open-files
# Run on all nodes with:
# for n in $(kbn | awk '{print $1}'); do echo $n; s wizard@$n 'sudo sysctl -w fs.inotify.max_user_watches=2099999999; sudo sysctl -w fs.inotify.max_user_instances=2099999999;sudo sysctl -w fs.inotify.max_queued_events=2099999999'; done
# The k8s-monitoring chart, installed from the Grafana Helm repository into the
# "monitoring" namespace with the values rendered from
# k8s-monitoring-values.yaml in this module.
resource "helm_release" "k8s-monitoring" {
  name       = "k8s-monitoring"
  repository = "https://grafana.github.io/helm-charts"
  chart      = "k8s-monitoring"

  namespace        = "monitoring"
  create_namespace = true

  # The values file is rendered as a template with no variables.
  values = [templatefile("${path.module}/k8s-monitoring-values.yaml", {})]

  atomic = true
}