ISSUE RESOLVED: - Root cause: Loki's 15GB iSCSI PVC was completely full - Symptom: 'no space left on device' errors during TSDB operations - Impact: Loki service completely down, logging unavailable - Side effects: Contributed to node2 containerd corruption incident SOLUTION APPLIED: - Expanded PVC storage: 15Gi → 50Gi via direct kubectl patch - Triggered pod restart to complete filesystem resize - Verified successful expansion and service recovery CURRENT STATUS: ✅ PVC: 50Gi capacity (iscsi-truenas storage class) ✅ Loki StatefulSet: 1/1 ready ✅ Loki Pod: 2/2 containers running ✅ Service: Successfully processing log streams ✅ No storage errors in recent logs TERRAFORM ALIGNED: - Updated loki.yaml persistence.size to match actual PVC - Infrastructure code now reflects deployed state [ci skip] - Emergency fix applied locally first due to service outage
110 lines
2 KiB
YAML
110 lines
2 KiB
YAML
# Loki server configuration passed through the Helm chart.
loki:
  commonConfig:
    # One copy of each stream; matches the single replica deployed by
    # singleBinary.replicas.
    replication_factor: 1

  schemaConfig:
    configs:
      # TSDB index, schema v13, stored on the local filesystem from
      # 2025-04-01 onward, with one index period per 24h.
      - from: "2025-04-01"
        store: tsdb
        object_store: filesystem
        schema: v13
        index:
          prefix: loki_index_
          period: 24h

  ingester:
    chunk_idle_period: 12h
    max_chunk_age: 24h
    chunk_retain_period: 1m
    # 1572864 bytes = 1.5 MiB.
    chunk_target_size: 1572864
    wal:
      # Same path as the `wal` volume mount in singleBinary.extraVolumeMounts.
      dir: /loki-wal

  pattern_ingester:
    enabled: true

  limits_config:
    allow_structured_metadata: true
    volume_enabled: true
    # 720h = 30 days.
    retention_period: 720h

  compactor:
    # Paired with limits_config.retention_period above.
    retention_enabled: true
    working_directory: /var/loki/compactor
    compaction_interval: 1h
    delete_request_store: filesystem

  # NOTE(review): recent grafana/loki chart versions document ruler settings
  # under `loki.rulerConfig`; confirm the chart version in use actually
  # renders `loki.ruler` into the generated config.
  ruler:
    enable_api: true
    storage:
      type: local
      local:
        # The `rules` ConfigMap volume is mounted beneath this directory at
        # /loki/rules/fake (see singleBinary.extraVolumeMounts).
        directory: /loki/rules
    alertmanager_url: http://prometheus-alertmanager.monitoring.svc.cluster.local:9093
    ring:
      kvstore:
        store: inmemory
    rule_path: /var/loki/scratch

  storage:
    type: "filesystem"

  auth_enabled: false

# Bundled MinIO is switched off; loki.storage.type is "filesystem".
minio:
  enabled: false

# Run Loki as one process; the per-component replica counts elsewhere in this
# file are set to 0 accordingly.
deploymentMode: SingleBinary

# Settings for the single Loki instance used in SingleBinary mode.
singleBinary:
  replicas: 1

  persistence:
    enabled: true
    # Raised from 15Gi to 50Gi after the volume filled up and Loki failed with
    # "no space left on device". The live PVC was patched directly, so keep
    # this value in sync with the deployed PVC.
    size: 50Gi
    storageClass: "iscsi-truenas"

  extraVolumes:
    # Backing volume for the ingester WAL (loki.ingester.wal.dir).
    # A memory-backed emptyDir is lost when the pod is deleted or rescheduled,
    # and what is written to it counts toward the container memory limit.
    # NOTE(review): confirm that losing the WAL on pod replacement is an
    # accepted trade-off.
    - name: wal
      emptyDir:
        medium: Memory
        sizeLimit: 2Gi
    # Alert rules supplied by the loki-alert-rules ConfigMap.
    - name: rules
      configMap:
        name: loki-alert-rules

  extraVolumeMounts:
    - name: wal
      mountPath: /loki-wal
    # Subdirectory of loki.ruler.storage.local.directory (/loki/rules).
    # NOTE(review): `fake` is presumably the tenant ID Loki uses while
    # auth_enabled is false — confirm.
    - name: rules
      mountPath: /loki/rules/fake

  resources:
    requests:
      cpu: 250m
      memory: 2Gi
    limits:
      cpu: "1"
      memory: 4Gi

# Zero out replica counts of other deployment modes
# (deploymentMode is SingleBinary, so only singleBinary carries replicas).
backend:
  replicas: 0
read:
  replicas: 0
write:
  replicas: 0
ingester:
  replicas: 0
querier:
  replicas: 0
queryFrontend:
  replicas: 0
queryScheduler:
  replicas: 0
distributor:
  replicas: 0
compactor:
  replicas: 0
indexGateway:
  replicas: 0
bloomCompactor:
  replicas: 0
bloomGateway:
  replicas: 0

# Disable optional components for single binary mode
gateway:
  enabled: false
chunksCache:
  enabled: false
resultsCache:
  enabled: false