diff --git a/stacks/crowdsec/modules/crowdsec/main.tf b/stacks/crowdsec/modules/crowdsec/main.tf
index 9141be3a..5c711ef0 100644
--- a/stacks/crowdsec/modules/crowdsec/main.tf
+++ b/stacks/crowdsec/modules/crowdsec/main.tf
@@ -112,6 +112,31 @@ resource "helm_release" "crowdsec" {
 
   wait_for_jobs = true
 }
+# NodePort service for pfSense syslog → CrowdSec agent
+# pfSense sends firewall logs to 10.0.20.202:30514 (any k8s node IP works)
+resource "kubernetes_service" "crowdsec_syslog" {
+  metadata {
+    name      = "crowdsec-syslog"
+    namespace = kubernetes_namespace.crowdsec.metadata[0].name
+    labels = {
+      app = "crowdsec-syslog"
+    }
+  }
+  spec {
+    type = "NodePort"
+    selector = {
+      "k8s-app" = "crowdsec"
+      type      = "agent"
+    }
+    port {
+      name        = "syslog-udp"
+      port        = 514
+      target_port = 514
+      node_port   = 30514
+      protocol    = "UDP"
+    }
+  }
+}
 
 # Deployment for my custom dashboard that helps me unblock myself when I blocklist myself
 resource "kubernetes_deployment" "crowdsec-web" {
diff --git a/stacks/crowdsec/modules/crowdsec/values.yaml b/stacks/crowdsec/modules/crowdsec/values.yaml
index fcfbb3af..6eea5e06 100644
--- a/stacks/crowdsec/modules/crowdsec/values.yaml
+++ b/stacks/crowdsec/modules/crowdsec/values.yaml
@@ -17,6 +17,19 @@ agent:
       podName: traefik-*
       # as in crowdsec configuration, we need to specify the program name so the parser will match and parse logs
       program: traefik
+    # Mailserver logs for SMTP/IMAP brute-force detection
+    - namespace: mailserver
+      podName: mailserver-*
+      program: postfix
+    - namespace: mailserver
+      podName: mailserver-*
+      program: dovecot
+  additionalAcquisition: # pfSense firewall logs via syslog; non-pod datasources do not belong in `acquisition`
+    - source: syslog
+      listen_addr: "0.0.0.0"
+      listen_port: 514
+      labels:
+        type: syslog # consumed by the syslog parser, which hands filterlog lines to pf-logs
   # Those are ENV variables
   env:
     # As it's a test, we don't want to share signals with CrowdSec so disable the Online API.
@@ -24,10 +37,18 @@ agent:
     #   value: "true"
     # As we are running Traefik, we want to install the Traefik collection
     - name: COLLECTIONS
-      value: "crowdsecurity/traefik crowdsecurity/base-http-scenarios crowdsecurity/http-cve"
+      value: "crowdsecurity/traefik crowdsecurity/base-http-scenarios crowdsecurity/http-cve crowdsecurity/pf crowdsecurity/postfix crowdsecurity/dovecot crowdsecurity/sshd"
     - name: SCENARIOS
       value: ""
     # value: "crowdsecurity/http-crawl-aggressive"
+  # Expose syslog port for pfSense log ingestion
+  additionalPorts:
+    - name: syslog-udp
+      containerPort: 514
+      protocol: UDP
+    - name: syslog-tcp
+      containerPort: 514
+      protocol: TCP
   # Mount custom scenarios into /etc/crowdsec/scenarios
   extraVolumeMounts:
     - name: custom-scenarios
diff --git a/stacks/monitoring/modules/monitoring/dashboards/network_traffic.json b/stacks/monitoring/modules/monitoring/dashboards/network_traffic.json
new file mode 100644
index 00000000..50c0ecd2
--- /dev/null
+++ b/stacks/monitoring/modules/monitoring/dashboards/network_traffic.json
@@ -0,0 +1,280 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": { "type": "datasource", "uid": "grafana" },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "description": "Network traffic monitoring via GoFlow2 NetFlow + DNS anomaly detection + CrowdSec",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "title": "GoFlow2 Status",
+      "type": "stat",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": {
+            "steps": [
+              { "color": "red", "value": null },
+              { "color": "green", "value": 1 }
+            ]
+          },
+          "mappings": [
+            { "type": "value", "options": { "0": { "text": "DOWN", "color": "red" }, "1": { "text": "UP", "color": "green" } } }
+          ]
+        }
+      },
+      "targets": [
+        { "expr": "up{job=\"goflow2\"}", "legendFormat": "GoFlow2" }
+      ]
+    },
+    {
+      "title": "NetFlow Bytes/s",
+      "type": "stat",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "unit": "Bps",
+          "thresholds": {
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "yellow", "value": 10485760 },
+              { "color": "red", "value": 104857600 }
+            ]
+          }
+        }
+      },
+      "targets": [
+        { "expr": "sum(rate(goflow2_flow_traffic_bytes_total[5m]))", "legendFormat": "Total" }
+      ]
+    },
+    {
+      "title": "NetFlow Flows Processed",
+      "type": "stat",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "unit": "short",
+          "thresholds": {
+            "steps": [
+              { "color": "red", "value": null },
+              { "color": "green", "value": 1 }
+            ]
+          }
+        }
+      },
+      "targets": [
+        { "expr": "sum(rate(goflow2_flow_process_nf_total[5m]))", "legendFormat": "flows/s" }
+      ]
+    },
+    {
+      "title": "CrowdSec Active Decisions",
+      "type": "stat",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": {
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "yellow", "value": 50 },
+              { "color": "red", "value": 200 }
+            ]
+          }
+        }
+      },
+      "targets": [
+        { "expr": "sum(cs_active_decisions)", "legendFormat": "Decisions" }
+      ]
+    },
+    {
+      "title": "DNS Queries (Last Hour)",
+      "type": "stat",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": {
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "yellow", "value": 5000 },
+              { "color": "red", "value": 20000 }
+            ]
+          }
+        }
+      },
+      "targets": [
+        { "expr": "dns_anomaly_total_queries", "legendFormat": "Queries" }
+      ]
+    },
+    {
+      "title": "DNS DGA Suspects",
+      "type": "stat",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": {
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "yellow", "value": 1 },
+              { "color": "red", "value": 5 }
+            ]
+          }
+        }
+      },
+      "targets": [
+        { "expr": "dns_anomaly_dga_suspects", "legendFormat": "DGA Suspects" }
+      ]
+    },
+    {
+      "title": "NetFlow Traffic Over Time",
+      "type": "timeseries",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
+      "fieldConfig": {
+        "defaults": {
+          "unit": "Bps",
+          "custom": { "fillOpacity": 20, "lineWidth": 2 }
+        }
+      },
+      "targets": [
+        { "expr": "rate(goflow2_flow_traffic_bytes_total[5m])", "legendFormat": "Bytes/s" },
+        { "expr": "rate(goflow2_flow_traffic_packets_total[5m])", "legendFormat": "Packets/s" }
+      ]
+    },
+    {
+      "title": "NetFlow Processing",
+      "type": "timeseries",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
+      "fieldConfig": {
+        "defaults": {
+          "custom": { "fillOpacity": 10, "lineWidth": 1 }
+        }
+      },
+      "targets": [
+        { "expr": "rate(goflow2_flow_process_nf_total[5m])", "legendFormat": "Flows processed/s (v{{ version }})" },
+        { "expr": "rate(goflow2_flow_decoder_error_total[5m])", "legendFormat": "Decoder errors/s" },
+        { "expr": "rate(goflow2_flow_process_nf_errors_total[5m])", "legendFormat": "Processing errors/s ({{ error }})" }
+      ]
+    },
+    {
+      "title": "NetFlow Processing Delay",
+      "type": "timeseries",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 12 },
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s",
+          "custom": { "fillOpacity": 10, "lineWidth": 1 }
+        }
+      },
+      "targets": [
+        { "expr": "goflow2_flow_process_nf_delay_seconds{quantile=\"0.5\"}", "legendFormat": "p50 delay" },
+        { "expr": "goflow2_flow_process_nf_delay_seconds{quantile=\"0.9\"}", "legendFormat": "p90 delay" },
+        { "expr": "goflow2_flow_process_nf_delay_seconds{quantile=\"0.99\"}", "legendFormat": "p99 delay" }
+      ]
+    },
+    {
+      "title": "CrowdSec Alerts & Decisions Over Time",
+      "type": "timeseries",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 12 },
+      "fieldConfig": {
+        "defaults": {
+          "custom": { "fillOpacity": 15, "lineWidth": 2 }
+        }
+      },
+      "targets": [
+        { "expr": "sum(cs_active_decisions)", "legendFormat": "Active Decisions" },
+        { "expr": "sum(cs_alerts)", "legendFormat": "Total Alerts" },
+        { "expr": "sum(rate(cs_lapi_bouncer_requests_total[5m]))", "legendFormat": "Bouncer req/s" }
+      ]
+    },
+    {
+      "title": "CrowdSec LAPI Request Latency",
+      "type": "timeseries",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 },
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s",
+          "custom": { "fillOpacity": 10, "lineWidth": 1 }
+        }
+      },
+      "targets": [
+        { "expr": "histogram_quantile(0.50, sum by (le) (rate(cs_lapi_request_duration_seconds_bucket[5m])))", "legendFormat": "p50" },
+        { "expr": "histogram_quantile(0.99, sum by (le) (rate(cs_lapi_request_duration_seconds_bucket[5m])))", "legendFormat": "p99" }
+      ]
+    },
+    {
+      "title": "DNS Metrics Over Time",
+      "type": "timeseries",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 20 },
+      "fieldConfig": {
+        "defaults": {
+          "custom": { "fillOpacity": 15, "lineWidth": 2 }
+        }
+      },
+      "targets": [
+        { "expr": "dns_anomaly_total_queries", "legendFormat": "Total Queries" },
+        { "expr": "dns_anomaly_nx_domain", "legendFormat": "NX Domain" },
+        { "expr": "dns_anomaly_server_failure", "legendFormat": "SERVFAIL" },
+        { "expr": "dns_anomaly_blocked", "legendFormat": "Blocked" },
+        { "expr": "dns_anomaly_dga_suspects", "legendFormat": "DGA Suspects" }
+      ]
+    },
+    {
+      "title": "DNS Anomaly Check Health",
+      "type": "stat",
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 20 },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "unit": "dateTimeFromNow",
+          "thresholds": {
+            "steps": [
+              { "color": "green", "value": null }
+            ]
+          }
+        }
+      },
+      "targets": [
+        { "expr": "dns_anomaly_check_timestamp * 1000", "legendFormat": "Last Check" }
+      ]
+    }
+  ],
+  "schemaVersion": 39,
+  "tags": ["network", "security", "goflow2", "dns", "crowdsec"],
+  "templating": { "list": [] },
+  "time": { "from": "now-6h", "to": "now" },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Network Traffic & Adversary Detection",
+  "uid": "network-traffic-adversary",
+  "version": 1
+}
diff --git a/stacks/monitoring/modules/monitoring/main.tf b/stacks/monitoring/modules/monitoring/main.tf
index 048f21c9..9ffcb1c5 100644
--- a/stacks/monitoring/modules/monitoring/main.tf
+++ b/stacks/monitoring/modules/monitoring/main.tf
@@ -187,6 +187,129 @@ resource "kubernetes_cron_job_v1" "cloudsync_monitor" {
   }
 }
 
+# -----------------------------------------------------------------------------
+# DNS Anomaly Monitor — query Technitium stats API, detect anomalies, push to Pushgateway
+# Runs every 15 min. Checks for query spikes, high error rates, and suspicious patterns.
+# ----------------------------------------------------------------------------- +resource "kubernetes_cron_job_v1" "dns_anomaly_monitor" { + metadata { + name = "dns-anomaly-monitor" + namespace = kubernetes_namespace.monitoring.metadata[0].name + } + spec { + concurrency_policy = "Replace" + failed_jobs_history_limit = 3 + successful_jobs_history_limit = 3 + schedule = "*/15 * * * *" + job_template { + metadata {} + spec { + backoff_limit = 2 + ttl_seconds_after_finished = 300 + template { + metadata {} + spec { + container { + name = "dns-anomaly-monitor" + image = "docker.io/library/alpine" + command = ["/bin/sh", "-c", <<-EOT + set -euo pipefail + apk add --no-cache curl jq + + TECHNITIUM_URL="http://technitium-web.technitium.svc.cluster.local:5380" + + # Get main stats + STATS=$(curl -sf "$TECHNITIUM_URL/api/stats/get?token=&type=LastHour" 2>&1) || { + echo "ERROR: Failed to query Technitium stats API" + exit 1 + } + + # Parse key metrics + TOTAL_QUERIES=$(echo "$STATS" | jq -r '.response.stats.totalQueries // 0') + SERVER_FAILURE=$(echo "$STATS" | jq -r '.response.stats.serverFailure // 0') + NX_DOMAIN=$(echo "$STATS" | jq -r '.response.stats.nxDomain // 0') + BLOCKED=$(echo "$STATS" | jq -r '.response.stats.blocked // 0') + NO_ERROR=$(echo "$STATS" | jq -r '.response.stats.noError // 0') + + echo "DNS Stats (last hour): total=$TOTAL_QUERIES noError=$NO_ERROR nxDomain=$NX_DOMAIN serverFailure=$SERVER_FAILURE blocked=$BLOCKED" + + # Get top clients for anomaly context + TOP_CLIENTS=$(curl -sf "$TECHNITIUM_URL/api/stats/getTopClients?token=&type=LastHour&limit=10" 2>&1) || true + + # Get top domains for DGA/tunneling detection + TOP_DOMAINS=$(curl -sf "$TECHNITIUM_URL/api/stats/getTopDomains?token=&type=LastHour&limit=20" 2>&1) || true + + # Check for high-entropy domains (potential DGA) + DGA_SUSPECT=0 + if [ -n "$TOP_DOMAINS" ]; then + # Simple heuristic: domains with many consonant clusters or very long labels + DGA_SUSPECT=$(echo "$TOP_DOMAINS" | jq -r 
'[.response.topDomains[]?.name // empty | select(length > 30 or test("[bcdfghjklmnpqrstvwxyz]{5,}"))] | length') + fi + + # Push metrics to Pushgateway + cat <