add network traffic monitoring and adversary detection
- CrowdSec: add a syslog listener for pfSense firewall logs (NodePort 30514), add postfix/dovecot log acquisition, and install the pf/postfix/dovecot/sshd collections
- Monitoring: add a DNS anomaly CronJob (queries Technitium every 15m, runs DGA detection, pushes metrics to Pushgateway)
- Grafana: add the "Network Traffic & Adversary Detection" dashboard (GoFlow2 flows, CrowdSec decisions, DNS anomaly metrics)

pfSense changes applied live: syslog forwarding to 10.0.20.202:30514, Snort suppress rules for http_inspect false positives, IPS connectivity policy enabled
This commit is contained in:
parent
877cd15b45
commit
55246c8b5d
4 changed files with 450 additions and 1 deletions
|
|
@ -112,6 +112,31 @@ resource "helm_release" "crowdsec" {
|
|||
wait_for_jobs = true
|
||||
}
|
||||
|
||||
# Publishes the CrowdSec agent's syslog port on every node via a NodePort service.
# pfSense forwards its firewall logs to 10.0.20.202:30514 (any k8s node IP works).
resource "kubernetes_service" "crowdsec_syslog" {
  metadata {
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    name      = "crowdsec-syslog"
    labels = {
      app = "crowdsec-syslog"
    }
  }

  spec {
    type = "NodePort"

    # Pods carrying both labels receive the traffic (the CrowdSec agent pods).
    selector = {
      type      = "agent"
      "k8s-app" = "crowdsec"
    }

    # Syslog over UDP: node port 30514 -> service port 514 -> container port 514.
    port {
      name        = "syslog-udp"
      protocol    = "UDP"
      node_port   = 30514
      port        = 514
      target_port = 514
    }
  }
}
|
||||
|
||||
# Deployment for my custom dashboard that helps me unblock myself when I blocklist myself
|
||||
resource "kubernetes_deployment" "crowdsec-web" {
|
||||
|
|
|
|||
|
|
@ -17,6 +17,19 @@ agent:
|
|||
podName: traefik-*
|
||||
# as in crowdsec configuration, we need to specify the program name so the parser will match and parse logs
|
||||
program: traefik
|
||||
# pfSense firewall logs via syslog
|
||||
- source: syslog
|
||||
listen_addr: "0.0.0.0"
|
||||
listen_port: 514
|
||||
labels:
|
||||
type: pf
|
||||
# Mailserver logs for SMTP/IMAP brute-force detection
|
||||
- namespace: mailserver
|
||||
podName: mailserver-*
|
||||
program: postfix
|
||||
- namespace: mailserver
|
||||
podName: mailserver-*
|
||||
program: dovecot
|
||||
# Those are ENV variables
|
||||
env:
|
||||
# As it's a test, we don't want to share signals with CrowdSec so disable the Online API.
|
||||
|
|
@ -24,10 +37,18 @@ agent:
|
|||
# value: "true"
|
||||
# As we are running Traefik, we want to install the Traefik collection
|
||||
- name: COLLECTIONS
|
||||
value: "crowdsecurity/traefik crowdsecurity/base-http-scenarios crowdsecurity/http-cve"
|
||||
value: "crowdsecurity/traefik crowdsecurity/base-http-scenarios crowdsecurity/http-cve crowdsecurity/pf crowdsecurity/postfix crowdsecurity/dovecot crowdsecurity/sshd"
|
||||
- name: SCENARIOS
|
||||
value: ""
|
||||
# value: "crowdsecurity/http-crawl-aggressive"
|
||||
# Expose syslog port for pfSense log ingestion
|
||||
additionalPorts:
|
||||
- name: syslog-udp
|
||||
containerPort: 514
|
||||
protocol: UDP
|
||||
- name: syslog-tcp
|
||||
containerPort: 514
|
||||
protocol: TCP
|
||||
# Mount custom scenarios into /etc/crowdsec/scenarios
|
||||
extraVolumeMounts:
|
||||
- name: custom-scenarios
|
||||
|
|
|
|||
|
|
@ -0,0 +1,280 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": { "type": "datasource", "uid": "grafana" },
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Network traffic monitoring via GoFlow2 NetFlow + DNS anomaly detection + CrowdSec",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"title": "GoFlow2 Status",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "green", "value": 1 }
|
||||
]
|
||||
},
|
||||
"mappings": [
|
||||
{ "type": "value", "options": { "0": { "text": "DOWN", "color": "red" }, "1": { "text": "UP", "color": "green" } } }
|
||||
]
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{ "expr": "up{job=\"goflow2\"}", "legendFormat": "GoFlow2" }
|
||||
]
|
||||
},
|
||||
{
  "title": "NetFlow Bytes/s",
  "type": "stat",
  "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
  "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 },
  "fieldConfig": {
    "defaults": {
      "color": { "mode": "thresholds" },
      "unit": "Bps",
      "thresholds": {
        "steps": [
          { "color": "green", "value": null },
          { "color": "yellow", "value": 10485760 },
          { "color": "red", "value": 104857600 }
        ]
      }
    }
  },
  "targets": [
    { "expr": "sum(rate(goflow2_flow_traffic_bytes_total[5m]))", "legendFormat": "Total" }
  ]
},
|
||||
{
|
||||
"title": "NetFlow Flows Processed",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"unit": "short",
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "green", "value": 1 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{ "expr": "rate(goflow2_flow_process_nf_total[5m])", "legendFormat": "flows/s" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "CrowdSec Active Decisions",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 50 },
|
||||
{ "color": "red", "value": 200 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{ "expr": "cs_active_decisions", "legendFormat": "Decisions" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "DNS Queries (Last Hour)",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 5000 },
|
||||
{ "color": "red", "value": 20000 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{ "expr": "dns_anomaly_total_queries", "legendFormat": "Queries" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "DNS DGA Suspects",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{ "expr": "dns_anomaly_dga_suspects", "legendFormat": "DGA Suspects" }
|
||||
]
|
||||
},
|
||||
{
  "title": "NetFlow Traffic Over Time",
  "type": "timeseries",
  "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
  "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
  "fieldConfig": {
    "defaults": {
      "unit": "Bps",
      "custom": { "fillOpacity": 20, "lineWidth": 2 }
    },
    "overrides": [
      {
        "matcher": { "id": "byName", "options": "Packets/s" },
        "properties": [
          { "id": "unit", "value": "pps" },
          { "id": "custom.axisPlacement", "value": "right" }
        ]
      }
    ]
  },
  "targets": [
    { "expr": "rate(goflow2_flow_traffic_bytes_total[5m])", "legendFormat": "Bytes/s" },
    { "expr": "rate(goflow2_flow_traffic_packets_total[5m])", "legendFormat": "Packets/s" }
  ]
},
|
||||
{
|
||||
"title": "NetFlow Processing",
|
||||
"type": "timeseries",
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": { "fillOpacity": 10, "lineWidth": 1 }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{ "expr": "rate(goflow2_flow_process_nf_total[5m])", "legendFormat": "Flows processed/s (v{{ version }})" },
|
||||
{ "expr": "rate(goflow2_flow_decoder_error_total[5m])", "legendFormat": "Decoder errors/s" },
|
||||
{ "expr": "rate(goflow2_flow_process_nf_errors_total[5m])", "legendFormat": "Processing errors/s ({{ error }})" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "NetFlow Processing Delay",
|
||||
"type": "timeseries",
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 12 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"custom": { "fillOpacity": 10, "lineWidth": 1 }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{ "expr": "goflow2_flow_process_nf_delay_seconds{quantile=\"0.5\"}", "legendFormat": "p50 delay" },
|
||||
{ "expr": "goflow2_flow_process_nf_delay_seconds{quantile=\"0.9\"}", "legendFormat": "p90 delay" },
|
||||
{ "expr": "goflow2_flow_process_nf_delay_seconds{quantile=\"0.99\"}", "legendFormat": "p99 delay" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "CrowdSec Alerts & Decisions Over Time",
|
||||
"type": "timeseries",
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 12 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": { "fillOpacity": 15, "lineWidth": 2 }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{ "expr": "cs_active_decisions", "legendFormat": "Active Decisions" },
|
||||
{ "expr": "cs_alerts", "legendFormat": "Total Alerts" },
|
||||
{ "expr": "rate(cs_lapi_bouncer_requests_total[5m])", "legendFormat": "Bouncer req/s" }
|
||||
]
|
||||
},
|
||||
{
  "title": "CrowdSec LAPI Request Latency",
  "type": "timeseries",
  "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
  "gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 },
  "fieldConfig": {
    "defaults": {
      "unit": "s",
      "custom": { "fillOpacity": 10, "lineWidth": 1 }
    }
  },
  "targets": [
    { "expr": "histogram_quantile(0.50, sum by (le) (rate(cs_lapi_request_duration_seconds_bucket[5m])))", "legendFormat": "p50" },
    { "expr": "histogram_quantile(0.99, sum by (le) (rate(cs_lapi_request_duration_seconds_bucket[5m])))", "legendFormat": "p99" }
  ]
},
|
||||
{
|
||||
"title": "DNS Metrics Over Time",
|
||||
"type": "timeseries",
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 20 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": { "fillOpacity": 15, "lineWidth": 2 }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{ "expr": "dns_anomaly_total_queries", "legendFormat": "Total Queries" },
|
||||
{ "expr": "dns_anomaly_nx_domain", "legendFormat": "NX Domain" },
|
||||
{ "expr": "dns_anomaly_server_failure", "legendFormat": "SERVFAIL" },
|
||||
{ "expr": "dns_anomaly_blocked", "legendFormat": "Blocked" },
|
||||
{ "expr": "dns_anomaly_dga_suspects", "legendFormat": "DGA Suspects" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "DNS Anomaly Check Health",
|
||||
"type": "stat",
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 20 },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"unit": "dateTimeFromNow",
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{ "expr": "dns_anomaly_check_timestamp * 1000", "legendFormat": "Last Check" }
|
||||
]
|
||||
}
|
||||
],
|
||||
"schemaVersion": 39,
|
||||
"tags": ["network", "security", "goflow2", "dns", "crowdsec"],
|
||||
"templating": { "list": [] },
|
||||
"time": { "from": "now-6h", "to": "now" },
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "Network Traffic & Adversary Detection",
|
||||
"uid": "network-traffic-adversary",
|
||||
"version": 1
|
||||
}
|
||||
|
|
@ -187,6 +187,129 @@ resource "kubernetes_cron_job_v1" "cloudsync_monitor" {
|
|||
}
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
# DNS Anomaly Monitor — query Technitium stats API, derive anomaly gauges, push to Pushgateway
# Runs every 15 min. Publishes last-hour query / NXDOMAIN / SERVFAIL / blocked counts,
# a count of DGA-looking domains, and a rolling average of the query volume.
# The script itself raises no alerts; it only publishes the gauges for Prometheus
# rules and the Grafana dashboard to consume.
# -----------------------------------------------------------------------------
resource "kubernetes_cron_job_v1" "dns_anomaly_monitor" {
  metadata {
    name      = "dns-anomaly-monitor"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
  }
  spec {
    concurrency_policy            = "Replace"
    failed_jobs_history_limit     = 3
    successful_jobs_history_limit = 3
    schedule                      = "*/15 * * * *"
    job_template {
      metadata {}
      spec {
        backoff_limit              = 2
        ttl_seconds_after_finished = 300
        template {
          metadata {}
          spec {
            container {
              name  = "dns-anomaly-monitor"
              image = "docker.io/library/alpine"
              # NOTE: the script deliberately uses only $VAR / $(...) forms (never
              # dollar-brace) so Terraform does not try to interpolate it.
              command = ["/bin/sh", "-c", <<-EOT
                set -euo pipefail
                apk add --no-cache curl jq

                TECHNITIUM_URL="http://technitium-web.technitium.svc.cluster.local:5380"
                PUSHGATEWAY_URL="http://prometheus-prometheus-pushgateway.monitoring:9091"
                PUSH_JOB="dns-anomaly-monitor"

                # Get main stats
                STATS=$(curl -sf "$TECHNITIUM_URL/api/stats/get?token=&type=LastHour") || {
                  echo "ERROR: Failed to query Technitium stats API"
                  exit 1
                }

                # The API reports failures (e.g. a rejected token) through the "status"
                # field of the JSON body. Without this check every counter would fall
                # back to 0 and be pushed together with a fresh "successful" timestamp.
                API_STATUS=$(echo "$STATS" | jq -r '.status // "missing"') || API_STATUS="unparseable"
                if [ "$API_STATUS" != "ok" ]; then
                  echo "ERROR: Technitium stats API returned status '$API_STATUS'"
                  exit 1
                fi

                # Parse key metrics
                TOTAL_QUERIES=$(echo "$STATS" | jq -r '.response.stats.totalQueries // 0')
                SERVER_FAILURE=$(echo "$STATS" | jq -r '.response.stats.serverFailure // 0')
                NX_DOMAIN=$(echo "$STATS" | jq -r '.response.stats.nxDomain // 0')
                BLOCKED=$(echo "$STATS" | jq -r '.response.stats.blocked // 0')
                NO_ERROR=$(echo "$STATS" | jq -r '.response.stats.noError // 0')

                echo "DNS Stats (last hour): total=$TOTAL_QUERIES noError=$NO_ERROR nxDomain=$NX_DOMAIN serverFailure=$SERVER_FAILURE blocked=$BLOCKED"

                # Get top domains for DGA/tunneling detection (best effort)
                TOP_DOMAINS=$(curl -sf "$TECHNITIUM_URL/api/stats/getTopDomains?token=&type=LastHour&limit=20") || true

                # Check for high-entropy domains (potential DGA)
                DGA_SUSPECT=0
                if [ -n "$TOP_DOMAINS" ]; then
                  # Simple heuristic: domains with many consonant clusters or very long labels.
                  # An unparseable body must not abort the job before the main metrics are pushed.
                  DGA_SUSPECT=$(echo "$TOP_DOMAINS" | jq -r '[.response.topDomains[]?.name // empty | select(length > 30 or test("[bcdfghjklmnpqrstvwxyz]{5,}"))] | length') || {
                    echo "WARN: could not parse top-domains response; reporting 0 DGA suspects"
                    DGA_SUSPECT=0
                  }
                fi

                # Rolling average for spike detection.
                # Every run starts in a fresh pod, so nothing on the local filesystem
                # survives between runs; the previous average is read back from the
                # Pushgateway, which keeps the last pushed value for this job.
                PUSHED=$(curl -sf "$PUSHGATEWAY_URL/metrics") || PUSHED=""
                PREV_AVG=$(printf '%s\n' "$PUSHED" | awk -v job="$PUSH_JOB" 'index($0, "dns_anomaly_avg_queries{") == 1 && index($0, "job=\"" job "\"") > 0 { v = $NF } END { print v }')
                # awk does the arithmetic because the exposition format may render
                # large gauges in exponent notation, which shell arithmetic rejects.
                NEW_AVG=$(awk -v prev="$PREV_AVG" -v cur="$TOTAL_QUERIES" 'BEGIN { if (prev == "") print cur; else printf "%d\n", (prev + cur) / 2 }')

                # Push metrics to Pushgateway (single POST: replaces same-named metrics in the group)
                printf '%s\n' \
                  "# HELP dns_anomaly_total_queries Total DNS queries in last hour" \
                  "# TYPE dns_anomaly_total_queries gauge" \
                  "dns_anomaly_total_queries $TOTAL_QUERIES" \
                  "# HELP dns_anomaly_server_failure DNS server failures in last hour" \
                  "# TYPE dns_anomaly_server_failure gauge" \
                  "dns_anomaly_server_failure $SERVER_FAILURE" \
                  "# HELP dns_anomaly_nx_domain NX domain responses in last hour" \
                  "# TYPE dns_anomaly_nx_domain gauge" \
                  "dns_anomaly_nx_domain $NX_DOMAIN" \
                  "# HELP dns_anomaly_blocked Blocked queries in last hour" \
                  "# TYPE dns_anomaly_blocked gauge" \
                  "dns_anomaly_blocked $BLOCKED" \
                  "# HELP dns_anomaly_dga_suspects Domains with DGA-like characteristics" \
                  "# TYPE dns_anomaly_dga_suspects gauge" \
                  "dns_anomaly_dga_suspects $DGA_SUSPECT" \
                  "# HELP dns_anomaly_check_timestamp Last successful check timestamp" \
                  "# TYPE dns_anomaly_check_timestamp gauge" \
                  "dns_anomaly_check_timestamp $(date +%s)" \
                  "# HELP dns_anomaly_avg_queries Rolling average DNS queries" \
                  "# TYPE dns_anomaly_avg_queries gauge" \
                  "dns_anomaly_avg_queries $NEW_AVG" \
                  | curl -sf --data-binary @- "$PUSHGATEWAY_URL/metrics/job/$PUSH_JOB"

                echo "DNS anomaly check complete (DGA suspects: $DGA_SUSPECT)"
              EOT
              ]
              resources {
                requests = {
                  memory = "32Mi"
                  cpu    = "10m"
                }
                limits = {
                  memory = "64Mi"
                }
              }
            }
            dns_config {
              option {
                name  = "ndots"
                value = "2"
              }
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
resource "kubernetes_manifest" "status_redirect_middleware" {
|
||||
manifest = {
|
||||
apiVersion = "traefik.io/v1alpha1"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue