diff --git a/.woodpecker/default.yml b/.woodpecker/default.yml index d46f5ae1..dc77913e 100644 --- a/.woodpecker/default.yml +++ b/.woodpecker/default.yml @@ -324,13 +324,8 @@ steps: fi GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git push origin master - # ── Slack notification ── - - | - PLATFORM_COUNT=$(wc -l < .platform_apply 2>/dev/null | tr -d ' ') - APP_COUNT=$(wc -l < .app_apply 2>/dev/null | tr -d ' ') - curl -s -X POST -H 'Content-type: application/json' \ - --data "{\"channel\":\"general\",\"text\":\"Woodpecker CI: infra pipeline ${CI_PIPELINE_STATUS} (platform:${PLATFORM_COUNT}, apps:${APP_COUNT})\"}" \ - "$SLACK_WEBHOOK" || true + # (No Slack post on success — Viktor 2026-07-02: CI notifies on FAILED + # runs only; the notify-failure step below covers those.) # Slack on failure (runs even if apply step fails) - name: notify-failure diff --git a/.woodpecker/drift-detection.yml b/.woodpecker/drift-detection.yml index b2a552f4..50287d2d 100644 --- a/.woodpecker/drift-detection.yml +++ b/.woodpecker/drift-detection.yml @@ -147,13 +147,30 @@ steps: echo "Drift: ${DRIFTED:-none}" echo "Errors: ${ERRORS:-none}" - # ── Slack alert if drift found ── + # ── Slack only when something is WRONG (drift or errors) ── + # All-clean runs are silent (Viktor 2026-07-02: CI notifies on + # failed/actionable runs only; clean is the daily normal). if [ -n "$DRIFTED" ]; then curl -s -X POST -H 'Content-type: application/json' \ --data "{\"channel\":\"general\",\"text\":\":warning: Drift detected in:${DRIFTED}\nClean: ${CLEAN} stacks. Errors:${ERRORS:-none}\"}" \ "$SLACK_WEBHOOK" || true - else + elif [ -n "$ERRORS" ]; then curl -s -X POST -H 'Content-type: application/json' \ - --data "{\"channel\":\"general\",\"text\":\":white_check_mark: Drift detection: all ${CLEAN} stacks clean${ERRORS:+. Errors: $ERRORS}\"}" \ + --data "{\"channel\":\"general\",\"text\":\":red_circle: Drift detection had errors: ${ERRORS} (clean: ${CLEAN})\"}" \ "$SLACK_WEBHOOK" || true fi + + # Hard-failure catch: the in-script posts above never run if the step + # itself crashes early — this step is the only signal for that case. + - name: notify-failure + image: curlimages/curl + commands: + - | + curl -s -X POST -H 'Content-type: application/json' \ + --data "{\"channel\":\"general\",\"text\":\":red_circle: Drift-detection pipeline FAILED (crashed before reporting)\"}" \ + "$SLACK_WEBHOOK" || true + environment: + SLACK_WEBHOOK: + from_secret: slack_webhook + when: + status: [failure] diff --git a/.woodpecker/postmortem-todos.yml b/.woodpecker/postmortem-todos.yml index 68330272..0af2b950 100644 --- a/.woodpecker/postmortem-todos.yml +++ b/.woodpecker/postmortem-todos.yml @@ -28,6 +28,7 @@ steps: from_secret: slack_webhook commands: - apk add --no-cache curl - - "curl -sf -X POST https://hooks.slack.com/services/$SLACK_WEBHOOK -H 'Content-Type: application/json' -d '{\"text\": \"Post-mortem TODO pipeline completed\"}' || true" + - "curl -sf -X POST https://hooks.slack.com/services/$SLACK_WEBHOOK -H 'Content-Type: application/json' -d '{\"text\": \":red_circle: Post-mortem TODO pipeline FAILED\"}' || true" when: - - status: [success, failure] + # Failure-only (Viktor 2026-07-02): CI notifies on failed runs only. + - status: [failure] diff --git a/.woodpecker/pve-nfs-exports-sync.yml b/.woodpecker/pve-nfs-exports-sync.yml index 54aea68a..7a3cb3e8 100644 --- a/.woodpecker/pve-nfs-exports-sync.yml +++ b/.woodpecker/pve-nfs-exports-sync.yml @@ -58,7 +58,8 @@ steps: commands: - | curl -s -X POST -H 'Content-type: application/json' \ - --data "{\"channel\":\"general\",\"text\":\"PVE /etc/exports sync: ${CI_PIPELINE_STATUS}\"}" \ + --data "{\"channel\":\"general\",\"text\":\":red_circle: PVE /etc/exports sync FAILED\"}" \ "$SLACK_WEBHOOK" || true when: - status: [success, failure] + # Failure-only (Viktor 2026-07-02): CI notifies on failed runs only. + status: [failure] diff --git a/.woodpecker/registry-config-sync.yml b/.woodpecker/registry-config-sync.yml index aad59fbe..9368a35f 100644 --- a/.woodpecker/registry-config-sync.yml +++ b/.woodpecker/registry-config-sync.yml @@ -151,7 +151,8 @@ steps: commands: - | curl -s -X POST -H 'Content-type: application/json' \ - --data "{\"channel\":\"general\",\"text\":\"Registry config sync on 10.0.20.10: ${CI_PIPELINE_STATUS}\"}" \ + --data "{\"channel\":\"general\",\"text\":\":red_circle: Registry config sync on 10.0.20.10 FAILED\"}" \ "$SLACK_WEBHOOK" || true when: - status: [success, failure] + # Failure-only (Viktor 2026-07-02): CI notifies on failed runs only. + status: [failure] diff --git a/.woodpecker/renew-tls.yml b/.woodpecker/renew-tls.yml index cd93fe7c..45e17028 100644 --- a/.woodpecker/renew-tls.yml +++ b/.woodpecker/renew-tls.yml @@ -71,10 +71,11 @@ steps: commands: - | curl -s -X POST -H 'Content-type: application/json' \ - --data "{\"channel\":\"general\",\"text\":\"Woodpecker CI: TLS certificate renewal ${CI_PIPELINE_STATUS}\"}" \ + --data "{\"channel\":\"general\",\"text\":\":red_circle: Woodpecker CI: TLS certificate renewal FAILED\"}" \ "$SLACK_WEBHOOK" || true environment: SLACK_WEBHOOK: from_secret: slack_webhook when: - status: [success, failure] + # Failure-only (Viktor 2026-07-02): successful renewals are routine. + status: [failure] diff --git a/docs/architecture/ci-cd.md b/docs/architecture/ci-cd.md index 5a9c3722..b8cfcdd5 100644 --- a/docs/architecture/ci-cd.md +++ b/docs/architecture/ci-cd.md @@ -293,7 +293,9 @@ The infra repo runs on Woodpecker via **two** forge registrations: the Forgejo forge (repo id 82, registered 2026-06-08) and the legacy GitHub forge (repo id 1). Pushes to **Forgejo** `master` fire `.woodpecker/default.yml` (changed-stacks terragrunt apply, in `infra-ci`) plus the `notify-nonadmin-push` -Slack audit step. Operational facts (2026-06-10): +Slack audit step. **Slack policy (2026-07-02): every infra pipeline posts only +on FAILURE** (plus the non-admin audit post and drift/error findings) — routine +successful runs are silent. Operational facts (2026-06-10): - **Webhook URL is the IN-CLUSTER service**: `http://woodpecker-server.woodpecker.svc.cluster.local/api/hook?...` (PATCHed @@ -375,7 +377,8 @@ steps: notify: image: plugins/slack when: - status: [success, failure] + # Failure-only (2026-07-02 policy): CI notifies about failed runs only. + status: [failure] ``` ### CI/CD secrets sync