From 3e9231ae0df6639cc73ed47a3258d3f507cda560 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 14 Apr 2026 20:03:44 +0000 Subject: [PATCH] feat: augment outage report template with debugging context - Expand service list: add Home Assistant, Actual Budget, Audiobookshelf, Linkwarden, Matrix, Paperless, Tandoor, FreshRSS, Frigate, HackMD, Excalidraw, Wealthfolio, Send, Stirling PDF - Add structured debugging fields: error type, scope (just me vs others), when it started, URL accessed - Fix user report parser to extract all form fields into status.json - Show error type, scope, and start time in status page report cards [ci skip] Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/ISSUE_TEMPLATE/outage-report.yml | 71 +++++++++++++++++++++--- stacks/status-page/index.html | 3 + stacks/status-page/main.tf | 41 +++++++++----- 3 files changed, 95 insertions(+), 20 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/outage-report.yml b/.github/ISSUE_TEMPLATE/outage-report.yml index 326cc002..fd001588 100644 --- a/.github/ISSUE_TEMPLATE/outage-report.yml +++ b/.github/ISSUE_TEMPLATE/outage-report.yml @@ -8,16 +8,30 @@ body: label: Affected Service description: Which service is affected? options: - - Nextcloud - - Immich - - Vaultwarden - - Grafana + - Nextcloud (files, calendar, contacts) + - Immich (photos) + - Vaultwarden (passwords) + - Mail (email, roundcube) + - Home Assistant + - Actual Budget + - Navidrome (music) + - Audiobookshelf (audiobooks) - Plex / Jellyfin - - Mail + - Grafana (dashboards) + - Linkwarden (bookmarks) + - Matrix (chat) + - Paperless-ngx (documents) + - Tandoor (recipes) + - FreshRSS (news) + - Frigate (cameras) + - HackMD (notes) + - Excalidraw (whiteboard) + - Wealthfolio / Finance + - Headscale / VPN - DNS - - VPN / Tailscale - Website / Blog - - Music (Navidrome / Freedify) + - Send (file sharing) + - Stirling PDF - Other validations: required: true @@ -29,6 +43,49 @@ body: placeholder: "e.g., Getting 502 errors when trying to access Nextcloud since about 3pm" validations: required: true + - type: dropdown + id: error_type + attributes: + label: What kind of error? + description: This helps us narrow down the issue faster. + options: + - Page won't load (timeout / connection refused) + - 502 Bad Gateway + - 503 Service Unavailable + - Login / authentication not working + - Slow / degraded performance + - Specific feature broken (app loads but something inside doesn't work) + - Data missing or incorrect + - Other / not sure + validations: + required: true + - type: dropdown + id: scope + attributes: + label: Is it just you or others too? + description: Helps us tell apart service outages from account/device issues. + options: + - Just me (others seem fine) + - Multiple people affected + - Not sure + validations: + required: false + - type: input + id: when + attributes: + label: When did it start? + description: Approximate time helps us correlate with logs and deployments. + placeholder: "e.g., about 3pm today, or since yesterday morning" + validations: + required: false + - type: input + id: url + attributes: + label: URL you were accessing (optional) + description: The exact URL helps us check the right endpoint. + placeholder: "e.g., https://nextcloud.viktorbarzin.me/apps/files" + validations: + required: false - type: input id: contact attributes: diff --git a/stacks/status-page/index.html b/stacks/status-page/index.html index c98adfd5..7ac2f82b 100644 --- a/stacks/status-page/index.html +++ b/stacks/status-page/index.html @@ -209,6 +209,9 @@ footer { color: var(--fg3); font-size: 11px; margin-top: 32px; padding-top: 16px html+='
'+esc(inc.title)+'
'; html+='
'+ago(created)+''; if(!isReport)html+=''+dur(created,end)+''; + if(isReport&&inc.error_type)html+=''+esc(inc.error_type)+''; + if(isReport&&inc.scope)html+=''+esc(inc.scope)+''; + if(isReport&&inc.when_started)html+='Since: '+esc(inc.when_started)+''; if(resolved)html+='Resolved'; html+='
'; if(inc.affected_services&&inc.affected_services.length){ diff --git a/stacks/status-page/main.tf b/stacks/status-page/main.tf index 246db52a..2ef663e0 100644 --- a/stacks/status-page/main.tf +++ b/stacks/status-page/main.tf @@ -385,22 +385,33 @@ ISSUES_REPO = "ViktorBarzin/infra" def has_label(issue, name): return any(l["name"].lower() == name.lower() for l in issue.get("labels", [])) -def parse_user_report_service(body): - """Extract service from GitHub Issue Form dropdown response.""" +def parse_form_field(body, heading): + """Extract value after a ### heading from GitHub Issue Form response.""" if not body: return None - for line in body.split("\n"): - stripped = line.strip() - if stripped and not stripped.startswith("#") and not stripped.startswith("_") and not stripped.startswith(" "Nextcloud" + if service and "(" in service: + service = service[:service.index("(")].strip() + return { + "service": service, + "error_type": parse_form_field(body, "what kind of error"), + "scope": parse_form_field(body, "is it just you"), + "when": parse_form_field(body, "when did it start"), + "url": parse_form_field(body, "url you were accessing"), + } + try: issues_url = "https://api.github.com/repos/" + ISSUES_REPO + "/issues" @@ -435,7 +446,8 @@ try: continue if has_label(issue, "incident"): continue # Already promoted to incident, skip duplicate - svc = parse_user_report_service(issue.get("body")) + ctx = parse_user_report_context(issue.get("body")) + svc = ctx["service"] user_reports.append({ "id": issue["number"], "title": issue["title"], @@ -443,6 +455,9 @@ try: "status": "open", "created_at": issue["created_at"], "affected_services": [svc] if svc else [], + "error_type": ctx.get("error_type"), + "scope": ctx.get("scope"), + "when_started": ctx.get("when"), "url": issue["html_url"], })