The remaining verbs that pass the "saves reasoning, not just typing" test the user posed mid-session: each encodes the non-obvious which-endpoint-reached-how resolution that would otherwise be re-derived every time. (The same test deprioritized node-ssh and secret-get aliasing — thin wrappers over commands already known.)

- net check <host> [path]: two-legged reachability — external (public DNS→CF) vs internal (Traefik LB) — so you see WHERE a break is, not just that one path works. (live: surfaced the LB at 6ms vs CF 77ms.)
- dns lookup <name> [type]: Technitium (10.0.20.201) vs public (1.1.1.1) diff.
- metrics query "<promql>" / metrics alerts: Prometheus via the LB (prometheus-query.viktorbarzin.lan); alerts uses the synthetic ALERTS series since the query frontend has no /api/v1/alerts and Alertmanager has no ingress.
- logs query "<logql>" [--since 1h] [--limit N]: Loki range query via the LB.

All reach auth-free internal ingresses through the LB (Go form of curl --resolve host:443:10.0.20.203) — no port-forward, no kubectl. In-cluster-only endpoints (Alertmanager v2) are deliberately out of scope.

Verified live before building; all five smoke-tested green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
197 lines
4.5 KiB
Go
197 lines
4.5 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/url"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Hostnames handed to lbGetBody by the observability verbs in this file.
const (
	// lokiHost is the host queried by "logs query".
	lokiHost = "loki.viktorbarzin.lan"

	// promHost is the host queried by "metrics query" and "metrics alerts".
	promHost = "prometheus-query.viktorbarzin.lan"
)
|
|
|
|
func obsCommands() []Command {
|
|
return []Command{
|
|
{Path: []string{"metrics", "query"}, Tier: TierRead,
|
|
Summary: `Prometheus instant query: metrics query "<promql>" [--json]`, Run: metricsQuery},
|
|
{Path: []string{"metrics", "alerts"}, Tier: TierRead,
|
|
Summary: "list currently firing Prometheus alerts", Run: metricsAlerts},
|
|
{Path: []string{"logs", "query"}, Tier: TierRead,
|
|
Summary: `Loki query (last --since, default 1h): logs query "<logql>" [--since 1h] [--limit N] [--json]`, Run: logsQuery},
|
|
}
|
|
}
|
|
|
|
// queryArg rebuilds the query text from the positional (non-flag) arguments,
// joined with single spaces. A PromQL/LogQL expression is normally one quoted
// argument, but an unquoted expression split into several tokens works too.
//
// Any argument present in valueFlags is dropped together with the argument
// that follows it (its value); any other argument starting with "-" is dropped
// on its own. valueFlags may be nil.
func queryArg(args []string, valueFlags map[string]bool) string {
	tokens := make([]string, 0, len(args))
	skipValue := false
	for _, arg := range args {
		switch {
		case skipValue:
			// This argument is the value of the preceding value flag.
			skipValue = false
		case valueFlags[arg]:
			skipValue = true
		case strings.HasPrefix(arg, "-"):
			// Boolean flag such as --json: not part of the query.
		default:
			tokens = append(tokens, arg)
		}
	}
	return strings.Join(tokens, " ")
}
|
|
|
|
// labelStr renders a label set as "name{k=v,k=v}". The name is the value of
// the "__name__" label (empty when absent); every other label is written as
// "key=value" and the pairs are sorted so the output is deterministic despite
// Go's random map iteration order.
func labelStr(m map[string]string) string {
	pairs := make([]string, 0, len(m))
	for key, val := range m {
		if key == "__name__" {
			continue
		}
		pairs = append(pairs, key+"="+val)
	}
	sort.Strings(pairs)

	var b strings.Builder
	b.WriteString(m["__name__"])
	b.WriteByte('{')
	b.WriteString(strings.Join(pairs, ","))
	b.WriteByte('}')
	return b.String()
}
|
|
|
|
func metricsQuery(args []string) error {
|
|
q := queryArg(args, nil)
|
|
if q == "" {
|
|
return fmt.Errorf(`usage: homelab metrics query "<promql>" [--json]`)
|
|
}
|
|
v := url.Values{}
|
|
v.Set("query", q)
|
|
body, err := lbGetBody(promHost, "/api/v1/query", v)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if containsArg(args, "--json") {
|
|
fmt.Println(string(body))
|
|
return nil
|
|
}
|
|
var r struct {
|
|
Data struct {
|
|
Result []struct {
|
|
Metric map[string]string `json:"metric"`
|
|
Value []interface{} `json:"value"`
|
|
} `json:"result"`
|
|
} `json:"data"`
|
|
}
|
|
if err := json.Unmarshal(body, &r); err != nil {
|
|
fmt.Println(string(body))
|
|
return nil
|
|
}
|
|
if len(r.Data.Result) == 0 {
|
|
fmt.Println("(no series)")
|
|
return nil
|
|
}
|
|
for _, s := range r.Data.Result {
|
|
val := ""
|
|
if len(s.Value) == 2 {
|
|
val = fmt.Sprint(s.Value[1])
|
|
}
|
|
fmt.Printf("%-14s %s\n", val, labelStr(s.Metric))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func metricsAlerts(args []string) error {
|
|
// prometheus-query is a query-only frontend (no /api/v1/alerts); the firing
|
|
// set is exposed as the synthetic ALERTS series, queryable the normal way.
|
|
v := url.Values{}
|
|
v.Set("query", `ALERTS{alertstate="firing"}`)
|
|
body, err := lbGetBody(promHost, "/api/v1/query", v)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if containsArg(args, "--json") {
|
|
fmt.Println(string(body))
|
|
return nil
|
|
}
|
|
var r struct {
|
|
Data struct {
|
|
Result []struct {
|
|
Metric map[string]string `json:"metric"`
|
|
} `json:"result"`
|
|
} `json:"data"`
|
|
}
|
|
if err := json.Unmarshal(body, &r); err != nil {
|
|
fmt.Println(string(body))
|
|
return nil
|
|
}
|
|
if len(r.Data.Result) == 0 {
|
|
fmt.Println("(no firing alerts)")
|
|
return nil
|
|
}
|
|
for _, a := range r.Data.Result {
|
|
m := a.Metric
|
|
scope := ""
|
|
for _, k := range []string{"namespace", "deployment", "instance", "job", "node"} {
|
|
if v := m[k]; v != "" {
|
|
scope = k + "=" + v
|
|
break
|
|
}
|
|
}
|
|
fmt.Printf("%-9s %-34s %s\n", m["severity"], m["alertname"], scope)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func logsQuery(args []string) error {
|
|
q := queryArg(args, map[string]bool{"--since": true, "--limit": true})
|
|
if q == "" {
|
|
return fmt.Errorf(`usage: homelab logs query "<logql>" [--since 1h] [--limit N] [--json]`)
|
|
}
|
|
since := flagValue(args, "--since")
|
|
if since == "" {
|
|
since = "1h"
|
|
}
|
|
dur, err := time.ParseDuration(since)
|
|
if err != nil {
|
|
return fmt.Errorf("bad --since %q: %w", since, err)
|
|
}
|
|
limit := flagValue(args, "--limit")
|
|
if limit == "" {
|
|
limit = "100"
|
|
}
|
|
end := time.Now()
|
|
v := url.Values{}
|
|
v.Set("query", q)
|
|
v.Set("limit", limit)
|
|
v.Set("start", strconv.FormatInt(end.Add(-dur).UnixNano(), 10))
|
|
v.Set("end", strconv.FormatInt(end.UnixNano(), 10))
|
|
body, err := lbGetBody(lokiHost, "/loki/api/v1/query_range", v)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if containsArg(args, "--json") {
|
|
fmt.Println(string(body))
|
|
return nil
|
|
}
|
|
var r struct {
|
|
Data struct {
|
|
Result []struct {
|
|
Values [][]string `json:"values"`
|
|
} `json:"result"`
|
|
} `json:"data"`
|
|
}
|
|
if err := json.Unmarshal(body, &r); err != nil {
|
|
fmt.Println(string(body))
|
|
return nil
|
|
}
|
|
n := 0
|
|
for _, s := range r.Data.Result {
|
|
for _, val := range s.Values {
|
|
if len(val) == 2 {
|
|
fmt.Println(val[1])
|
|
n++
|
|
}
|
|
}
|
|
}
|
|
if n == 0 {
|
|
fmt.Println("(no log lines)")
|
|
}
|
|
return nil
|
|
}
|