homelab: v0.5.0 — net/dns/metrics/logs probes (endpoint resolution)

Adds the remaining verbs that pass the "saves reasoning, not just typing" test
the user posed mid-session: each one encodes the non-obvious resolution of which
endpoint is reached and how, which would otherwise be re-derived every time.
(The same test deprioritized node-ssh and secret-get aliasing — thin wrappers
over commands already known.)

- net check <host> [path]: two-legged reachability — external (public DNS→CF)
  vs internal (Traefik LB) — so you see WHERE a break is, not just that one path
  works. (live: surfaced the LB at 6ms vs CF 77ms.)
- dns lookup <name> [type]: Technitium (10.0.20.201) vs public (1.1.1.1) diff.
- metrics query "<promql>" / metrics alerts: Prometheus via the LB
  (prometheus-query.viktorbarzin.lan); alerts uses the synthetic ALERTS series
  since the query frontend has no /api/v1/alerts and Alertmanager has no ingress.
- logs query "<logql>" [--since 1h] [--limit N]: Loki range query via the LB.

All reach auth-free internal ingresses through the LB (Go form of
curl --resolve host:443:10.0.20.203) — no port-forward, no kubectl. In-cluster-
only endpoints (Alertmanager v2) deliberately out of scope. Verified live before
building; all five smoke-tested green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-19 11:27:31 +00:00
parent 9189560ac3
commit e91e1612dd
9 changed files with 466 additions and 3 deletions

83
cli/cmd_net.go Normal file
View file

@ -0,0 +1,83 @@
package main
import (
"fmt"
"strings"
"time"
)
// netCommands returns the read-tier network verbs registered by this file:
// "net check" (handled by netCheck) and "dns lookup" (handled by dnsLookup).
func netCommands() []Command {
	check := Command{
		Path:    []string{"net", "check"},
		Tier:    TierRead,
		Summary: "reachability of <host>[/path]: external (public DNS→CF) vs internal (Traefik LB)",
		Run:     netCheck,
	}
	lookup := Command{
		Path:    []string{"dns", "lookup"},
		Tier:    TierRead,
		Summary: "resolve <name> via Technitium (10.0.20.201) and public (1.1.1.1), diffed",
		Run:     dnsLookup,
	}
	return []Command{check, lookup}
}
// fmtProbe renders the outcome of one HTTP probe as a short status string.
//
// A non-nil err wins and is rendered as "ERR <message>"; otherwise the result
// is "HTTP <code> <elapsed>ms", with d truncated to whole milliseconds.
func fmtProbe(code int, d time.Duration, err error) string {
	if err == nil {
		elapsedMS := d.Milliseconds()
		return fmt.Sprintf("HTTP %d %dms", code, elapsedMS)
	}
	return fmt.Sprintf("ERR %s", err.Error())
}
// netCheck probes https://<host>[path] over two legs and prints one result line
// per leg, so a break can be located rather than merely detected:
//
//   - external: the host is resolved through the public resolver 1.1.1.1 and
//     the probe is sent via a client built for the first answer line;
//   - internal: the probe is sent via a client built for internalLBIP.
//
// args carries the positional arguments: the host, then an optional path. A
// path without a leading "/" gets one; an argument starting with "-" is not
// treated as a path. An empty host yields a usage error. Probe failures are
// reported on stdout and do not make the command fail.
func netCheck(args []string) error {
	host, rest := firstPositional(args)
	if host == "" {
		return fmt.Errorf("usage: homelab net check <host> [path]")
	}

	path := "/"
	if len(rest) > 0 && !strings.HasPrefix(rest[0], "-") {
		path = rest[0]
		if !strings.HasPrefix(path, "/") {
			path = "/" + path
		}
	}

	u := "https://" + host + path
	fmt.Printf("%s\n", u)

	const probeTimeout = 10 * time.Second

	// External leg: resolve via public DNS, then dial the public IP (tests the
	// real CF path). The lookup error used to be discarded, which made a failed
	// lookup indistinguishable from a host with no public record.
	// NOTE(review): assumes the first line of dig's output is an IP address,
	// not a CNAME target — confirm against dig's implementation.
	pubOut, digErr := dig(hostOnly(host), "1.1.1.1", "")
	pubIP := firstLine(pubOut)
	switch {
	case pubIP != "":
		c, d, e := probeURL(clientDialingIP(pubIP, probeTimeout), u)
		fmt.Printf(" external (public %-15s) %s\n", pubIP, fmtProbe(c, d, e))
	case digErr != nil:
		// No usable answer and the lookup itself failed: say so instead of
		// claiming the record does not exist.
		fmt.Printf(" external (public) %s\n", fmtProbe(0, 0, digErr))
	default:
		fmt.Println(" external (public) no public A record")
	}

	// Internal leg: dial the Traefik LB directly.
	c, d, e := probeURL(clientDialingIP(internalLBIP, probeTimeout), u)
	fmt.Printf(" internal (LB %-15s) %s\n", internalLBIP, fmtProbe(c, d, e))
	return nil
}
// dnsLookup resolves <name> through Technitium (10.0.20.201) and through the
// public resolver (1.1.1.1), prints both answers on one line each, and warns
// when they differ.
//
// args carries the positional arguments: the name, then an optional record
// type that is passed through to dig unchanged. An argument starting with "-"
// is not treated as a record type. An empty name yields a usage error.
//
// A failed lookup is printed as "ERR <message>" and suppresses the mismatch
// warning, because a diff against a failed lookup says nothing about
// split-horizon. Answers are compared as unordered record sets, so resolvers
// that return the same records in a different order are not flagged.
func dnsLookup(args []string) error {
	name, rest := firstPositional(args)
	if name == "" {
		return fmt.Errorf("usage: homelab dns lookup <name> [A|AAAA|TXT|MX|PTR]")
	}

	rr := ""
	if len(rest) > 0 && !strings.HasPrefix(rest[0], "-") {
		rr = rest[0]
	}

	tech, techErr := dig(name, "10.0.20.201", rr)
	pub, pubErr := dig(name, "1.1.1.1", rr)
	fmt.Printf("technitium (10.0.20.201): %s\n", dnsAnswerSummary(tech, techErr))
	fmt.Printf("public (1.1.1.1) : %s\n", dnsAnswerSummary(pub, pubErr))

	if techErr != nil || pubErr != nil {
		return nil
	}
	if !sameDNSAnswers(tech, pub) {
		fmt.Println("⚠ mismatch — split-horizon (expected for internal-only apps) or a propagation gap")
	}
	return nil
}

// dnsAnswerSummary renders one resolver's result: the lookup error if there is
// one, otherwise the answer collapsed onto a single line.
func dnsAnswerSummary(out string, err error) string {
	if err != nil {
		return "ERR " + err.Error()
	}
	return oneLineList(out)
}

// sameDNSAnswers reports whether a and b hold the same non-blank lines,
// ignoring line order and surrounding whitespace. Duplicates are counted.
func sameDNSAnswers(a, b string) bool {
	counts := make(map[string]int)
	tally := func(s string, delta int) {
		for _, line := range strings.Split(s, "\n") {
			if rec := strings.TrimSpace(line); rec != "" {
				counts[rec] += delta
			}
		}
	}
	tally(a, 1)
	tally(b, -1)
	for _, n := range counts {
		if n != 0 {
			return false
		}
	}
	return true
}
// hostOnly returns h up to, but not including, its first "/", dropping any
// path that was accidentally included. A value without "/" is returned as is.
func hostOnly(h string) string {
	if slash := strings.IndexByte(h, '/'); slash >= 0 {
		return h[:slash]
	}
	return h
}
// oneLineList collapses multi-line output onto a single line: surrounding
// whitespace is trimmed and each newline becomes ", ". Input that is empty
// after trimming is rendered as "(none)".
func oneLineList(s string) string {
	trimmed := strings.TrimSpace(s)
	if trimmed != "" {
		return strings.Join(strings.Split(trimmed, "\n"), ", ")
	}
	return "(none)"
}