diff --git a/stacks/dbaas/modules/dbaas/main.tf b/stacks/dbaas/modules/dbaas/main.tf index 854e20b8..8389aa93 100644 --- a/stacks/dbaas/modules/dbaas/main.tf +++ b/stacks/dbaas/modules/dbaas/main.tf @@ -1185,6 +1185,30 @@ resource "null_resource" "pg_payslip_ingest_db" { } } +# Idempotently create the job_hunter role and database for the job-hunter scraper service (each CREATE runs only when the catalog lookup finds nothing). +# The role password is managed by the Vault Database Secrets Engine (static role `pg-job-hunter`, 7d rotation); the literal in CREATE ROLE is only a placeholder for Vault to rotate. +resource "null_resource" "pg_job_hunter_db" { + depends_on = [null_resource.pg_cluster] + + triggers = { + db_name = "job_hunter" + username = "job_hunter" + } + + provisioner "local-exec" { + command = <<-EOT + kubectl --kubeconfig ${var.kube_config_path} exec -n dbaas pg-cluster-1 -c postgres -- \ + bash -c ' + psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = '"'"'job_hunter'"'"'" | grep -q 1 || \ + psql -U postgres -c "CREATE ROLE job_hunter WITH LOGIN PASSWORD '"'"'changeme-vault-will-rotate'"'"'" + psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_database WHERE datname = '"'"'job_hunter'"'"'" | grep -q 1 || \ + psql -U postgres -c "CREATE DATABASE job_hunter OWNER job_hunter" + psql -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE job_hunter TO job_hunter" + ' + EOT + } +} + # Old PostgreSQL deployment — kept commented for rollback reference # resource "kubernetes_deployment" "postgres" { # metadata { diff --git a/stacks/job-hunter/main.tf b/stacks/job-hunter/main.tf new file mode 100644 index 00000000..5d123d7c --- /dev/null +++ b/stacks/job-hunter/main.tf @@ -0,0 +1,320 @@ +variable "image_tag" { + type = string + default = "latest" + description = "job-hunter image tag. Use 8-char git SHA in CI; :latest only for local trials."
+} + +variable "postgresql_host" { type = string } + +locals { + namespace = "job-hunter" + image = "registry.viktorbarzin.me/job-hunter:${var.image_tag}" + labels = { + app = "job-hunter" + } +} + +resource "kubernetes_namespace" "job_hunter" { + metadata { + name = local.namespace + labels = { + tier = local.tiers.aux + "istio-injection" = "disabled" + } + } + lifecycle { + # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace; ignore it so Terraform does not report it as drift + ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]] + } +} + +# App secrets — seed these keys in Vault before applying (all of them are extracted into the job-hunter-secrets Secret): +# secret/job-hunter +# webhook_bearer_token — bearer for /webhook/cdio, /digest/generate, /refresh +# cdio_api_key — changedetection.io x-api-key (copy from +# `jsondecode(secret/changedetection.homepage_credentials).changedetection.api_key`) +# smtp_username — SMTP sender account (mailserver) +# smtp_password — SMTP password (mailserver) +# digest_to_address — where the weekly digest goes +# digest_from_address — From: header for the digest +resource "kubernetes_manifest" "external_secret" { + manifest = { + apiVersion = "external-secrets.io/v1beta1" + kind = "ExternalSecret" + metadata = { + name = "job-hunter-secrets" + namespace = local.namespace + } + spec = { + refreshInterval = "15m" + secretStoreRef = { + name = "vault-kv" + kind = "ClusterSecretStore" + } + target = { + name = "job-hunter-secrets" + template = { + metadata = { + annotations = { + "reloader.stakater.com/match" = "true" + } + } + } + } + dataFrom = [{ + extract = { + key = "job-hunter" + } + }] + } + } + depends_on = [kubernetes_namespace.job_hunter] +} + +# DB credentials from Vault database engine (7-day rotation). +# Template builds the asyncpg DSN consumed by the FastAPI app as DB_CONNECTION_STRING and also exposes the raw password as DB_PASSWORD.
+resource "kubernetes_manifest" "db_external_secret" { + manifest = { + apiVersion = "external-secrets.io/v1beta1" + kind = "ExternalSecret" + metadata = { + name = "job-hunter-db-creds" + namespace = local.namespace + } + spec = { + refreshInterval = "15m" + secretStoreRef = { + name = "vault-database" + kind = "ClusterSecretStore" + } + target = { + name = "job-hunter-db-creds" + template = { + metadata = { + annotations = { + "reloader.stakater.com/match" = "true" + } + } + data = { + DB_CONNECTION_STRING = "postgresql+asyncpg://job_hunter:{{ .password }}@${var.postgresql_host}:5432/job_hunter" + DB_PASSWORD = "{{ .password }}" + } + } + } + data = [{ + secretKey = "password" + remoteRef = { + key = "static-creds/pg-job-hunter" + property = "password" + } + }] + } + } + depends_on = [kubernetes_namespace.job_hunter] +} + +resource "kubernetes_deployment" "job_hunter" { + metadata { + name = "job-hunter" + namespace = kubernetes_namespace.job_hunter.metadata[0].name + labels = merge(local.labels, { + tier = local.tiers.aux + }) + annotations = { + "reloader.stakater.com/search" = "true" + } + } + + spec { + replicas = 1 + strategy { + type = "Recreate" + } + + selector { + match_labels = local.labels + } + + template { + metadata { + labels = local.labels + } + + spec { + image_pull_secrets { + name = "registry-credentials" + } + + init_container { + name = "alembic-migrate" + image = local.image + command = ["python", "-m", "job_hunter", "migrate"] + + env_from { + secret_ref { + name = "job-hunter-secrets" + } + } + env_from { + secret_ref { + name = "job-hunter-db-creds" + } + } + + resources { + requests = { + cpu = "50m" + memory = "256Mi" + } + limits = { + memory = "512Mi" + } + } + } + + container { + name = "job-hunter" + image = local.image + + port { + container_port = 8080 + } + + env_from { + secret_ref { + name = "job-hunter-secrets" + } + } + env_from { + secret_ref { + name = "job-hunter-db-creds" + } + } + + env { + name = "CDIO_BASE_URL" + value
= "http://changedetection.changedetection.svc.cluster.local" + } + env { + name = "SMTP_HOST" + value = "mailserver.mailserver.svc.cluster.local" + } + env { + name = "SMTP_PORT" + value = "587" + } + env { + name = "JOB_HUNTER_WEBHOOK_URL" + value = "http://job-hunter.job-hunter.svc.cluster.local:8080/webhook/cdio" + } + + readiness_probe { + http_get { + path = "/healthz" + port = 8080 + } + initial_delay_seconds = 5 + period_seconds = 10 + } + + liveness_probe { + http_get { + path = "/healthz" + port = 8080 + } + initial_delay_seconds = 5 + period_seconds = 10 + } + + resources { + requests = { + cpu = "100m" + memory = "512Mi" + } + # Chromium baseline ~1Gi — matches broker-sync precedent. + limits = { + memory = "1280Mi" + } + } + } + } + } + } + + lifecycle { + ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1 + } + + depends_on = [ + kubernetes_manifest.external_secret, + kubernetes_manifest.db_external_secret, + ] +} + +# ClusterIP-only — job-hunter has no public UI. Webhook, digest, and refresh +# endpoints are cluster-internal (n8n / CDIO / CronJob triggers). +resource "kubernetes_service" "job_hunter" { + metadata { + name = "job-hunter" + namespace = kubernetes_namespace.job_hunter.metadata[0].name + labels = local.labels + } + + spec { + type = "ClusterIP" + selector = local.labels + + port { + name = "http" + port = 8080 + target_port = 8080 + } + } +} + +# Plan-time read of the ESO-created DB creds Secret; its DB_PASSWORD key feeds the Grafana datasource. +# On the first apply, run with -target=kubernetes_manifest.db_external_secret beforehand so the Secret exists. NOTE(review): the password is captured at apply time, so after a Vault rotation the datasource presumably stays stale until the next apply — confirm. +data "kubernetes_secret" "job_hunter_db_creds" { + metadata { + name = "job-hunter-db-creds" + namespace = kubernetes_namespace.job_hunter.metadata[0].name + } + depends_on = [kubernetes_manifest.db_external_secret] +} + +# Grafana datasource for the job_hunter Postgres DB. Lives in the monitoring +# namespace so the grafana sidecar (label grafana_datasource=1) picks it up.
+resource "kubernetes_config_map" "grafana_job_hunter_datasource" { + metadata { + name = "grafana-job-hunter-datasource" + namespace = "monitoring" + labels = { + grafana_datasource = "1" + } + } + data = { + "job-hunter-datasource.yaml" = yamlencode({ + apiVersion = 1 + datasources = [{ + name = "Job Hunter" + type = "postgres" + access = "proxy" + url = "${var.postgresql_host}:5432" + user = "job_hunter" + uid = "job-hunter-pg" + # Grafana 11.2+ Postgres plugin reads the DB name from jsonData.database; + # the top-level `database` field is silently ignored by the frontend. + jsonData = { + database = "job_hunter" + sslmode = "disable" + postgresVersion = 1600 + timescaledb = false + } + secureJsonData = { + password = data.kubernetes_secret.job_hunter_db_creds.data["DB_PASSWORD"] + } + editable = true + }] + }) + } +} diff --git a/stacks/job-hunter/terragrunt.hcl b/stacks/job-hunter/terragrunt.hcl new file mode 100644 index 00000000..d39afd69 --- /dev/null +++ b/stacks/job-hunter/terragrunt.hcl @@ -0,0 +1,23 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "platform" { + config_path = "../platform" + skip_outputs = true +} + +dependency "vault" { + config_path = "../vault" + skip_outputs = true +} + +dependency "external-secrets" { + config_path = "../external-secrets" + skip_outputs = true +} + +inputs = { + # Bump on every deploy — 8-char git SHA from the CI build.
+ image_tag = "latest" +} diff --git a/stacks/monitoring/modules/monitoring/dashboards/job-hunter.json b/stacks/monitoring/modules/monitoring/dashboards/job-hunter.json new file mode 100644 index 00000000..d38bc40c --- /dev/null +++ b/stacks/monitoring/modules/monitoring/dashboards/job-hunter.json @@ -0,0 +1,213 @@ +{ + "annotations": {"list": []}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, + "description": "Newly-ingested roles (by fetched_at).", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "bars", + "fillOpacity": 60, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "showPoints": "auto", + "spanNulls": false, + "stacking": {"mode": "none"} + }, + "thresholds": {"mode": "absolute", "steps": []} + }, + "overrides": [] + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}, + "id": 1, + "options": { + "legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "single", "sort": "none"} + }, + "targets": [ + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT date_trunc('day', fetched_at) AT TIME ZONE 'UTC' AS time, source, COUNT(*) AS value FROM job_hunter.roles WHERE $__timeFilter(fetched_at) GROUP BY 1, 2 ORDER BY 1", + "refId": "A" + } + ], + "title": "New roles per day by source", + "type": "timeseries" + }, + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, + "description": "Distinct open roles by source over the time window.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"hideFrom": {"legend": false, "tooltip": false, "viz": false}}, + "mappings": [] + }, + "overrides": [] + }, + "gridPos":
{"h": 8, "w": 12, "x": 12, "y": 0}, + "id": 2, + "options": { + "legend": {"displayMode": "table", "placement": "right", "showLegend": true, "values": ["value"]}, + "pieType": "donut", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": true}, + "tooltip": {"mode": "single", "sort": "none"} + }, + "targets": [ + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, + "format": "table", + "rawQuery": true, + "rawSql": "SELECT source AS metric, COUNT(DISTINCT dedup_key) AS value FROM job_hunter.roles WHERE $__timeFilter(fetched_at) GROUP BY source ORDER BY value DESC", + "refId": "A" + } + ], + "title": "Roles by source (deduplicated)", + "type": "piechart" + }, + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, + "description": "Top 20 companies by recent role volume.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "lineWidth": 1 + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": []} + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 8}, + "id": 3, + "options": { + "barRadius": 0, + "barWidth": 0.85, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": {"displayMode": "hidden", "placement": "bottom", "showLegend": false}, + "orientation": "horizontal", + "showValue": "auto", + "stacking": "none", + "tooltip": {"mode": "single", "sort": "none"}, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, + "format": "table", + "rawQuery": true, + "rawSql": "SELECT c.display_name, COUNT(*) AS roles FROM job_hunter.roles r JOIN job_hunter.companies c ON r.company_id = c.id WHERE $__timeFilter(r.fetched_at) GROUP BY c.display_name ORDER BY roles DESC LIMIT 20", + "refId": "A" + } + ], + "title": "Top companies by role volume",
+ "type": "barchart" + }, + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, + "description": "Normalised base salary distribution (£) for roles with explicit comp.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"fillOpacity": 80, "lineWidth": 1}, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 8}, + "id": 4, + "options": { + "bucketOffset": 0, + "combine": false, + "legend": {"displayMode": "list", "placement": "bottom", "showLegend": true} + }, + "targets": [ + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, + "format": "table", + "rawQuery": true, + "rawSql": "SELECT parsed_base_gbp::float AS base_gbp FROM job_hunter.roles WHERE parsed_base_gbp IS NOT NULL AND $__timeFilter(fetched_at)", + "refId": "A" + } + ], + "title": "Salary distribution (GBP)", + "type": "histogram" + }, + { + "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, + "description": "Recent roles, ranked by salary-parse confidence then parsed base.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "custom": { + "align": "auto", + "cellOptions": {"type": "auto"}, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": []} + }, + "overrides": [ + { + "matcher": {"id": "byName", "options": "apply_url"}, + "properties": [ + {"id": "custom.cellOptions", "value": {"type": "auto"}}, + {"id": "links", "value": [{"targetBlank": true, "title": "Open", "url": "${__value.raw}"}]} + ] + }, + { + "matcher": {"id": "byName", "options": "base_gbp"}, + "properties": [{"id": "unit", "value": "currencyGBP"}] + } + ] + }, + "gridPos": {"h": 12, "w": 24, "x": 0, "y": 17}, + "id": 5, + "options": { + "cellHeight": "sm", + "footer": {"countRows": false, "fields": "", "reducer": ["sum"], "show": false}, + "showHeader": true + }, + "targets": [ + { +
"datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, + "format": "table", + "rawQuery": true, + "rawSql": "SELECT r.posted_at, c.display_name AS company, r.title, r.location, r.remote_policy, r.parsed_base_gbp::float AS base_gbp, r.salary_parse_confidence, r.source, r.apply_url FROM job_hunter.roles r JOIN job_hunter.companies c ON r.company_id = c.id WHERE $__timeFilter(r.fetched_at) ORDER BY r.salary_parse_confidence DESC NULLS LAST, r.parsed_base_gbp DESC NULLS LAST, r.posted_at DESC NULLS LAST LIMIT 100", + "refId": "A" + } + ], + "title": "Top roles", + "type": "table" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": ["job-hunter", "jobs", "careers"], + "templating": {"list": []}, + "time": {"from": "now-30d", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Job Hunter", + "uid": "job-hunter", + "version": 1, + "weekStart": "" +} diff --git a/stacks/monitoring/modules/monitoring/grafana.tf b/stacks/monitoring/modules/monitoring/grafana.tf index c82eee6b..2c5089ee 100644 --- a/stacks/monitoring/modules/monitoring/grafana.tf +++ b/stacks/monitoring/modules/monitoring/grafana.tf @@ -107,16 +107,16 @@ locals { # Dashboard folder assignments dashboard_folders = { # Cluster & Kubernetes - "api_server.json" = "Cluster" - "cluster_health.json" = "Cluster" - "nodes.json" = "Cluster" - "pods.json" = "Cluster" - "kube-state-metrics.json" = "Cluster" + "api_server.json" = "Cluster" + "cluster_health.json" = "Cluster" + "nodes.json" = "Cluster" + "pods.json" = "Cluster" + "kube-state-metrics.json" = "Cluster" # Networking & DNS - "core_dns.json" = "Networking" - "technitium-dns.json" = "Networking" - "nginx_ingress.json" = "Networking" - "network_traffic.json" = "Networking" + "core_dns.json" = "Networking" + "technitium-dns.json" = "Networking" + "nginx_ingress.json" = "Networking" + "network_traffic.json" = "Networking" # Hardware & Host "node_exporter_full.json" = "Hardware" @@ -126,15 +126,16 @@ locals {
"nvidia.json" = "Hardware" # Operations - "backup_health.json" = "Operations" - "registry.json" = "Operations" - "loki.json" = "Operations" - "k8s-audit.json" = "Operations" + "backup_health.json" = "Operations" + "registry.json" = "Operations" + "loki.json" = "Operations" + "k8s-audit.json" = "Operations" # Applications - "qbittorrent.json" = "Applications" - "realestate-crawler.json" = "Applications" - "uk-payslip.json" = "Finance" + "qbittorrent.json" = "Applications" + "realestate-crawler.json" = "Applications" + "uk-payslip.json" = "Finance" + "job-hunter.json" = "Finance" } } diff --git a/stacks/n8n/workflows/job-hunter-weekly-digest.json b/stacks/n8n/workflows/job-hunter-weekly-digest.json new file mode 100644 index 00000000..40c6b3cd --- /dev/null +++ b/stacks/n8n/workflows/job-hunter-weekly-digest.json @@ -0,0 +1,49 @@ +{ + "name": "Job Hunter Weekly Digest", + "active": false, + "nodes": [ + { + "parameters": { + "rule": { + "interval": [{"field": "weeks", "triggerAtDay": [1], "triggerAtHour": 7, "triggerAtMinute": 0}] + } + }, + "id": "cron-weekly", + "name": "Weekly Monday 07:00 London", + "type": "n8n-nodes-base.scheduleTrigger", + "typeVersion": 1.1, + "position": [250, 300] + }, + { + "parameters": { + "method": "POST", + "url": "http://job-hunter.job-hunter.svc.cluster.local:8080/digest/generate", + "sendHeaders": true, + "headerParameters": { + "parameters": [ + {"name": "Authorization", "value": "=Bearer {{ $env.JOB_HUNTER_WEBHOOK_TOKEN }}"}, + {"name": "Content-Type", "value": "application/json"} + ] + }, + "sendBody": true, + "contentType": "json", + "jsonBody": "={\"window_days\":7,\"limit\":50,\"dry_run\":false}", + "options": {"timeout": 120000} + }, + "id": "digest-generate", + "name": "Call /digest/generate", + "type": "n8n-nodes-base.httpRequest", + "typeVersion": 4.2, + "position": [500, 300] + } + ], + "connections": { + "Weekly Monday 07:00 London": { + "main": [[{"node": "Call /digest/generate", "type": "main", "index": 0}]] + } + }, +
"settings": {"executionOrder": "v1", "saveExecutionProgress": false, "saveManualExecutions": true, "timezone": "Europe/London"}, + "staticData": null, + "meta": {"templateCredsSetupCompleted": false}, + "pinData": {} +} diff --git a/stacks/vault/main.tf b/stacks/vault/main.tf index 4f1a4f68..0b8ef993 100644 --- a/stacks/vault/main.tf +++ b/stacks/vault/main.tf @@ -536,7 +536,7 @@ resource "vault_database_secret_backend_connection" "postgresql" { # "pg-trading", # Commented out 2026-04-06 - trading-bot disabled "pg-health", "pg-linkwarden", "pg-affine", "pg-woodpecker", "pg-claude-memory", - "pg-terraform-state", "pg-payslip-ingest" + "pg-terraform-state", "pg-payslip-ingest", "pg-job-hunter" ] postgresql { @@ -682,6 +682,14 @@ resource "vault_database_secret_backend_static_role" "pg_payslip_ingest" { rotation_period = 604800 } +resource "vault_database_secret_backend_static_role" "pg_job_hunter" { + backend = vault_mount.database.path + db_name = vault_database_secret_backend_connection.postgresql.name + name = "pg-job-hunter" + username = "job_hunter" + rotation_period = 604800 +} + # ============================================================================= # Kubernetes Secrets Engine — Dynamic K8s Credentials # =============================================================================