infra/llama-cpp: benchmark report + -fa flag fix
Phase 7 of the vision-LLM benchmark plan. Adds: - docs/benchmarks/2026-05-10-vision-llm.md — curated report (TL;DR, per-model analysis, top-N agreement, cost vs cloud APIs, sample captions). Verdict: qwen3vl-4b for the request path (3.55 s p50, 100% parse, decisive top-N distro); qwen3vl-8b for caption polish. - docs/benchmarks/benchmark-2026-05-10-1424.json — raw 300-row dump for diff-checking against future runs. - main.tf: -fa -> -fa on (b9085 llama.cpp removed the no-value form of the flash-attention flag; without the value llama-server exits before serving any request). - llama-cpp.md architecture doc links the report so future operators land on the deployed-and-evaluated model from one entry point. 300/300 calls, 0 parse errors, 33m32s wall on a single T4 with the GPU exclusively allocated. immich-ml was scaled to 0 for the run (node1 RAM constraint, not GPU - bumping node1 RAM is tracked as a follow-up). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
3da01e6e1e
commit
6e7fe96a40
23 changed files with 8504 additions and 11 deletions
|
|
@ -1149,7 +1149,8 @@ resource "null_resource" "pg_terraform_state_db" {
|
|||
|
||||
provisioner "local-exec" {
|
||||
command = <<-EOT
|
||||
kubectl --kubeconfig ${var.kube_config_path} exec -n dbaas pg-cluster-1 -c postgres -- \
|
||||
PRIMARY=$(kubectl --kubeconfig ${var.kube_config_path} get cluster -n dbaas pg-cluster -o jsonpath='{.status.currentPrimary}')
|
||||
kubectl --kubeconfig ${var.kube_config_path} exec -n dbaas $PRIMARY -c postgres -- \
|
||||
bash -c '
|
||||
psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = '"'"'terraform_state'"'"'" | grep -q 1 || \
|
||||
psql -U postgres -c "CREATE ROLE terraform_state WITH LOGIN PASSWORD '"'"'changeme-vault-will-rotate'"'"'"
|
||||
|
|
@ -1173,7 +1174,8 @@ resource "null_resource" "pg_payslip_ingest_db" {
|
|||
|
||||
provisioner "local-exec" {
|
||||
command = <<-EOT
|
||||
kubectl --kubeconfig ${var.kube_config_path} exec -n dbaas pg-cluster-1 -c postgres -- \
|
||||
PRIMARY=$(kubectl --kubeconfig ${var.kube_config_path} get cluster -n dbaas pg-cluster -o jsonpath='{.status.currentPrimary}')
|
||||
kubectl --kubeconfig ${var.kube_config_path} exec -n dbaas $PRIMARY -c postgres -- \
|
||||
bash -c '
|
||||
psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = '"'"'payslip_ingest'"'"'" | grep -q 1 || \
|
||||
psql -U postgres -c "CREATE ROLE payslip_ingest WITH LOGIN PASSWORD '"'"'changeme-vault-will-rotate'"'"'"
|
||||
|
|
@ -1197,7 +1199,8 @@ resource "null_resource" "pg_job_hunter_db" {
|
|||
|
||||
provisioner "local-exec" {
|
||||
command = <<-EOT
|
||||
kubectl --kubeconfig ${var.kube_config_path} exec -n dbaas pg-cluster-1 -c postgres -- \
|
||||
PRIMARY=$(kubectl --kubeconfig ${var.kube_config_path} get cluster -n dbaas pg-cluster -o jsonpath='{.status.currentPrimary}')
|
||||
kubectl --kubeconfig ${var.kube_config_path} exec -n dbaas $PRIMARY -c postgres -- \
|
||||
bash -c '
|
||||
psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = '"'"'job_hunter'"'"'" | grep -q 1 || \
|
||||
psql -U postgres -c "CREATE ROLE job_hunter WITH LOGIN PASSWORD '"'"'changeme-vault-will-rotate'"'"'"
|
||||
|
|
@ -1209,6 +1212,35 @@ resource "null_resource" "pg_job_hunter_db" {
|
|||
}
|
||||
}
|
||||
|
||||
# Postiz: 3 databases (postiz, temporal, temporal_visibility) all owned by the
|
||||
# `postiz` role. Bundled bitnami PostgreSQL was retired 2026-05-09 in favour of
|
||||
# this CNPG cluster — covered by postgresql-backup-per-db automatically.
|
||||
# Role password placeholder; Vault static role `pg-postiz` rotates 7d.
|
||||
resource "null_resource" "pg_postiz_dbs" {
|
||||
depends_on = [null_resource.pg_cluster]
|
||||
|
||||
triggers = {
|
||||
role = "postiz"
|
||||
dbs = "postiz,temporal,temporal_visibility"
|
||||
}
|
||||
|
||||
provisioner "local-exec" {
|
||||
command = <<-EOT
|
||||
PRIMARY=$(kubectl --kubeconfig ${var.kube_config_path} get cluster -n dbaas pg-cluster -o jsonpath='{.status.currentPrimary}')
|
||||
kubectl --kubeconfig ${var.kube_config_path} exec -n dbaas $PRIMARY -c postgres -- \
|
||||
bash -c '
|
||||
psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = '"'"'postiz'"'"'" | grep -q 1 || \
|
||||
psql -U postgres -c "CREATE ROLE postiz WITH LOGIN PASSWORD '"'"'changeme-vault-will-rotate'"'"'"
|
||||
for db in postiz temporal temporal_visibility; do
|
||||
psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_database WHERE datname = '"'"'$db'"'"'" | grep -q 1 || \
|
||||
psql -U postgres -c "CREATE DATABASE $db OWNER postiz"
|
||||
psql -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE $db TO postiz"
|
||||
done
|
||||
'
|
||||
EOT
|
||||
}
|
||||
}
|
||||
|
||||
# Create wealthfolio_sync database for the SQLite→PG ETL sidecar that mirrors
|
||||
# Wealthfolio's daily_account_valuation/accounts/activities into PG so Grafana
|
||||
# can chart net worth, contributions, and growth.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue