From c02c1277986993359c92b18a2e0ccd5fac5260e0 Mon Sep 17 00:00:00 2001
From: kiania
Date: Fri, 19 Jun 2026 23:38:22 +0300
Subject: [PATCH] fix(connections): shrink MCP DB-connection footprint on a shared 100-conn DB
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The DB is at max_connections=100 and several stack services hold persistent
pools (several as the postgres superuser, idle for hours), so peaks hit
"too many connections". The MCP is a minor contributor but easy to bound:

- Dockerfile: uvicorn --workers 2 → 1. The MCP's connection budget is
  workers × MCP_POOL_MAX, so this caps it at 8 backends instead of 16.
  Scale via MCP_POOL_MAX, not workers, so the budget stays obvious.
  (Pairs with the minconn=0 lazy pool already on this branch: 0 connections
  held when idle.)
- analytics_ro_role.sql: add idle_session_timeout=5min so the DB reaps the
  MCP's idle POOLED connections (idle_in_transaction never reaps them —
  they're idle outside a txn) and returns the slots. Safe because the
  server now discards + transparently retries a reaped connection instead
  of erroring.

Note: the dominant fix is stack-wide (get the superuser app pools onto
bounded, timed roles; consider PgBouncer; or raise max_connections) — out
of this repo's scope but documented in the review.

Co-Authored-By: Claude Opus 4.8
---
 Dockerfile                    | 6 +++++-
 scripts/analytics_ro_role.sql | 6 ++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index dfc8d7b..7a5d015 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -29,4 +29,8 @@ EXPOSE 8892
 HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
     CMD python -c "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8892/healthz').status==200 else 1)" || exit 1
 
-CMD ["uvicorn", "analytics_mcp:app", "--host", "0.0.0.0", "--port", "8892", "--workers", "2"]
+# Single worker: this is a low-traffic read-only proxy for a handful of analysts, and
+# the DB connection budget = workers × MCP_POOL_MAX. One worker (× default pool 8) caps
+# the MCP at 8 backends instead of 16, which matters on a shared 100-connection DB.
+# Scale out by raising MCP_POOL_MAX, not workers, so the budget stays obvious.
+CMD ["uvicorn", "analytics_mcp:app", "--host", "0.0.0.0", "--port", "8892", "--workers", "1"]
diff --git a/scripts/analytics_ro_role.sql b/scripts/analytics_ro_role.sql
index 0692b59..b90a8ea 100644
--- a/scripts/analytics_ro_role.sql
+++ b/scripts/analytics_ro_role.sql
@@ -62,3 +62,9 @@ ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA fuel GRANT EX
 ALTER ROLE analytics_ro SET default_transaction_read_only = on;
 ALTER ROLE analytics_ro SET statement_timeout = '30s';
 ALTER ROLE analytics_ro SET idle_in_transaction_session_timeout = '60s';
+-- Cap idle POOLED connections too (these sit idle but NOT in a transaction, so the
+-- idle_in_transaction timeout never reaps them). On a shared 100-connection DB this
+-- returns slots the MCP isn't using. Safe with the server's dead-connection handling:
+-- a reaped pooled connection is discarded + transparently retried, not surfaced as an
+-- error. (Requires PostgreSQL 14+.)
+ALTER ROLE analytics_ro SET idle_session_timeout = '5min';