From fae40942a26f96965efe10c94542c4cff2b92549 Mon Sep 17 00:00:00 2001 From: kiania Date: Tue, 23 Jun 2026 21:35:31 +0300 Subject: [PATCH 1/2] infra(deploy): serve MCP on multiple domains + SKIP_BUILD for label-only redeploys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prod connector domain (fleetmcp.rahamafresh.com) had no Traefik router — deploy.sh only ever set one HOST_DOMAIN (defaulting to fleetmcp.fivetitude.com), so requests to the prod domain returned 503 "no available server" even with the container healthy. - HOST_DOMAINS: comma-separated list folded into one Traefik router rule (Host(`a`) || Host(`b`)). One LE cert covers all names (SANs), so connectors on either domain keep working. Defaults to HOST_DOMAIN (back-compatible). - SKIP_BUILD=1: reuse the existing image for a labels/env-only redeploy, so a routing change can't accidentally bake in new/stale code. Deployed to prod with HOST_DOMAINS="fleetmcp.rahamafresh.com,fleetmcp.fivetitude.com"; both domains verified (healthz 200, /mcp 401, valid SAN cert). Co-Authored-By: Claude Opus 4.8 --- deploy.sh | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/deploy.sh b/deploy.sh index d837822..b16868c 100755 --- a/deploy.sh +++ b/deploy.sh @@ -24,6 +24,20 @@ set -euo pipefail NAME=analytics_mcp PORT=8892 HOST_DOMAIN="${HOST_DOMAIN:-fleetmcp.fivetitude.com}" # prod: fleetmcp.rahamafresh.com +# Comma-separated list of every domain this service answers on (defaults to +# HOST_DOMAIN). All are folded into ONE Traefik router rule so a single cert +# covers them and connectors on either domain keep working. 
+HOST_DOMAINS="${HOST_DOMAINS:-$HOST_DOMAIN}" +BT='`' +RULE="" +IFS=',' read -ra _DOMS <<< "$HOST_DOMAINS" +for _d in "${_DOMS[@]}"; do + _d="${_d// /}" + if [ -n "$_d" ]; then + seg="Host(${BT}${_d}${BT})" + if [ -z "$RULE" ]; then RULE="$seg"; else RULE="$RULE || $seg"; fi + fi +done IMAGE="fleetanalytics-mcp:latest" ENV_FILE="$(pwd)/.deploy.env" @@ -53,9 +67,15 @@ RO_PW=$(cat "${ANALYTICS_RO_PW_FILE:-$HOME/.analytics_ro.pw}" 2>/dev/null || tru HOSTPART="${SRC_DB_URL#*@}" # host:port/dbname[?params] RO_DB_URL="postgresql://analytics_ro:${RO_PW}@${HOSTPART}" -# Build the image from this repo. -echo "Building $IMAGE ..." -docker build -t "$IMAGE" . +# Build the image from this repo (SKIP_BUILD=1 reuses the existing image for a +# labels/env-only change — no new code is pulled in). +if [ "${SKIP_BUILD:-0}" = "1" ]; then + echo "SKIP_BUILD=1 — reusing existing $IMAGE (no rebuild)." + docker image inspect "$IMAGE" >/dev/null 2>&1 || { echo "ERROR: $IMAGE not present"; exit 1; } +else + echo "Building $IMAGE ..." + docker build -t "$IMAGE" . +fi # Minimal env (read-only DSN + auth only — no Tracksolid ingestion secrets). 
{ echo "DATABASE_URL=${RO_DB_URL}"; echo "MCP_AUTH_TOKENS=${MCP_AUTH_TOKENS}"; } > "$ENV_FILE" @@ -73,9 +93,9 @@ docker run -d --name "$NAME" --restart unless-stopped \ --label 'traefik.http.middlewares.fleetmcp-ratelimit.ratelimit.burst=60' \ --label "traefik.http.routers.http-0-fleetmcp.entryPoints=http" \ --label "traefik.http.routers.http-0-fleetmcp.middlewares=redirect-to-https" \ - --label "traefik.http.routers.http-0-fleetmcp.rule=Host(\`${HOST_DOMAIN}\`)" \ + --label "traefik.http.routers.http-0-fleetmcp.rule=${RULE}" \ --label "traefik.http.routers.https-0-fleetmcp.entryPoints=https" \ - --label "traefik.http.routers.https-0-fleetmcp.rule=Host(\`${HOST_DOMAIN}\`)" \ + --label "traefik.http.routers.https-0-fleetmcp.rule=${RULE}" \ --label "traefik.http.routers.https-0-fleetmcp.middlewares=fleetmcp-ratelimit" \ --label "traefik.http.routers.https-0-fleetmcp.tls=true" \ --label "traefik.http.routers.https-0-fleetmcp.tls.certresolver=letsencrypt" \ From af6fdbcd3f357e2aafef4b236879a0d7bf67d99c Mon Sep 17 00:00:00 2001 From: kiania Date: Fri, 26 Jun 2026 16:54:07 +0300 Subject: [PATCH 2/2] fix(logging): attribute each query to its analyst caller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BearerAuth middleware matched a per-analyst token but only stashed it on request.state, which the FastMCP tools never see — so the query log line logged rows/sql with no caller, defeating the per-token attribution the auth design promises. Bridge the caller name through a ContextVar (anyio propagates it into the worker thread that runs each sync tool) and include it in the query() log. 
Co-Authored-By: Claude Opus 4.8 --- analytics_mcp.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/analytics_mcp.py b/analytics_mcp.py index 20f559e..e75d426 100644 --- a/analytics_mcp.py +++ b/analytics_mcp.py @@ -26,6 +26,7 @@ Env: """ from __future__ import annotations +import contextvars import hmac import logging import os @@ -65,6 +66,12 @@ def _get_logger(name: str) -> logging.Logger: log = _get_logger("server") +# Per-request caller name, set by BearerAuth from the matched token so the tools can +# attribute each query to an analyst in the logs. A ContextVar (not a tool arg) because +# FastMCP tools never receive the HTTP request; anyio propagates the context into the +# worker thread that runs each sync tool. Defaults to "?" when auth has not run. +_caller_var: contextvars.ContextVar[str] = contextvars.ContextVar("caller", default="?") + DATABASE_URL = os.environ["DATABASE_URL"] # analytics_ro DSN (set by deploy) MAX_ROWS_CEIL = int(os.getenv("MCP_MAX_ROWS", "10000")) # Schemas the introspection helpers (list_tables/describe_table/sample_table) expose. @@ -284,7 +291,10 @@ def query(sql: str, max_rows: int = 1000) -> dict: truncated = len(rows) > cap rows = rows[:cap] dur_ms = int((time.monotonic() - t0) * 1000) - log.info("query rows=%d trunc=%s %dms :: %s", len(rows), truncated, dur_ms, sql[:200]) + log.info( + "query caller=%s rows=%d trunc=%s %dms :: %s", + _caller_var.get(), len(rows), truncated, dur_ms, sql[:200], + ) return {"row_count": len(rows), "truncated": truncated, "rows": rows} @@ -393,6 +403,7 @@ class BearerAuth(BaseHTTPMiddleware): if caller is None: return JSONResponse({"error": "unauthorized"}, status_code=401) request.state.caller = caller + _caller_var.set(caller) # so the tools can attribute the query in the logs return await call_next(request)