-- 21_ingest_health_active_only.sql
-- BUG-P5 (260702 audit): reporting.v_ingest_health included EVERY endpoint ever
-- written to tracksolid.ingestion_log. One-shot operator tools (e.g.
-- backfill_trips_enrichment, last run 2026-05-01) therefore sat at 'stale'
-- forever, wedging dashboard_api GET /health/ingest at "overall": "stale" even
-- when every real poller was healthy — the FleetOps freshness panel signal was
-- useless.
--
-- Fix: restrict the view to the ALLOW-LIST of endpoints the running pipeline
-- actually emits (ingest_worker_rev.py schedule + webhook_receiver_rev.py).
-- The allow-list and the expected cadence live in ONE place (the
-- pipeline_endpoints CTE): adding a new pipeline endpoint => add a single
-- (endpoint, expected_interval_s) row there.
-- Everything else about the view (columns, verdict logic) is unchanged, so the
-- dashboard_api endpoint and its consumers need no changes.
-- Idempotent — CREATE OR REPLACE.

CREATE OR REPLACE VIEW reporting.v_ingest_health AS
WITH pipeline_endpoints (endpoint, expected_interval_s) AS (
    -- Allow-list of active endpoints paired with their expected cadence in
    -- seconds. An endpoint missing from this list never appears in the view.
    VALUES
        ('jimi.user.device.location.list',    60),     -- live sweep (60s)
        ('jimi.device.alarm.list',            300),    -- alarms (5m)
        ('jimi.device.track.mileage',         900),    -- trips (15m)
        ('jimi.open.platform.report.parking', 900),    -- parking (15m)
        ('jimi.device.track.list',            1800),   -- high-res trail (30m)
        ('jimi.user.device.list+detail',      86400),  -- registry sync (daily)
        ('webhook/pushobd',                   3600),   -- webhook (1h)
        ('webhook/pushfaultinfo',             3600),   -- webhook (1h)
        ('webhook/pushalarm',                 3600),   -- webhook (1h)
        ('webhook/pushgps',                   3600),   -- webhook (1h)
        ('webhook/pushhb',                    3600),   -- webhook (1h)
        ('webhook/pushtripreport',            3600),   -- webhook (1h)
        ('webhook/pushevent',                 3600)    -- webhook (1h)
),
last_run AS (
    -- Most recent log row per allow-listed endpoint (DISTINCT ON keeps the
    -- first row of each endpoint group under the ORDER BY below).
    -- NOTE(review): the inner join means an allow-listed endpoint with no rows
    -- in ingestion_log is absent from the view rather than reported as stale.
    SELECT DISTINCT ON (il.endpoint)
        il.endpoint,
        il.run_at,
        il.success,
        il.error_code,
        il.error_message,
        il.rows_inserted,
        il.rows_upserted,
        pe.expected_interval_s
    FROM tracksolid.ingestion_log AS il
    INNER JOIN pipeline_endpoints AS pe USING (endpoint)
    ORDER BY il.endpoint, il.run_at DESC
),
agg AS (
    -- Run and failure counts over the trailing hour. The WHERE clause already
    -- bounds the window, so the aggregates need no per-aggregate time filter.
    SELECT
        il.endpoint,
        count(*) AS runs_1h,
        count(*) FILTER (WHERE NOT il.success) AS failures_1h
    FROM tracksolid.ingestion_log AS il
    INNER JOIN pipeline_endpoints AS pe USING (endpoint)
    WHERE il.run_at > now() - interval '1 hour'
    GROUP BY il.endpoint
)
SELECT
    lr.endpoint,
    lr.run_at AS last_run_at,
    EXTRACT(EPOCH FROM (now() - lr.run_at))::int AS seconds_ago,
    lr.success AS last_success,
    lr.error_code,
    lr.error_message,
    lr.rows_inserted,
    lr.rows_upserted,
    -- Endpoints with no runs in the last hour have no agg row; report zeros.
    COALESCE(a.runs_1h, 0) AS runs_1h,
    COALESCE(a.failures_1h, 0) AS failures_1h,
    lr.expected_interval_s,
    -- Staleness is checked first, so an old failed run reports 'stale'.
    -- NOTE(review): a NULL success falls through to 'ok' — confirm the column
    -- is NOT NULL.
    CASE
        WHEN EXTRACT(EPOCH FROM (now() - lr.run_at)) > 3 * lr.expected_interval_s
            THEN 'stale'
        WHEN NOT lr.success
            THEN 'error'
        ELSE 'ok'
    END AS freshness
FROM last_run AS lr
LEFT JOIN agg AS a USING (endpoint)
ORDER BY seconds_ago DESC;

COMMENT ON VIEW reporting.v_ingest_health IS
    'Per-endpoint ingest freshness from tracksolid.ingestion_log, restricted to '
    'the active pipeline endpoints (migration 21) so one-shot tools cannot wedge '
    'the verdict at stale. Surfaced by dashboard_api GET /health/ingest. '
    'freshness = ok|stale|error (stale = past 3x the ingest_worker_rev.py cadence).';

-- Read-only access for the dashboard roles (guarded; idempotent).
-- Each GRANT runs only if the role exists, so the migration does not fail on
-- databases where a dashboard role has not been created.
DO $grants$
BEGIN
    IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'grafana_ro') THEN
        GRANT SELECT ON reporting.v_ingest_health TO grafana_ro;
    END IF;
    IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'dashboard_ro') THEN
        GRANT SELECT ON reporting.v_ingest_health TO dashboard_ro;
    END IF;
END
$grants$;