tracksolid_timescale_grafan.../migrations/19_v_ingest_health.sql
david kiania 76f6915e61 feat(stack): consolidate 7→4 services (merge pollers, drop pgbouncer/grafana)
Collapse the backend from 7 Coolify services to 4 app services + the DB.

- Merge ingest_movement + ingest_events into a single ingest_worker:
  split each poller's main() into reusable startup_catchup()/register_jobs()
  and drive both from one schedule loop in new ingest_worker_rev.py
  (standalone entrypoints retained for local debug).
- docker-compose.yaml: replace the two poller services with ingest_worker;
  remove the pgbouncer service (dormant; transaction-mode pooling is unsafe
  for the advisory-lock'd v_trips refresher) and the grafana service +
  grafana-data volume (redundant with the FleetOps SPA).
- Add reporting.v_ingest_health (migration 19) + dashboard_api GET
  /health/ingest as the pipeline-freshness surface that replaces Grafana's
  health panels.

webhook_receiver stays isolated so a poller fault can't drop inbound pushes.
timescale_db and db_backup are unchanged.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 21:41:05 +03:00

79 lines
3.6 KiB
SQL

-- 19_v_ingest_health.sql
-- reporting.v_ingest_health — per-endpoint ingest freshness for FleetOps.
--
-- CONTEXT: Grafana was removed (2026-06-10) as redundant with the FleetOps SPA.
-- The one signal only Grafana surfaced was pipeline freshness. This view replaces
-- it with a read-API-friendly surface derived from the existing
-- tracksolid.ingestion_log (every poll already writes a row via log_ingestion()),
-- so FleetOps can show "is the ingest pipeline alive / stale / erroring" per
-- endpoint without a separate dashboard product. Exposed by dashboard_api as
-- GET /health/ingest.
--
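-- One row per endpoint: the latest run, how long ago it ran, whether it succeeded
-- (with its error code/message), 1-hour run + failure counts, and a coarse
-- freshness verdict. Each endpoint's expected cadence mirrors the ingest_worker
-- schedule (ingest_worker_rev.py); 'stale' fires only past 3x that cadence, and
-- is checked before 'error'. Endpoints without an explicit cadence in the view
-- fall back to 1 h (stale after 3 h), so daily/low-frequency jobs need their own
-- entry to avoid being false-flagged.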
-- Guarded + idempotent -> safe to re-apply.
-- reporting.v_ingest_health: one row per ingestion_log endpoint describing its
-- most recent run, trailing-hour run/failure counts, and a freshness verdict.
-- Output column names, order and types are part of the contract with
-- CREATE OR REPLACE VIEW (and with readers of the view) -- append only.
CREATE OR REPLACE VIEW reporting.v_ingest_health AS
WITH last_run AS (
    -- Most recent log row per endpoint: DISTINCT ON keeps the first row of each
    -- endpoint group under the ORDER BY below, i.e. the one with the greatest run_at.
    SELECT DISTINCT ON (endpoint)
        endpoint,
        run_at,
        success,
        error_code,
        error_message,
        rows_inserted,
        rows_upserted
    FROM tracksolid.ingestion_log
    ORDER BY endpoint, run_at DESC
),
recent AS (
    -- Run / failure counts over the trailing hour. The window lives in WHERE
    -- rather than in per-aggregate FILTER clauses so only recent rows are read
    -- instead of aggregating the whole log on every query. Endpoints with no
    -- run in the window are simply absent here; the LEFT JOIN + COALESCE in the
    -- main query turns that into 0, so the result is the same.
    SELECT
        endpoint,
        count(*)                            AS runs_1h,
        count(*) FILTER (WHERE NOT success) AS failures_1h
    FROM tracksolid.ingestion_log
    WHERE run_at > now() - interval '1 hour'
    GROUP BY endpoint
)
SELECT
    lr.endpoint,
    lr.run_at                                     AS last_run_at,
    EXTRACT(EPOCH FROM (now() - lr.run_at))::int  AS seconds_ago,
    lr.success                                    AS last_success,  -- outcome of the latest run
    lr.error_code,
    lr.error_message,
    lr.rows_inserted,
    lr.rows_upserted,
    COALESCE(r.runs_1h, 0)                        AS runs_1h,
    COALESCE(r.failures_1h, 0)                    AS failures_1h,
    ex.expected_interval_s,
    CASE
        -- 'stale' is checked first: an endpoint that stopped running is
        -- reported as stale even if its last run failed.
        WHEN EXTRACT(EPOCH FROM (now() - lr.run_at)) > 3 * ex.expected_interval_s THEN 'stale'
        -- NOTE(review): a NULL success falls through to 'ok' here; confirm the
        -- column is NOT NULL or decide whether unknown should read as 'error'.
        WHEN NOT lr.success THEN 'error'
        ELSE 'ok'
    END                                           AS freshness
FROM last_run AS lr
LEFT JOIN recent AS r USING (endpoint)
CROSS JOIN LATERAL (
    -- Expected seconds between runs, per endpoint.
    SELECT CASE lr.endpoint
        WHEN 'jimi.user.device.location.list'    THEN 60    -- live sweep (60s)
        WHEN 'jimi.device.alarm.list'            THEN 300   -- alarms (5m)
        WHEN 'jimi.device.track.mileage'         THEN 900   -- trips (15m)
        WHEN 'jimi.open.platform.report.parking' THEN 900   -- parking (15m)
        WHEN 'jimi.device.track.list'            THEN 1800  -- high-res trail (30m)
        -- default (1h): anything polled less often than hourly needs its own
        -- entry above, otherwise it reads 'stale' 3 hours after each run.
        ELSE 3600
    END AS expected_interval_s
) AS ex
-- Least recently run endpoints first; endpoint breaks ties so the row order is
-- deterministic when two endpoints share the same whole-second age.
ORDER BY seconds_ago DESC, lr.endpoint;
-- Catalog description for the view (visible via \d+ and obj_description()).
-- Adjacent string literals separated by a newline are concatenated by the parser.
COMMENT ON VIEW reporting.v_ingest_health
    IS 'Per-endpoint ingest freshness from tracksolid.ingestion_log. Replaces the '
       'Grafana pipeline-health panels (Grafana removed 2026-06-10). Surfaced by '
       'dashboard_api GET /health/ingest. freshness = ok|stale|error (stale = past 3x '
       'the ingest_worker_rev.py cadence).';
-- Grant SELECT on the view to each read-only dashboard role that exists.
-- Roles missing from pg_roles are skipped, so the migration does not fail on
-- environments where a role was never created, and it can be re-applied.
DO $grants$
DECLARE
    target_role text;
BEGIN
    FOREACH target_role IN ARRAY ARRAY['grafana_ro', 'dashboard_ro']
    LOOP
        IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = target_role) THEN
            -- %I quotes the role name as an identifier.
            EXECUTE format(
                'GRANT SELECT ON reporting.v_ingest_health TO %I',
                target_role
            );
        END IF;
    END LOOP;
END $grants$;