tracksolid_timescale_grafan.../migrations/19_v_ingest_health.sql

80 lines
3.6 KiB
MySQL
Raw Normal View History

-- 19_v_ingest_health.sql
-- reporting.v_ingest_health — per-endpoint ingest freshness for FleetOps.
--
-- CONTEXT: Grafana was removed (2026-06-10) as redundant with the FleetOps SPA.
-- The one signal only Grafana surfaced was pipeline freshness. This view replaces
-- it with a read-API-friendly surface derived from the existing
-- tracksolid.ingestion_log (every poll already writes a row via log_ingestion()),
-- so FleetOps can show "is the ingest pipeline alive / stale / erroring" per
-- endpoint without a separate dashboard product. Exposed by dashboard_api as
-- GET /health/ingest.
--
-- One row per endpoint: the latest run, how long ago, last success/error, 1-hour
-- run + failure counts, and a coarse freshness verdict. Each endpoint's expected
-- cadence mirrors the ingest_worker schedule (ingest_worker_rev.py); 'stale' fires
-- only past 3x that cadence so daily/low-frequency jobs aren't false-flagged.
-- Guarded + idempotent -> safe to re-apply.
-- reporting.v_ingest_health: one row per endpoint present in
-- tracksolid.ingestion_log, describing its most recent run, trailing-hour
-- activity, and a coarse freshness verdict.
--
-- Output columns (names, order and types are kept stable so that
-- CREATE OR REPLACE VIEW can be re-applied over an existing definition):
--   endpoint, last_run_at, seconds_ago, last_success, error_code,
--   error_message, rows_inserted, rows_upserted, runs_1h, failures_1h,
--   expected_interval_s, freshness
CREATE OR REPLACE VIEW reporting.v_ingest_health AS
WITH last_run AS (
    -- Latest log row per endpoint: DISTINCT ON keeps the first row of each
    -- endpoint group under the ORDER BY below, i.e. the greatest run_at.
    SELECT DISTINCT ON (endpoint)
        endpoint,
        run_at,
        success,
        error_code,
        error_message,
        rows_inserted,
        rows_upserted
    FROM tracksolid.ingestion_log
    ORDER BY endpoint, run_at DESC
),
agg AS (
    -- Run / failure counts over the trailing hour. The time window is applied
    -- in WHERE (not per-aggregate FILTER) so only recent rows are read.
    -- Endpoints with no run in the window are absent here; the LEFT JOIN plus
    -- COALESCE in the outer query reports them as 0.
    SELECT
        endpoint,
        count(*) AS runs_1h,
        count(*) FILTER (WHERE NOT success) AS failures_1h
    FROM tracksolid.ingestion_log
    WHERE run_at > now() - interval '1 hour'
    GROUP BY endpoint
)
SELECT
    lr.endpoint,
    lr.run_at AS last_run_at,
    EXTRACT(EPOCH FROM (now() - lr.run_at))::int AS seconds_ago,
    lr.success AS last_success,
    lr.error_code,
    lr.error_message,
    lr.rows_inserted,
    lr.rows_upserted,
    COALESCE(a.runs_1h, 0) AS runs_1h,
    COALESCE(a.failures_1h, 0) AS failures_1h,
    ex.expected_interval_s,
    -- Verdict order matters: 'stale' is tested first, so an endpoint whose
    -- last run both failed and is older than 3x its cadence reports 'stale'.
    -- NOTE(review): a NULL success is not caught by NOT lr.success and falls
    -- through to 'ok' -- confirm ingestion_log.success is NOT NULL.
    CASE
        WHEN EXTRACT(EPOCH FROM (now() - lr.run_at)) > 3 * ex.expected_interval_s THEN 'stale'
        WHEN NOT lr.success THEN 'error'
        ELSE 'ok'
    END AS freshness
FROM last_run AS lr
LEFT JOIN agg AS a USING (endpoint)
CROSS JOIN LATERAL (
    -- Expected seconds between runs for each known endpoint; anything not
    -- listed falls back to one hour.
    SELECT CASE lr.endpoint
        WHEN 'jimi.user.device.location.list'    THEN 60    -- live sweep (60s)
        WHEN 'jimi.device.alarm.list'            THEN 300   -- alarms (5m)
        WHEN 'jimi.device.track.mileage'         THEN 900   -- trips (15m)
        WHEN 'jimi.open.platform.report.parking' THEN 900   -- parking (15m)
        WHEN 'jimi.device.track.list'            THEN 1800  -- high-res trail (30m)
        ELSE 3600                                           -- default (1h)
    END AS expected_interval_s
) AS ex
-- Longest-silent endpoints first.
ORDER BY seconds_ago DESC;
-- Catalog description attached to the view. The adjacent quoted pieces below
-- form a single comment string: PostgreSQL concatenates string constants that
-- are separated only by whitespace containing a newline.
COMMENT ON VIEW reporting.v_ingest_health IS
'Per-endpoint ingest freshness from tracksolid.ingestion_log. Replaces the '
'Grafana pipeline-health panels (Grafana removed 2026-06-10). Surfaced by '
'dashboard_api GET /health/ingest. freshness = ok|stale|error (stale = past 3x '
'the ingest_worker_rev.py cadence).';
-- Read-only access for the dashboard roles (guarded; idempotent).
DO $grants$
DECLARE
    reader name;  -- one existing role name per loop iteration
BEGIN
    -- Walk only those candidate roles that are actually present in pg_roles,
    -- so a deployment missing either role skips its GRANT instead of failing.
    FOR reader IN
        SELECT r.rolname
        FROM pg_roles AS r
        WHERE r.rolname IN ('grafana_ro', 'dashboard_ro')
    LOOP
        -- %I quotes the role name as an identifier.
        EXECUTE format('GRANT SELECT ON reporting.v_ingest_health TO %I', reader);
    END LOOP;
END
$grants$;