tracksolid_timescale_grafan.../dwh/261004_dwh_observability_views.sql

80 lines
3 KiB
MySQL
Raw Normal View History

-- =============================================================
-- DWH OBSERVABILITY VIEWS
-- Target Database: tracksolid_dwh
-- Purpose: Surface pipeline health for Grafana dashboards. Three views,
-- one concern each:
-- v_table_freshness — how long since each table was last loaded
-- v_recent_failures — failed runs in the last 24h
-- v_watermark_lag — per-table watermark vs. now
-- Applies after: 261001_dwh_control.sql
-- Readability: owned by dwh_owner → grafana_ro inherits SELECT via the
-- ALTER DEFAULT PRIVILEGES set in 261001. Explicit GRANT below
-- covers the case where defaults were set AFTER this file runs.
-- =============================================================
BEGIN;
SET ROLE dwh_owner;
-- 1. FRESHNESS
-- One row per table that has ever loaded successfully. `lag` drives the
-- freshness panel; `loads_last_24h` sanity-checks the cron cadence.
CREATE OR REPLACE VIEW dwh_control.v_table_freshness AS
SELECT
table_name,
MAX(run_finished_at) AS last_loaded_at,
NOW() - MAX(run_finished_at) AS lag,
COUNT(*) FILTER (WHERE run_started_at > NOW() - INTERVAL '24 hours') AS loads_last_24h
FROM dwh_control.extract_runs
WHERE status = 'loaded'
GROUP BY table_name;
COMMENT ON VIEW dwh_control.v_table_freshness IS
'Per-table load lag. Alert when lag > 4h during active hours (05:0023:00 EAT).';
-- 2. RECENT FAILURES
-- Failures retain error_message; the CSV stays in dwh/exports/ for the next
-- scheduled run to retry. Panel should show run_id so operators can grep logs.
CREATE OR REPLACE VIEW dwh_control.v_recent_failures AS
SELECT
run_id,
table_name,
run_started_at,
run_finished_at,
csv_path,
error_message
FROM dwh_control.extract_runs
WHERE status = 'failed'
AND run_started_at > NOW() - INTERVAL '24 hours'
ORDER BY run_started_at DESC;
COMMENT ON VIEW dwh_control.v_recent_failures IS
'Failed extract/load runs in the last 24h. Alert on any row.';
-- 3. WATERMARK LAG
-- Distinguishes "pipeline ran but found nothing" (load_lag small, extract_lag
-- growing) from "pipeline is stuck" (both lags growing). Snapshot tables are
-- not in extract_watermarks so they do not appear here — that is intentional.
CREATE OR REPLACE VIEW dwh_control.v_watermark_lag AS
SELECT
table_name,
last_extracted_at,
last_loaded_at,
rows_loaded_last_run,
NOW() - last_loaded_at AS load_lag,
NOW() - last_extracted_at AS extract_lag
FROM dwh_control.extract_watermarks;
COMMENT ON VIEW dwh_control.v_watermark_lag IS
'Per-table watermark position vs. now. Incremental tables only (6 rows).';
RESET ROLE;
-- Explicit grants: defensive in case ALTER DEFAULT PRIVILEGES from 261001
-- was not in effect when these views were created.
GRANT SELECT ON dwh_control.v_table_freshness TO grafana_ro;
GRANT SELECT ON dwh_control.v_recent_failures TO grafana_ro;
GRANT SELECT ON dwh_control.v_watermark_lag TO grafana_ro;
COMMIT;