80 lines
3 KiB
MySQL
80 lines
3 KiB
MySQL
|
|
-- =============================================================
|
|||
|
|
-- DWH OBSERVABILITY VIEWS
|
|||
|
|
-- Target Database: tracksolid_dwh
|
|||
|
|
-- Purpose: Surface pipeline health for Grafana dashboards. Three views,
|
|||
|
|
-- one concern each:
|
|||
|
|
-- v_table_freshness — how long since each table was last loaded
|
|||
|
|
-- v_recent_failures — failed runs in the last 24h
|
|||
|
|
-- v_watermark_lag — per-table watermark vs. now
|
|||
|
|
-- Applies after: 261001_dwh_control.sql
|
|||
|
|
-- Readability: owned by dwh_owner → grafana_ro inherits SELECT via the
|
|||
|
|
-- ALTER DEFAULT PRIVILEGES set in 261001. Explicit GRANT below
|
|||
|
|
-- covers the case where defaults were set AFTER this file runs.
|
|||
|
|
-- =============================================================
|
|||
|
|
|
|||
|
|
BEGIN;
|
|||
|
|
|
|||
|
|
SET ROLE dwh_owner;
|
|||
|
|
|
|||
|
|
-- 1. FRESHNESS
|
|||
|
|
-- One row per table that has ever loaded successfully. `lag` drives the
|
|||
|
|
-- freshness panel; `loads_last_24h` sanity-checks the cron cadence.
|
|||
|
|
CREATE OR REPLACE VIEW dwh_control.v_table_freshness AS
|
|||
|
|
SELECT
|
|||
|
|
table_name,
|
|||
|
|
MAX(run_finished_at) AS last_loaded_at,
|
|||
|
|
NOW() - MAX(run_finished_at) AS lag,
|
|||
|
|
COUNT(*) FILTER (WHERE run_started_at > NOW() - INTERVAL '24 hours') AS loads_last_24h
|
|||
|
|
FROM dwh_control.extract_runs
|
|||
|
|
WHERE status = 'loaded'
|
|||
|
|
GROUP BY table_name;
|
|||
|
|
|
|||
|
|
COMMENT ON VIEW dwh_control.v_table_freshness IS
|
|||
|
|
'Per-table load lag. Alert when lag > 4h during active hours (05:00–23:00 EAT).';
|
|||
|
|
|
|||
|
|
-- 2. RECENT FAILURES
|
|||
|
|
-- Failures retain error_message; the CSV stays in dwh/exports/ for the next
|
|||
|
|
-- scheduled run to retry. Panel should show run_id so operators can grep logs.
|
|||
|
|
CREATE OR REPLACE VIEW dwh_control.v_recent_failures AS
|
|||
|
|
SELECT
|
|||
|
|
run_id,
|
|||
|
|
table_name,
|
|||
|
|
run_started_at,
|
|||
|
|
run_finished_at,
|
|||
|
|
csv_path,
|
|||
|
|
error_message
|
|||
|
|
FROM dwh_control.extract_runs
|
|||
|
|
WHERE status = 'failed'
|
|||
|
|
AND run_started_at > NOW() - INTERVAL '24 hours'
|
|||
|
|
ORDER BY run_started_at DESC;
|
|||
|
|
|
|||
|
|
COMMENT ON VIEW dwh_control.v_recent_failures IS
|
|||
|
|
'Failed extract/load runs in the last 24h. Alert on any row.';
|
|||
|
|
|
|||
|
|
-- 3. WATERMARK LAG
|
|||
|
|
-- Distinguishes "pipeline ran but found nothing" (load_lag small, extract_lag
|
|||
|
|
-- growing) from "pipeline is stuck" (both lags growing). Snapshot tables are
|
|||
|
|
-- not in extract_watermarks so they do not appear here — that is intentional.
|
|||
|
|
CREATE OR REPLACE VIEW dwh_control.v_watermark_lag AS
|
|||
|
|
SELECT
|
|||
|
|
table_name,
|
|||
|
|
last_extracted_at,
|
|||
|
|
last_loaded_at,
|
|||
|
|
rows_loaded_last_run,
|
|||
|
|
NOW() - last_loaded_at AS load_lag,
|
|||
|
|
NOW() - last_extracted_at AS extract_lag
|
|||
|
|
FROM dwh_control.extract_watermarks;
|
|||
|
|
|
|||
|
|
COMMENT ON VIEW dwh_control.v_watermark_lag IS
|
|||
|
|
'Per-table watermark position vs. now. Incremental tables only (6 rows).';
|
|||
|
|
|
|||
|
|
RESET ROLE;
|
|||
|
|
|
|||
|
|
-- Explicit grants: defensive in case ALTER DEFAULT PRIVILEGES from 261001
|
|||
|
|
-- was not in effect when these views were created.
|
|||
|
|
GRANT SELECT ON dwh_control.v_table_freshness TO grafana_ro;
|
|||
|
|
GRANT SELECT ON dwh_control.v_recent_failures TO grafana_ro;
|
|||
|
|
GRANT SELECT ON dwh_control.v_watermark_lag TO grafana_ro;
|
|||
|
|
|
|||
|
|
COMMIT;
|