From 073db9b5b8988f757d091de597b3753414ec22db Mon Sep 17 00:00:00 2001 From: david kiania Date: Mon, 15 Jun 2026 23:08:31 +0300 Subject: [PATCH] feat: unpack tickets.inc.raw into typed generated columns (migration 03) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add STORED generated columns derived from raw (text/numeric/bool/double + EAT timestamptz via an IMMUTABLE tickets.eat_ts() wrapper). Computed for all existing rows and auto-populated on every future ingest — raw stays the source of truth, no loader change. Indexes on status/cluster/team/closed_at/is_actionable for the SLA/team/closure queries. Co-Authored-By: Claude Opus 4.8 --- README.md | 9 ++++- migrations/03_inc_columns.sql | 74 +++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 migrations/03_inc_columns.sql diff --git a/README.md b/README.md index 624db1f..0c1ea13 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ Field-ops **INC ticket** ingestion, geocoding, and read-schema that powers the |---|---| | `migrations/01_tickets_schema.sql` | The `tickets` schema: `tickets.inc` / `tickets.crq` (raw-jsonb-first), `tickets.geo_clusters` + `tickets.geo_locations` gazetteers, geom-resolution trigger, and `reporting.fn_tickets_for_map` (the GeoJSON read function) | | `migrations/02_import_meta.sql` | `tickets.import_meta` (per-dataset snapshot envelope metadata) + `fn_tickets_for_map` re-defined to expose it as `summary.freshness` (same signature — dashboard_api unchanged) | +| `migrations/03_inc_columns.sql` | Unpacks `tickets.inc.raw` into **typed STORED generated columns** (status, cluster, region, team, owner, sla_status, mttr, lat/lng, is_* booleans, and EAT→`timestamptz` timestamps via `tickets.eat_ts()`). 
Computed for all rows + auto-populated on every ingest; `raw` stays the source of truth | | `import_tickets.py` | Ingests the **newest INC CSV** from the rustfs `tickets` bucket (`automations/inc/.csv`) and upserts on `ticket_id`; geocodes clusters + INC locations | | `run_migrations.py` | Applies `migrations/*.sql` in order (ledger: `tickets.schema_migrations`) | | `shared.py` | Minimal DB/logging helpers (self-contained — no tracksolid dependency) | @@ -26,9 +27,13 @@ Field-ops **INC ticket** ingestion, geocoding, and read-schema that powers the ## Data model (raw-first) -Each row is just `ticket_id` + `raw` (the full source record as `jsonb`) + a derived +Each row is `ticket_id` + `raw` (the full source record as `jsonb`) + a derived `geom` / `geo_source`. Everything reads from `raw`, so a change to the source schema -needs no migration. `geom` is resolved: **feed** coords (`raw` lat/lng) → **location** +needs no migration. For convenient typed/indexable access, `raw` is also **unpacked +into STORED generated columns** (migration 03) — e.g. `normalized_status`, `cluster`, +`region`, `assigned_team`, `owner`, `sla_status`, `mttr`, `is_actionable`, +`created_at_service`/`closed_at` (as EAT→`timestamptz`). These stay in lock-step with +`raw` automatically (no loader change); `raw` remains the source of truth. `geom` is resolved: **feed** coords (`raw` lat/lng) → **location** (geocoded `location_name`) → **cluster** centroid → **none**. 
Source coordinates are empty in the feed, so geocoding is required: diff --git a/migrations/03_inc_columns.sql b/migrations/03_inc_columns.sql new file mode 100644 index 0000000..3948843 --- /dev/null +++ b/migrations/03_inc_columns.sql @@ -0,0 +1,74 @@ +-- 03_inc_columns.sql — fleettickets · unpack tickets.inc.raw into typed columns +-- ───────────────────────────────────────────────────────────────────────────── +-- Adds STORED generated columns derived from `raw`, so the INC dataset has real +-- typed, indexable columns while `raw` stays the source of truth (drift-safe). +-- STORED generated columns are computed for ALL existing rows on creation and +-- auto-recomputed on every future insert/update — no loader change needed. +-- +-- Timestamps are the source's EAT wall-clock; we convert them to timestamptz via +-- tickets.eat_ts(). text->timestamp is technically DateStyle-dependent (so the +-- raw cast is only STABLE and can't sit in a generated column), but our source is +-- unambiguous ISO 'YYYY-MM-DD HH24:MI:SS', so the result is genuinely invariant — +-- hence eat_ts() is safely declared IMMUTABLE, which lets it back a generated col. +-- +-- Idempotent: safe on a fresh DB and re-appliable on the live DB. +-- ───────────────────────────────────────────────────────────────────────────── + +SET search_path = tickets, public; + +-- EAT (Africa/Nairobi) text -> timestamptz; IMMUTABLE so it can back generated cols. 
+CREATE OR REPLACE FUNCTION tickets.eat_ts(p text)
+  RETURNS timestamptz LANGUAGE sql IMMUTABLE PARALLEL SAFE AS $fn$
+  -- Guarded cast: only ISO text 'YYYY-MM-DD[ HH24:MI[:SS[.f]]]' is converted. NULL, '' and every other
+  -- layout ('now', 'today', DateStyle-dependent forms) return NULL, so IMMUTABLE holds for any input.
+  SELECT CASE WHEN btrim(p) ~ '^[0-9]{4}-[0-9]{2}-[0-9]{2}([ T][0-9]{2}:[0-9]{2}(:[0-9]{2}([.][0-9]+)?)?)?$'
+              THEN (btrim(p)::timestamp) AT TIME ZONE 'Africa/Nairobi' END
+$fn$;
+
+ALTER TABLE tickets.inc
+  -- text (copied verbatim from raw; a missing key yields NULL)
+  ADD COLUMN IF NOT EXISTS service_type       text GENERATED ALWAYS AS (raw->>'service_type') STORED,
+  ADD COLUMN IF NOT EXISTS bucket             text GENERATED ALWAYS AS (raw->>'bucket') STORED,
+  ADD COLUMN IF NOT EXISTS raw_status         text GENERATED ALWAYS AS (raw->>'raw_status') STORED,
+  ADD COLUMN IF NOT EXISTS normalized_status  text GENERATED ALWAYS AS (raw->>'normalized_status') STORED,
+  ADD COLUMN IF NOT EXISTS cluster            text GENERATED ALWAYS AS (raw->>'cluster') STORED,
+  ADD COLUMN IF NOT EXISTS region             text GENERATED ALWAYS AS (raw->>'region') STORED,
+  ADD COLUMN IF NOT EXISTS location_name      text GENERATED ALWAYS AS (raw->>'location_name') STORED,
+  ADD COLUMN IF NOT EXISTS assigned_team      text GENERATED ALWAYS AS (raw->>'assigned_team') STORED,
+  ADD COLUMN IF NOT EXISTS owner              text GENERATED ALWAYS AS (raw->>'owner') STORED,
+  ADD COLUMN IF NOT EXISTS sla_status         text GENERATED ALWAYS AS (raw->>'sla_status') STORED,
+  -- numeric / float ('' is mapped to NULL before the cast)
+  ADD COLUMN IF NOT EXISTS mttr               numeric GENERATED ALWAYS AS (NULLIF(raw->>'mttr','')::numeric) STORED,
+  ADD COLUMN IF NOT EXISTS latitude           double precision GENERATED ALWAYS AS (NULLIF(raw->>'latitude','')::double precision) STORED,
+  ADD COLUMN IF NOT EXISTS longitude          double precision GENERATED ALWAYS AS (NULLIF(raw->>'longitude','')::double precision) STORED,
+  -- boolean ('' is mapped to NULL before the cast)
+  ADD COLUMN IF NOT EXISTS is_actionable      boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_actionable','')::boolean) STORED,
+  ADD COLUMN IF NOT EXISTS is_auto_created    boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_auto_created','')::boolean) STORED,
+  ADD COLUMN IF NOT EXISTS is_auto_closed     boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_auto_closed','')::boolean) STORED,
+  ADD COLUMN IF NOT EXISTS is_alarm           boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_alarm','')::boolean) STORED,
+  -- timestamps (EAT wall-clock -> timestamptz via tickets.eat_ts). raw created_at/updated_at are the
+  -- EXPORT pipeline's bookkeeping (not ticket lifecycle), hence the source_ prefix on those two columns.
+  ADD COLUMN IF NOT EXISTS created_at_service timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'created_at_service')) STORED,
+  ADD COLUMN IF NOT EXISTS scheduled_at       timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'scheduled_at')) STORED,
+  ADD COLUMN IF NOT EXISTS closed_at          timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'closed_at')) STORED,
+  ADD COLUMN IF NOT EXISTS last_seen_at       timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'last_seen_at')) STORED,
+  ADD COLUMN IF NOT EXISTS first_seen_at      timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'first_seen_at')) STORED,
+  ADD COLUMN IF NOT EXISTS source_created_at  timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'created_at')) STORED,
+  ADD COLUMN IF NOT EXISTS source_updated_at  timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'updated_at')) STORED;
+
+-- indexes on the new typed columns (serve SLA / team / closure queries)
+CREATE INDEX IF NOT EXISTS ix_inc_norm_status_col ON tickets.inc (normalized_status);
+CREATE INDEX IF NOT EXISTS ix_inc_cluster_col     ON tickets.inc (cluster);
+CREATE INDEX IF NOT EXISTS ix_inc_assigned_team   ON tickets.inc (assigned_team);
+CREATE INDEX IF NOT EXISTS ix_inc_closed_at       ON tickets.inc (closed_at);
+CREATE INDEX IF NOT EXISTS ix_inc_actionable_col  ON tickets.inc (is_actionable) WHERE is_actionable;  -- partial: TRUE rows only
+
+-- drop the throwaway probe function from interactive testing, if present
+DROP FUNCTION IF EXISTS tickets._eat_ts_test(text);
+
+-- grants (guarded: roles may not exist on a fresh DB, and GRANT to a missing role would fail)
+DO $grants$
+BEGIN
+  IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'dashboard_ro') THEN GRANT EXECUTE ON FUNCTION tickets.eat_ts(text) TO dashboard_ro; END IF;
+  IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'grafana_ro')   THEN GRANT EXECUTE ON FUNCTION tickets.eat_ts(text) TO grafana_ro;   END IF;
+END $grants$;