fleettickets/migrations/03_inc_columns.sql
david kiania dca2c94c75 fix: address valid findings from 20260618 bug report
Verified each finding against the code (+ profiled the 31k-row CSV sample);
implemented only the genuinely valid fixes:

- import_tickets.py: fold _record_meta into the upsert transaction so rows +
  snapshot meta commit atomically (BUG 2); guard _ts_from_key against
  regex-matching-but-invalid dates so the sort can't crash (BUG 11);
  extract_place now splits glued NW prefixes (~1.7k rows, e.g. NWKIAMBU→KIAMBU)
  and only drops a trailing '-<seg>' when it's a unit/instruction code, keeping
  real-word tails like '-MALL' (BUG 14). Scoped glued-split to NW only —
  CO/NE/SE begin real words (COAST/NEW/SEASONS) per the data.
- Dockerfile + pyproject.toml: install from pyproject (single source of truth)
  instead of mirroring deps; add build-system + py-modules so `pip install .`
  works for the flat-module layout (BUG 9).
- migrations/03_inc_columns.sql: document the eat_ts IMMUTABLE/tzdata footgun
  and the manual-recompute path (BUG 6).
- .gitignore: narrow *.json → *.local.json so real fixtures can be versioned;
  ignore build/ and *.egg-info/ (BUG 10).

Reclassified/skipped as invalid or by-design: BUG 1, 3, 4, 5, 7, 8, 12, 13.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-18 13:41:38 +03:00

79 lines
5.9 KiB
SQL

-- 03_inc_columns.sql — fleettickets · unpack tickets.inc.raw into typed columns
-- ─────────────────────────────────────────────────────────────────────────────
-- Adds STORED generated columns derived from `raw`, so the INC dataset has real
-- typed, indexable columns while `raw` stays the source of truth (drift-safe).
-- STORED generated columns are computed for ALL existing rows on creation and
-- auto-recomputed on every future insert/update — no loader change needed.
--
-- Timestamps are the source's EAT wall-clock; we convert them to timestamptz via
-- tickets.eat_ts(). text->timestamp is technically DateStyle-dependent (so the
-- raw cast is only STABLE and can't sit in a generated column), but our source is
-- unambiguous ISO 'YYYY-MM-DD HH24:MI:SS', so the result is genuinely invariant —
-- hence eat_ts() is safely declared IMMUTABLE, which lets it back a generated col.
--
-- Idempotent: safe on a fresh DB and re-appliable on the live DB.
-- ─────────────────────────────────────────────────────────────────────────────
SET search_path = tickets, public;
-- EAT (Africa/Nairobi) text -> timestamptz; IMMUTABLE so it can back generated cols.
-- FOOTGUN: 'AT TIME ZONE' resolves against the OS tzdata, so IMMUTABLE is a slight
-- lie. It's safe here because Kenya is fixed at UTC+3 (no DST, no pending changes),
-- so the result is genuinely invariant. If that ever changes, a tzdata update will
-- NOT recompute the STORED generated columns below — they'd need a manual rebuild
-- (PG17+: ALTER TABLE tickets.inc ALTER COLUMN <col> SET EXPRESSION AS (...same...)).
CREATE OR REPLACE FUNCTION tickets.eat_ts(p text)
RETURNS timestamptz LANGUAGE sql IMMUTABLE PARALLEL SAFE
AS $fn$ SELECT (NULLIF(p, '')::timestamp) AT TIME ZONE 'Africa/Nairobi' $fn$;
ALTER TABLE tickets.inc
-- text
ADD COLUMN IF NOT EXISTS service_type text GENERATED ALWAYS AS (raw->>'service_type') STORED,
ADD COLUMN IF NOT EXISTS bucket text GENERATED ALWAYS AS (raw->>'bucket') STORED,
ADD COLUMN IF NOT EXISTS raw_status text GENERATED ALWAYS AS (raw->>'raw_status') STORED,
ADD COLUMN IF NOT EXISTS normalized_status text GENERATED ALWAYS AS (raw->>'normalized_status') STORED,
ADD COLUMN IF NOT EXISTS cluster text GENERATED ALWAYS AS (raw->>'cluster') STORED,
ADD COLUMN IF NOT EXISTS region text GENERATED ALWAYS AS (raw->>'region') STORED,
ADD COLUMN IF NOT EXISTS location_name text GENERATED ALWAYS AS (raw->>'location_name') STORED,
ADD COLUMN IF NOT EXISTS assigned_team text GENERATED ALWAYS AS (raw->>'assigned_team') STORED,
ADD COLUMN IF NOT EXISTS owner text GENERATED ALWAYS AS (raw->>'owner') STORED,
ADD COLUMN IF NOT EXISTS sla_status text GENERATED ALWAYS AS (raw->>'sla_status') STORED,
-- numeric / float
ADD COLUMN IF NOT EXISTS mttr numeric GENERATED ALWAYS AS (NULLIF(raw->>'mttr','')::numeric) STORED,
ADD COLUMN IF NOT EXISTS latitude double precision GENERATED ALWAYS AS (NULLIF(raw->>'latitude','')::double precision) STORED,
ADD COLUMN IF NOT EXISTS longitude double precision GENERATED ALWAYS AS (NULLIF(raw->>'longitude','')::double precision) STORED,
-- boolean
ADD COLUMN IF NOT EXISTS is_actionable boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_actionable','')::boolean) STORED,
ADD COLUMN IF NOT EXISTS is_auto_created boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_auto_created','')::boolean) STORED,
ADD COLUMN IF NOT EXISTS is_auto_closed boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_auto_closed','')::boolean) STORED,
ADD COLUMN IF NOT EXISTS is_alarm boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_alarm','')::boolean) STORED,
-- timestamps (EAT wall-clock -> timestamptz). created_at/updated_at are the
-- EXPORT pipeline's bookkeeping (not ticket lifecycle), hence the source_ prefix.
ADD COLUMN IF NOT EXISTS created_at_service timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'created_at_service')) STORED,
ADD COLUMN IF NOT EXISTS scheduled_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'scheduled_at')) STORED,
ADD COLUMN IF NOT EXISTS closed_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'closed_at')) STORED,
ADD COLUMN IF NOT EXISTS last_seen_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'last_seen_at')) STORED,
ADD COLUMN IF NOT EXISTS first_seen_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'first_seen_at')) STORED,
ADD COLUMN IF NOT EXISTS source_created_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'created_at')) STORED,
ADD COLUMN IF NOT EXISTS source_updated_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'updated_at')) STORED;
-- indexes on the new typed columns (serve SLA / team / closure queries)
CREATE INDEX IF NOT EXISTS ix_inc_norm_status_col ON tickets.inc (normalized_status);
CREATE INDEX IF NOT EXISTS ix_inc_cluster_col ON tickets.inc (cluster);
CREATE INDEX IF NOT EXISTS ix_inc_assigned_team ON tickets.inc (assigned_team);
CREATE INDEX IF NOT EXISTS ix_inc_closed_at ON tickets.inc (closed_at);
CREATE INDEX IF NOT EXISTS ix_inc_actionable_col ON tickets.inc (is_actionable) WHERE is_actionable;
-- drop the throwaway probe function from interactive testing, if present
DROP FUNCTION IF EXISTS tickets._eat_ts_test(text);
-- grants (guarded: roles may not exist on a fresh DB)
DO $grants$
BEGIN
IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'dashboard_ro') THEN
GRANT EXECUTE ON FUNCTION tickets.eat_ts(text) TO dashboard_ro;
END IF;
IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'grafana_ro') THEN
GRANT EXECUTE ON FUNCTION tickets.eat_ts(text) TO grafana_ro;
END IF;
END $grants$;