Verified each finding against the code (+ profiled the 31k-row CSV sample); implemented only the genuinely valid fixes: - import_tickets.py: fold _record_meta into the upsert transaction so rows + snapshot meta commit atomically (BUG 2); guard _ts_from_key against regex-matching-but-invalid dates so the sort can't crash (BUG 11); extract_place now splits glued NW prefixes (~1.7k rows, e.g. NWKIAMBU→KIAMBU) and only drops a trailing '-<seg>' when it's a unit/instruction code, keeping real-word tails like '-MALL' (BUG 14). Scoped glued-split to NW only — CO/NE/SE begin real words (COAST/NEW/SEASONS) per the data. - Dockerfile + pyproject.toml: install from pyproject (single source of truth) instead of mirroring deps; add build-system + py-modules so `pip install .` works for the flat-module layout (BUG 9). - migrations/03_inc_columns.sql: document the eat_ts IMMUTABLE/tzdata footgun and the manual-recompute path (BUG 6). - .gitignore: narrow *.json → *.local.json so real fixtures can be versioned; ignore build/ and *.egg-info/ (BUG 10). Reclassified/skipped as invalid or by-design: BUG 1, 3, 4, 5, 7, 8, 12, 13. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
79 lines
5.9 KiB
SQL
79 lines
5.9 KiB
SQL
-- 03_inc_columns.sql — fleettickets · unpack tickets.inc.raw into typed columns
|
|
-- ─────────────────────────────────────────────────────────────────────────────
|
|
-- Adds STORED generated columns derived from `raw`, so the INC dataset has real
|
|
-- typed, indexable columns while `raw` stays the source of truth (drift-safe).
|
|
-- STORED generated columns are computed for ALL existing rows on creation and
|
|
-- auto-recomputed on every future insert/update — no loader change needed.
|
|
--
|
|
-- Timestamps are the source's EAT wall-clock; we convert them to timestamptz via
|
|
-- tickets.eat_ts(). text->timestamp is technically DateStyle-dependent (so the
|
|
-- raw cast is only STABLE and can't sit in a generated column), but our source is
|
|
-- unambiguous ISO 'YYYY-MM-DD HH24:MI:SS', so the result is genuinely invariant —
|
|
-- hence eat_ts() is safely declared IMMUTABLE, which lets it back a generated col.
|
|
--
|
|
-- Idempotent: safe on a fresh DB and re-appliable on the live DB.
|
|
-- ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
SET search_path = tickets, public;
|
|
|
|
-- EAT (Africa/Nairobi) text -> timestamptz; IMMUTABLE so it can back generated cols.
|
|
-- FOOTGUN: 'AT TIME ZONE' resolves against the OS tzdata, so IMMUTABLE is a slight
|
|
-- lie. It's safe here because Kenya is fixed at UTC+3 (no DST, no pending changes),
|
|
-- so the result is genuinely invariant. If that ever changes, a tzdata update will
|
|
-- NOT recompute the STORED generated columns below — they'd need a manual rebuild
|
|
-- (PG17+: ALTER TABLE tickets.inc ALTER COLUMN <col> SET EXPRESSION AS (...same...)).
|
|
CREATE OR REPLACE FUNCTION tickets.eat_ts(p text)
|
|
RETURNS timestamptz LANGUAGE sql IMMUTABLE PARALLEL SAFE
|
|
AS $fn$ SELECT (NULLIF(p, '')::timestamp) AT TIME ZONE 'Africa/Nairobi' $fn$;
|
|
|
|
ALTER TABLE tickets.inc
|
|
-- text
|
|
ADD COLUMN IF NOT EXISTS service_type text GENERATED ALWAYS AS (raw->>'service_type') STORED,
|
|
ADD COLUMN IF NOT EXISTS bucket text GENERATED ALWAYS AS (raw->>'bucket') STORED,
|
|
ADD COLUMN IF NOT EXISTS raw_status text GENERATED ALWAYS AS (raw->>'raw_status') STORED,
|
|
ADD COLUMN IF NOT EXISTS normalized_status text GENERATED ALWAYS AS (raw->>'normalized_status') STORED,
|
|
ADD COLUMN IF NOT EXISTS cluster text GENERATED ALWAYS AS (raw->>'cluster') STORED,
|
|
ADD COLUMN IF NOT EXISTS region text GENERATED ALWAYS AS (raw->>'region') STORED,
|
|
ADD COLUMN IF NOT EXISTS location_name text GENERATED ALWAYS AS (raw->>'location_name') STORED,
|
|
ADD COLUMN IF NOT EXISTS assigned_team text GENERATED ALWAYS AS (raw->>'assigned_team') STORED,
|
|
ADD COLUMN IF NOT EXISTS owner text GENERATED ALWAYS AS (raw->>'owner') STORED,
|
|
ADD COLUMN IF NOT EXISTS sla_status text GENERATED ALWAYS AS (raw->>'sla_status') STORED,
|
|
-- numeric / float
|
|
ADD COLUMN IF NOT EXISTS mttr numeric GENERATED ALWAYS AS (NULLIF(raw->>'mttr','')::numeric) STORED,
|
|
ADD COLUMN IF NOT EXISTS latitude double precision GENERATED ALWAYS AS (NULLIF(raw->>'latitude','')::double precision) STORED,
|
|
ADD COLUMN IF NOT EXISTS longitude double precision GENERATED ALWAYS AS (NULLIF(raw->>'longitude','')::double precision) STORED,
|
|
-- boolean
|
|
ADD COLUMN IF NOT EXISTS is_actionable boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_actionable','')::boolean) STORED,
|
|
ADD COLUMN IF NOT EXISTS is_auto_created boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_auto_created','')::boolean) STORED,
|
|
ADD COLUMN IF NOT EXISTS is_auto_closed boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_auto_closed','')::boolean) STORED,
|
|
ADD COLUMN IF NOT EXISTS is_alarm boolean GENERATED ALWAYS AS (NULLIF(raw->>'is_alarm','')::boolean) STORED,
|
|
-- timestamps (EAT wall-clock -> timestamptz). created_at/updated_at are the
|
|
-- EXPORT pipeline's bookkeeping (not ticket lifecycle), hence the source_ prefix.
|
|
ADD COLUMN IF NOT EXISTS created_at_service timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'created_at_service')) STORED,
|
|
ADD COLUMN IF NOT EXISTS scheduled_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'scheduled_at')) STORED,
|
|
ADD COLUMN IF NOT EXISTS closed_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'closed_at')) STORED,
|
|
ADD COLUMN IF NOT EXISTS last_seen_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'last_seen_at')) STORED,
|
|
ADD COLUMN IF NOT EXISTS first_seen_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'first_seen_at')) STORED,
|
|
ADD COLUMN IF NOT EXISTS source_created_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'created_at')) STORED,
|
|
ADD COLUMN IF NOT EXISTS source_updated_at timestamptz GENERATED ALWAYS AS (tickets.eat_ts(raw->>'updated_at')) STORED;
|
|
|
|
-- indexes on the new typed columns (serve SLA / team / closure queries)
|
|
CREATE INDEX IF NOT EXISTS ix_inc_norm_status_col ON tickets.inc (normalized_status);
|
|
CREATE INDEX IF NOT EXISTS ix_inc_cluster_col ON tickets.inc (cluster);
|
|
CREATE INDEX IF NOT EXISTS ix_inc_assigned_team ON tickets.inc (assigned_team);
|
|
CREATE INDEX IF NOT EXISTS ix_inc_closed_at ON tickets.inc (closed_at);
|
|
CREATE INDEX IF NOT EXISTS ix_inc_actionable_col ON tickets.inc (is_actionable) WHERE is_actionable;
|
|
|
|
-- drop the throwaway probe function from interactive testing, if present
|
|
DROP FUNCTION IF EXISTS tickets._eat_ts_test(text);
|
|
|
|
-- grants (guarded: roles may not exist on a fresh DB)
|
|
DO $grants$
|
|
BEGIN
|
|
IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'dashboard_ro') THEN
|
|
GRANT EXECUTE ON FUNCTION tickets.eat_ts(text) TO dashboard_ro;
|
|
END IF;
|
|
IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'grafana_ro') THEN
|
|
GRANT EXECUTE ON FUNCTION tickets.eat_ts(text) TO grafana_ro;
|
|
END IF;
|
|
END $grants$;
|