tracksolid_timescale_grafan.../migrations/23_tickets_raw_first.sql
david kiania cae64167eb
Some checks failed
Static Analysis / static (push) Waiting to run
Tests / test (push) Waiting to run
Static Analysis / static (pull_request) Has been cancelled
Tests / test (pull_request) Has been cancelled
feat(tickets): INC/CRQ tickets schema, geocoding + read-API
- migrations 21->23: dedicated `tickets` schema (tickets.inc / tickets.crq,
  raw-jsonb-first), geo_clusters + geo_locations gazetteers, geom-resolution
  trigger (feed -> location -> cluster -> none), reporting.fn_tickets_for_map
- dashboard_api: GET /webhook/tickets (INC/CRQ GeoJSON for the FleetOps map)
- tools/import_tickets.py: raw-first bucket ingest + cluster/location geocoding
  (LocationIQ/OpenCage, viewbox-bounded with a cluster-distance sanity guard)
- docs/CONNECTIONS.md: geocoder env var names

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 17:30:22 +03:00

246 lines
12 KiB
PL/PgSQL

-- 23_tickets_raw_first.sql
-- Make `raw` (jsonb) the single source of truth for tickets, and add a
-- location-level geocode cache so INC pins can be placed precisely (parsed from
-- region + cluster + location_name) instead of all stacking on a cluster centroid.
--
-- * tickets.inc / tickets.crq --> slim to (ticket_id, raw, geom, geo_source, ingested_at)
-- (the ~31 flattened typed columns are DROPPED; raw keeps them)
-- * tickets.geo_locations --> NEW: cleaned-location -> coordinates cache
-- * geom resolution --> feed(raw lat/lng) -> location(cache) -> cluster(gazetteer) -> none
--
-- The trigger/resolve/read-function are repointed to read from `raw`. The read
-- function reporting.fn_tickets_for_map keeps its SAME signature, so dashboard_api
-- and the FleetOps map need no change. Safe to re-apply.
-- Session-level setting (not SET LOCAL): unqualified names in the statements
-- below are looked up in `tickets` first, then in `public`.
SET search_path TO tickets, public;
-- ── 1. slim the per-type tables to raw-first ─────────────────────────────────
-- Drop the flattened typed columns from tickets.inc / tickets.crq so that `raw`
-- is the only copy of those values. Idempotent: every DROP uses IF EXISTS.
DO $slim$
DECLARE
    v_tbl      text;
    v_col      text;
    v_has_null boolean;
BEGIN
    -- Precondition check BEFORE anything destructive: once the typed columns are
    -- gone, a row without `raw` has lost its data for good. Failing here (rather
    -- than at the SET NOT NULL below) keeps the columns intact even when the
    -- migration runner does not wrap the whole file in one transaction.
    FOREACH v_tbl IN ARRAY ARRAY['inc', 'crq']
    LOOP
        EXECUTE format('SELECT EXISTS (SELECT 1 FROM tickets.%I WHERE raw IS NULL)', v_tbl)
           INTO v_has_null;
        IF v_has_null THEN
            RAISE EXCEPTION
                'tickets.% has rows with NULL raw; refusing to drop the flattened columns',
                v_tbl;
        END IF;
    END LOOP;

    FOREACH v_col IN ARRAY ARRAY[
        'source_type','service_type','bucket','raw_status','normalized_status',
        'created_at_service','scheduled_at','closed_at','last_seen_at','first_seen_at',
        'week_start','week_end','cluster','region','location_name','latitude','longitude',
        'department','assigned_team','owner','sla_status','mttr','is_auto_created',
        'is_auto_closed','is_alarm','is_actionable','source_s3_bucket','source_s3_key',
        'source_snapshot_id','created_at','updated_at']
    LOOP
        EXECUTE format('ALTER TABLE tickets.inc DROP COLUMN IF EXISTS %I', v_col);
        EXECUTE format('ALTER TABLE tickets.crq DROP COLUMN IF EXISTS %I', v_col);
    END LOOP;
END $slim$;
-- raw is now mandatory (the guard in the block above verified every row carries it).
ALTER TABLE tickets.inc ALTER COLUMN raw SET NOT NULL;
ALTER TABLE tickets.crq ALTER COLUMN raw SET NOT NULL;
-- ── 2. location geocode cache ────────────────────────────────────────────────
-- query_key = tickets.norm_cluster(location_name): a normalised key on the source
-- location string (norm_cluster just upper/collapse/trims — generic). The loader
-- geocodes the *cleaned* place (region+cluster+location_name, codes stripped) but
-- caches under the raw location_name key so resolve_ticket_geoms can join in SQL
-- without re-deriving the regex.
-- Location-level geocode cache: one row per normalised location key.
CREATE TABLE IF NOT EXISTS tickets.geo_locations (
    query_key     text             PRIMARY KEY,   -- tickets.norm_cluster(location_name)
    location_name text,
    cluster       text,
    region        text,
    query         text,                           -- the cleaned string actually sent to the geocoder
    lat           double precision,
    lng           double precision,
    geom          geometry(Point, 4326),          -- maintained from lat/lng by trg_geo_locations_geom
    confidence    numeric,
    provider      text,
    verified      boolean          NOT NULL DEFAULT false,
    updated_at    timestamptz      NOT NULL DEFAULT now()
);
-- Row trigger for tickets.geo_locations: keeps `geom` in step with lat/lng and
-- stamps `updated_at` on every insert or update.
CREATE OR REPLACE FUNCTION tickets.tg_geo_locations_geom()
RETURNS trigger
LANGUAGE plpgsql
AS $fn$
BEGIN
    -- A point is built only for a complete, in-range pair that is not exactly
    -- (0, 0); anything else clears geom.
    NEW.geom := CASE
        WHEN NEW.lat IS NOT NULL
         AND NEW.lng IS NOT NULL
         AND NEW.lat BETWEEN -90 AND 90
         AND NEW.lng BETWEEN -180 AND 180
         AND (NEW.lat <> 0 OR NEW.lng <> 0)
        THEN ST_SetSRID(ST_MakePoint(NEW.lng, NEW.lat), 4326)
        ELSE NULL
    END;

    NEW.updated_at := now();
    RETURN NEW;
END
$fn$;

-- Re-create the binding so the migration can be re-applied.
DROP TRIGGER IF EXISTS trg_geo_locations_geom ON tickets.geo_locations;

CREATE TRIGGER trg_geo_locations_geom
    BEFORE INSERT OR UPDATE
    ON tickets.geo_locations
    FOR EACH ROW
    EXECUTE FUNCTION tickets.tg_geo_locations_geom();
-- ── 3. geom trigger — read from raw; never clobber a deliberate geom-only update ─
-- Row trigger function for the ticket tables: derives NEW.geom / NEW.geo_source
-- from NEW.raw.
--
-- Resolution order:
--   1. 'feed'     - raw latitude/longitude, when both parse, are in range and are not (0, 0)
--   2. 'location' - tickets.geo_locations row keyed by norm_cluster(raw location_name)
--   3. 'cluster'  - tickets.geo_clusters row keyed by norm_cluster(raw cluster)
--   4. 'none'     - geom is set to NULL
--
-- An UPDATE that leaves raw unchanged is passed through untouched so that a
-- deliberate geom / geo_source write by the caller is never recomputed.
CREATE OR REPLACE FUNCTION tickets.tg_ticket_geom()
RETURNS trigger
LANGUAGE plpgsql
AS $fn$
DECLARE
    v_lat_txt text;
    v_lng_txt text;
    v_lat     double precision;
    v_lng     double precision;
    v_geom    geometry(Point, 4326);
BEGIN
    -- A geom/geo_source-only UPDATE (raw unchanged — e.g. resolve_ticket_geoms or the
    -- location geocode pass) must keep the caller's geom, not recompute it.
    -- Checked first so such updates do no parsing work at all.
    IF TG_OP = 'UPDATE' AND NEW.raw IS NOT DISTINCT FROM OLD.raw THEN
        RETURN NEW;
    END IF;

    -- Blank and whitespace-only values count as "no coordinate".
    v_lat_txt := NULLIF(btrim(NEW.raw->>'latitude'), '');
    v_lng_txt := NULLIF(btrim(NEW.raw->>'longitude'), '');

    -- raw is unvalidated feed data: a non-numeric coordinate must fall through to
    -- the gazetteer lookups instead of aborting the ingest. The sub-block (which
    -- costs a subtransaction) is entered only when both values are present.
    IF v_lat_txt IS NOT NULL AND v_lng_txt IS NOT NULL THEN
        BEGIN
            v_lat := v_lat_txt::double precision;
            v_lng := v_lng_txt::double precision;
        EXCEPTION
            WHEN invalid_text_representation OR numeric_value_out_of_range THEN
                -- Variable assignments are not rolled back, so clear both.
                v_lat := NULL;
                v_lng := NULL;
        END;
    END IF;

    -- 1. coordinates supplied by the feed
    IF v_lat IS NOT NULL AND v_lng IS NOT NULL
       AND v_lat BETWEEN -90 AND 90
       AND v_lng BETWEEN -180 AND 180
       AND NOT (v_lat = 0 AND v_lng = 0) THEN
        NEW.geom := ST_SetSRID(ST_MakePoint(v_lng, v_lat), 4326);
        NEW.geo_source := 'feed';
        RETURN NEW;
    END IF;

    -- 2. location-level geocode cache (same key and precedence as resolve_ticket_geoms)
    SELECT loc.geom
      INTO v_geom
      FROM tickets.geo_locations loc
     WHERE loc.query_key = tickets.norm_cluster(NEW.raw->>'location_name')
       AND loc.geom IS NOT NULL
     LIMIT 1;
    IF v_geom IS NOT NULL THEN
        NEW.geom := v_geom;
        NEW.geo_source := 'location';
        RETURN NEW;
    END IF;

    -- 3. cluster gazetteer, 4. nothing usable
    SELECT gc.geom
      INTO v_geom
      FROM tickets.geo_clusters gc
     WHERE gc.cluster_key = tickets.norm_cluster(NEW.raw->>'cluster')
       AND gc.geom IS NOT NULL
     LIMIT 1;
    IF v_geom IS NOT NULL THEN
        NEW.geom := v_geom;
        NEW.geo_source := 'cluster';
    ELSE
        NEW.geom := NULL;
        NEW.geo_source := 'none';
    END IF;

    RETURN NEW;
END
$fn$;
-- (triggers trg_inc_geom / trg_crq_geom from migration 22 already call this fn.)
-- ── 4. resolve — prefer location cache, else cluster centroid (non-feed rows) ──
-- Re-resolves geom / geo_source for every INC and CRQ row whose geo_source is
-- not 'feed': location cache first, then cluster gazetteer, else 'none'.
-- Only rows whose geom or geo_source would actually change are written.
-- Returns the total number of rows updated across both tables.
CREATE OR REPLACE FUNCTION tickets.resolve_ticket_geoms()
RETURNS integer
LANGUAGE plpgsql
AS $fn$
DECLARE
    v_inc_updated integer;
    v_crq_updated integer;
BEGIN
    -- INC: compute the target geom/source once per ticket, then apply the diffs.
    WITH resolved AS (
        SELECT
            src.ticket_id,
            COALESCE(loc.geom, gc.geom) AS new_geom,
            CASE
                WHEN loc.geom IS NOT NULL THEN 'location'
                WHEN gc.geom  IS NOT NULL THEN 'cluster'
                ELSE 'none'
            END AS new_source
        FROM tickets.inc src
        LEFT JOIN tickets.geo_locations loc
            ON loc.query_key = tickets.norm_cluster(src.raw->>'location_name')
           AND loc.geom IS NOT NULL
        LEFT JOIN tickets.geo_clusters gc
            ON gc.cluster_key = tickets.norm_cluster(src.raw->>'cluster')
           AND gc.geom IS NOT NULL
    )
    UPDATE tickets.inc t
       SET geom       = r.new_geom,
           geo_source = r.new_source
      FROM resolved r
     WHERE t.ticket_id = r.ticket_id
       AND t.geo_source IS DISTINCT FROM 'feed'
       AND (   t.geom       IS DISTINCT FROM r.new_geom
            OR t.geo_source IS DISTINCT FROM r.new_source);
    GET DIAGNOSTICS v_inc_updated = ROW_COUNT;

    -- CRQ: identical logic against tickets.crq.
    WITH resolved AS (
        SELECT
            src.ticket_id,
            COALESCE(loc.geom, gc.geom) AS new_geom,
            CASE
                WHEN loc.geom IS NOT NULL THEN 'location'
                WHEN gc.geom  IS NOT NULL THEN 'cluster'
                ELSE 'none'
            END AS new_source
        FROM tickets.crq src
        LEFT JOIN tickets.geo_locations loc
            ON loc.query_key = tickets.norm_cluster(src.raw->>'location_name')
           AND loc.geom IS NOT NULL
        LEFT JOIN tickets.geo_clusters gc
            ON gc.cluster_key = tickets.norm_cluster(src.raw->>'cluster')
           AND gc.geom IS NOT NULL
    )
    UPDATE tickets.crq t
       SET geom       = r.new_geom,
           geo_source = r.new_source
      FROM resolved r
     WHERE t.ticket_id = r.ticket_id
       AND t.geo_source IS DISTINCT FROM 'feed'
       AND (   t.geom       IS DISTINCT FROM r.new_geom
            OR t.geo_source IS DISTINCT FROM r.new_source);
    GET DIAGNOSTICS v_crq_updated = ROW_COUNT;

    RETURN v_inc_updated + v_crq_updated;
END
$fn$;
-- ── 5. expression indexes (replace the dropped column indexes) ────────────────
-- tickets.inc: status, actionable-only (partial), normalised cluster / location keys, geometry.
CREATE INDEX IF NOT EXISTS ix_inc_status_raw
    ON tickets.inc ((raw->>'normalized_status'));
CREATE INDEX IF NOT EXISTS ix_inc_actionable_raw
    ON tickets.inc (((raw->>'is_actionable')::boolean))
    WHERE (raw->>'is_actionable')::boolean;
CREATE INDEX IF NOT EXISTS ix_inc_cluster_raw
    ON tickets.inc (tickets.norm_cluster(raw->>'cluster'));
CREATE INDEX IF NOT EXISTS ix_inc_loc_raw
    ON tickets.inc (tickets.norm_cluster(raw->>'location_name'));
CREATE INDEX IF NOT EXISTS ix_inc_geom
    ON tickets.inc USING gist (geom);

-- tickets.crq: the same set of indexes.
CREATE INDEX IF NOT EXISTS ix_crq_status_raw
    ON tickets.crq ((raw->>'normalized_status'));
CREATE INDEX IF NOT EXISTS ix_crq_actionable_raw
    ON tickets.crq (((raw->>'is_actionable')::boolean))
    WHERE (raw->>'is_actionable')::boolean;
CREATE INDEX IF NOT EXISTS ix_crq_cluster_raw
    ON tickets.crq (tickets.norm_cluster(raw->>'cluster'));
CREATE INDEX IF NOT EXISTS ix_crq_loc_raw
    ON tickets.crq (tickets.norm_cluster(raw->>'location_name'));
CREATE INDEX IF NOT EXISTS ix_crq_geom
    ON tickets.crq USING gist (geom);
-- ── 6. read function — extract every property from raw (same signature) ───────
-- Returns the INC + CRQ tickets that have a geometry as one jsonb document:
--   { "summary": { ticket_count, inc, crq, open, by_status },
--     "geojson": { "type": "FeatureCollection", "features": [...] } }
-- Every feature property is read from `raw`; geo_source comes from the table column.
--
-- Parameters:
--   p_service_type  'inc' or 'crq' (compared lower-cased); NULL or '' = both tables
--   p_status        exact match on raw->>'normalized_status'; NULL or '' = any status
--   p_open_only     when true, only rows whose raw is_actionable is true; a NULL
--                   argument filters the same way as true
CREATE OR REPLACE FUNCTION reporting.fn_tickets_for_map(
    p_service_type text    DEFAULT NULL,
    p_status       text    DEFAULT NULL,
    p_open_only    boolean DEFAULT true
)
RETURNS jsonb
LANGUAGE plpgsql
STABLE
AS $fn$
DECLARE
    v_result jsonb;
BEGIN
    -- Treat empty strings as "no filter".
    p_service_type := lower(NULLIF(p_service_type, ''));
    p_status := NULLIF(p_status, '');

    WITH filtered AS (
        SELECT 'inc'::text AS service_type, raw, geom, geo_source
        FROM tickets.inc
        WHERE geom IS NOT NULL
          AND (p_service_type IS NULL OR p_service_type = 'inc')
          AND (p_status IS NULL OR raw->>'normalized_status' = p_status)
          AND (NOT p_open_only OR (raw->>'is_actionable')::boolean IS TRUE)
        UNION ALL
        SELECT 'crq'::text AS service_type, raw, geom, geo_source
        FROM tickets.crq
        WHERE geom IS NOT NULL
          AND (p_service_type IS NULL OR p_service_type = 'crq')
          AND (p_status IS NULL OR raw->>'normalized_status' = p_status)
          AND (NOT p_open_only OR (raw->>'is_actionable')::boolean IS TRUE)
    )
    SELECT jsonb_build_object(
        'summary', jsonb_build_object(
            'ticket_count', COUNT(*),
            'inc',  COUNT(*) FILTER (WHERE service_type = 'inc'),
            'crq',  COUNT(*) FILTER (WHERE service_type = 'crq'),
            'open', COUNT(*) FILTER (WHERE (raw->>'is_actionable')::boolean IS TRUE),
            -- jsonb_object_agg raises on a NULL key, so tickets whose raw has no
            -- normalized_status are counted under 'unknown' instead of failing the
            -- whole call. An empty result yields {} rather than JSON null, in line
            -- with the '[]' fallback used for features.
            'by_status', COALESCE(
                (SELECT jsonb_object_agg(z.s, z.c)
                   FROM (SELECT COALESCE(f.raw->>'normalized_status', 'unknown') AS s,
                                COUNT(*) AS c
                           FROM filtered f
                          GROUP BY COALESCE(f.raw->>'normalized_status', 'unknown')) z),
                '{}'::jsonb)
        ),
        'geojson', jsonb_build_object(
            'type', 'FeatureCollection',
            'features', COALESCE(jsonb_agg(
                jsonb_build_object(
                    'type', 'Feature',
                    'properties', jsonb_build_object(
                        'ticket_id',     raw->>'ticket_id',
                        'service_type',  service_type,
                        'status',        raw->>'normalized_status',
                        'raw_status',    raw->>'raw_status',
                        'cluster',       raw->>'cluster',
                        'region',        raw->>'region',
                        'location_name', raw->>'location_name',
                        'department',    raw->>'department',
                        'owner',         raw->>'owner',
                        'assigned_team', raw->>'assigned_team',
                        'sla_status',    raw->>'sla_status',
                        'is_actionable', (raw->>'is_actionable')::boolean,
                        'geo_source',    geo_source,
                        'created_at',    raw->>'created_at_service',
                        'scheduled_at',  raw->>'scheduled_at'
                    ),
                    'geometry', ST_AsGeoJSON(geom)::jsonb
                )
            ), '[]'::jsonb)
        )
    )
    INTO v_result
    FROM filtered;

    RETURN v_result;
END
$fn$;
COMMENT ON FUNCTION reporting.fn_tickets_for_map(text, text, boolean) IS
'INC/CRQ tickets (tickets.inc + tickets.crq, raw-jsonb-first) as GeoJSON. Migration 23.';
-- ── 7. grants ─────────────────────────────────────────────────────────────────
-- Grants are applied only for roles that exist in this cluster, so the
-- migration also runs on databases where some of them were never created.
DO $grants$
DECLARE
    v_reader text;
BEGIN
    -- Owner role: schema usage/create, DML on every table, EXECUTE on every function.
    IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'tracksolid_owner') THEN
        GRANT USAGE, CREATE ON SCHEMA tickets TO tracksolid_owner;
        GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA tickets TO tracksolid_owner;
        GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA tickets TO tracksolid_owner;
    END IF;

    -- Read-only roles share one grant set: schema usage, SELECT on the four
    -- tickets tables, EXECUTE on the map read function.
    FOREACH v_reader IN ARRAY ARRAY['dashboard_ro', 'grafana_ro']
    LOOP
        CONTINUE WHEN NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = v_reader);

        EXECUTE format('GRANT USAGE ON SCHEMA tickets TO %I', v_reader);
        EXECUTE format(
            'GRANT SELECT ON tickets.inc, tickets.crq, tickets.geo_clusters, tickets.geo_locations TO %I',
            v_reader);
        EXECUTE format(
            'GRANT EXECUTE ON FUNCTION reporting.fn_tickets_for_map(text, text, boolean) TO %I',
            v_reader);
    END LOOP;
END $grants$;