tracksolid_timescale_grafan.../08_fix_etl_vehicle_key.sql

105 lines
4.8 KiB
MySQL
Raw Normal View History

fix: BUG-01 ETL type crash, BUG-02 multi-account audit, BUG-03 diagnostic BUG-01 (CRITICAL): dwh_gold.refresh_daily_metrics inserted t.imei (TEXT) into fact_daily_fleet_metrics.vehicle_key (INTEGER REFERENCES dim_vehicles), so the nightly ETL would have raised "invalid input syntax for type integer" on every run. Migration 08 backfills dim_vehicles from tracksolid.devices and rewrites the function to JOIN through dim_vehicles, returning the serial vehicle_key. The function also re-syncs dim_vehicles at the top of each call so newly registered devices appear in the warehouse without manual seeding. BUG-02 (HIGH): sync_driver_audit.py only queried TARGET_ACCOUNT, ignoring the Fireside@HQ and Fireside_MSA sub-accounts. The audit now iterates TARGETS (matching FIX-M19 in ingest_movement_rev.sync_devices), dedupes devices by IMEI, and tolerates per-target failures. BUG-03 (HIGH, diagnostic only): the webhook trip handler stores item["miles"] straight into distance_km. The field name is suspicious and FIX-M16 already proved the polling endpoint mis-documents its units. Added a SQL diagnostic that compares the distribution of stored-km / great-circle-km for push-source vs poll-source trips over 30 days — the ratio test will tell us whether the push value needs a /1.609 (miles), /1000 (metres), or no conversion. The existing calculation is left unchanged until the data confirms the unit; the old FIX-M11 comment is replaced with a BUG-03 pointer to the diagnostic. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-15 12:34:43 +00:00
-- 08_fix_etl_vehicle_key.sql
-- Fixes BUG-01: dwh_gold.refresh_daily_metrics() inserted t.imei (TEXT) into
-- fact_daily_fleet_metrics.vehicle_key (INTEGER REFERENCES dim_vehicles), so
-- every nightly call raised "invalid input syntax for type integer".
--
-- The fix has two parts:
-- 1. Seed dwh_gold.dim_vehicles from tracksolid.devices so every IMEI has
-- a serial vehicle_key to point at.
-- 2. Rewrite refresh_daily_metrics() to JOIN through dim_vehicles and
-- SELECT the serial key instead of the raw IMEI. The function also
-- upserts dim_vehicles at the top of each run so newly-registered
-- devices appear in the warehouse without manual intervention.
BEGIN;
-- ── 1. Backfill dim_vehicles ─────────────────────────────────────────────────
-- Upserts one dimension row per row of tracksolid.devices, keyed on imei.
--   * is_active      – TRUE when enabled_flag is 1 or NULL, FALSE otherwise.
--   * vehicle_number – copied from the device row so dashboards can label
--                      charts without joining back to tracksolid.devices.
-- Rows that already exist are refreshed in place rather than duplicated.
INSERT INTO dwh_gold.dim_vehicles (imei, vehicle_number, is_active)
SELECT
    dev.imei,
    dev.vehicle_number,
    (COALESCE(dev.enabled_flag, 1) = 1) AS is_active
FROM tracksolid.devices AS dev
ON CONFLICT (imei) DO UPDATE
    SET vehicle_number = EXCLUDED.vehicle_number,
        is_active      = EXCLUDED.is_active;
-- ── 2. Rewrite ETL function ──────────────────────────────────────────────────
-- dwh_gold.refresh_daily_metrics(target_date)
--
-- Populates or refreshes dwh_gold.fact_daily_fleet_metrics for one calendar
-- day (evaluated in the Africa/Nairobi time zone). Safe to re-run: existing
-- (day, vehicle_key) rows are overwritten via ON CONFLICT.
--
-- Parameters:
--   target_date – the local (Africa/Nairobi) date to aggregate.
-- Returns: void.
--
-- Steps:
--   1. Upsert dwh_gold.dim_vehicles from tracksolid.devices so newly
--      registered devices get a vehicle_key before the fact load.
--   2. Aggregate completed trips (end_time IS NOT NULL) per IMEI.
--   3. Aggregate alarms per IMEI.
--   4. Join both aggregates to dim_vehicles and upsert the fact rows.
--
-- Trips and alarms are aggregated SEPARATELY on purpose. Joining raw alarm
-- rows to raw trip rows on imei repeats every trip once per alarm, which
-- multiplies the SUM()/COUNT(*) trip metrics by the alarm count and the
-- alarm counts by the trip count.
--
-- NOTE(review): only vehicles with at least one completed trip on
-- target_date produce a fact row; alarms for a vehicle with no trips that
-- day are not recorded. Trips whose IMEI is absent from tracksolid.devices
-- (and therefore from dim_vehicles) are dropped by the inner join.
-- NOTE(review): a fact row written by an earlier run is not removed if the
-- vehicle no longer has qualifying trips on a re-run — confirm whether
-- that matters for late-arriving corrections.
CREATE OR REPLACE FUNCTION dwh_gold.refresh_daily_metrics(target_date DATE)
RETURNS void LANGUAGE plpgsql AS $$
BEGIN
    -- 1. Sync dim_vehicles first so every registered device has a
    --    vehicle_key; otherwise a brand-new device's trips would be
    --    dropped by the join to dim_vehicles below.
    INSERT INTO dwh_gold.dim_vehicles (imei, vehicle_number, is_active)
    SELECT
        d.imei,
        d.vehicle_number,
        COALESCE(d.enabled_flag, 1) = 1
    FROM tracksolid.devices d
    ON CONFLICT (imei) DO UPDATE SET
        vehicle_number = EXCLUDED.vehicle_number,
        is_active      = EXCLUDED.is_active;

    -- 2–4. Build per-IMEI aggregates independently, then load the fact table.
    WITH trip_totals AS (
        -- One row per IMEI: metrics over trips that started on target_date
        -- (Nairobi local date) and have finished.
        SELECT
            t.imei,
            ROUND(SUM(t.distance_km)::numeric, 3)                  AS total_distance_km,
            COUNT(*)                                               AS total_trips,
            ROUND((SUM(t.driving_time_s) / 3600.0)::numeric, 2)    AS total_drive_hours,
            ROUND((SUM(t.idle_time_s) / 3600.0)::numeric, 2)       AS total_idle_hours,
            ROUND(SUM(t.fuel_consumed_l)::numeric, 3)              AS fuel_consumed_l,
            MIN(t.start_time AT TIME ZONE 'Africa/Nairobi')::TIME  AS day_start_time,
            MAX(t.end_time AT TIME ZONE 'Africa/Nairobi')::TIME    AS day_end_time,
            ROUND(AVG(t.avg_speed_kmh)::numeric, 2)                AS avg_speed_kmh,
            MAX(t.max_speed_kmh)                                   AS peak_speed_kmh
        FROM tracksolid.trips t
        WHERE DATE(t.start_time AT TIME ZONE 'Africa/Nairobi') = target_date
          AND t.end_time IS NOT NULL
        GROUP BY t.imei
    ),
    alarm_totals AS (
        -- One row per IMEI: alarms raised on target_date (Nairobi local date).
        -- Overspeed alarms are those whose alarm_type contains "speed"
        -- (case-insensitive).
        SELECT
            a.imei,
            COUNT(a.id)                                            AS alarm_count,
            COUNT(a.id) FILTER (WHERE a.alarm_type ILIKE '%speed%') AS overspeed_count
        FROM tracksolid.alarms a
        WHERE DATE(a.alarm_time AT TIME ZONE 'Africa/Nairobi') = target_date
        GROUP BY a.imei
    )
    INSERT INTO dwh_gold.fact_daily_fleet_metrics (
        day,
        vehicle_key,
        total_distance_km,
        total_trips,
        total_drive_hours,
        total_idle_hours,
        fuel_consumed_l,
        alarm_count,
        overspeed_count,
        day_start_time,
        day_end_time,
        avg_speed_kmh,
        peak_speed_kmh
    )
    SELECT
        target_date                     AS day,
        dv.vehicle_key                  AS vehicle_key,
        tt.total_distance_km,
        tt.total_trips,
        tt.total_drive_hours,
        tt.total_idle_hours,
        tt.fuel_consumed_l,
        -- Vehicles with no alarms that day have no alarm_totals row.
        COALESCE(al.alarm_count, 0)     AS alarm_count,
        COALESCE(al.overspeed_count, 0) AS overspeed_count,
        tt.day_start_time,
        tt.day_end_time,
        tt.avg_speed_kmh,
        tt.peak_speed_kmh
    FROM trip_totals tt
    JOIN dwh_gold.dim_vehicles dv
        ON dv.imei = tt.imei
    LEFT JOIN alarm_totals al
        ON al.imei = tt.imei
    ON CONFLICT (day, vehicle_key) DO UPDATE SET
        total_distance_km = EXCLUDED.total_distance_km,
        total_trips       = EXCLUDED.total_trips,
        total_drive_hours = EXCLUDED.total_drive_hours,
        total_idle_hours  = EXCLUDED.total_idle_hours,
        fuel_consumed_l   = EXCLUDED.fuel_consumed_l,
        alarm_count       = EXCLUDED.alarm_count,
        overspeed_count   = EXCLUDED.overspeed_count,
        day_start_time    = EXCLUDED.day_start_time,
        day_end_time      = EXCLUDED.day_end_time,
        avg_speed_kmh     = EXCLUDED.avg_speed_kmh,
        peak_speed_kmh    = EXCLUDED.peak_speed_kmh;
END;
$$;
-- Attach a catalog description to refresh_daily_metrics(DATE) so the purpose
-- and the intended nightly invocation are visible from the database itself.
-- The three adjacent string literals below are separated only by newlines,
-- so PostgreSQL concatenates them into a single comment string.
COMMENT ON FUNCTION dwh_gold.refresh_daily_metrics(DATE)
IS 'Populates or refreshes fact_daily_fleet_metrics for the given date. '
'Joins tracksolid.trips through dwh_gold.dim_vehicles to map IMEI → vehicle_key. '
'Call nightly: SELECT dwh_gold.refresh_daily_metrics(CURRENT_DATE - 1);';
-- Close the transaction opened by BEGIN at the top of this migration, so the
-- backfill, the function replacement and the comment are applied atomically.
COMMIT;