-- 08_fix_etl_vehicle_key.sql -- Fixes two distinct bugs in dwh_gold.refresh_daily_metrics(): -- -- BUG-01a (type crash): the original function inserted t.imei (TEXT) into -- fact_daily_fleet_metrics.vehicle_key (INTEGER REFERENCES dim_vehicles), -- so every nightly call raised "invalid input syntax for type integer". -- -- BUG-01b (cartesian explosion): the original function joined -- trips × alarms in a single SELECT. For every trip row it produced one -- output row per matching alarm, multiplying every SUM/COUNT over trip -- columns by the per-IMEI alarm count. Spot-checking the broken output -- showed total_trips identical to alarm_count and drive_hours > 1000/day. -- -- The fix has three parts: -- 1. Seed dwh_gold.dim_vehicles from tracksolid.devices so every IMEI -- has a serial vehicle_key to point at. -- 2. Rewrite refresh_daily_metrics() so trip aggregates and alarm -- aggregates are computed in separate CTEs and then joined on imei. -- 3. Map IMEI → vehicle_key via dim_vehicles inside the same statement. BEGIN; -- ── 1. Backfill dim_vehicles ───────────────────────────────────────────────── -- One row per device. is_active mirrors enabled_flag; vehicle_number tracks -- the plate so dashboards can label charts without joining back to devices. INSERT INTO dwh_gold.dim_vehicles (imei, vehicle_number, is_active) SELECT d.imei, d.vehicle_number, COALESCE(d.enabled_flag, 1) = 1 FROM tracksolid.devices d ON CONFLICT (imei) DO UPDATE SET vehicle_number = EXCLUDED.vehicle_number, is_active = EXCLUDED.is_active; -- ── 2. Rewrite ETL function ────────────────────────────────────────────────── CREATE OR REPLACE FUNCTION dwh_gold.refresh_daily_metrics(target_date DATE) RETURNS void LANGUAGE plpgsql AS $$ BEGIN -- Sync dim_vehicles first so any IMEI seen in trips has a vehicle_key. -- Without this, a brand-new device would have trip rows but no -- dim_vehicles entry, and the JOIN below would drop its metrics. INSERT INTO dwh_gold.dim_vehicles (imei, vehicle_number, is_active) SELECT d.imei, d.vehicle_number, COALESCE(d.enabled_flag, 1) = 1 FROM tracksolid.devices d ON CONFLICT (imei) DO UPDATE SET vehicle_number = EXCLUDED.vehicle_number, is_active = EXCLUDED.is_active; -- Aggregate trips and alarms in separate CTEs to avoid the cartesian -- multiplication that the original single-SELECT version produced. WITH trip_agg AS ( SELECT t.imei, SUM(t.distance_km) AS total_distance_km, COUNT(*) AS total_trips, SUM(t.driving_time_s) AS total_drive_seconds, SUM(t.idle_time_s) AS total_idle_seconds, SUM(t.fuel_consumed_l) AS fuel_consumed_l, MIN(t.start_time AT TIME ZONE 'Africa/Nairobi')::TIME AS day_start_time, MAX(t.end_time AT TIME ZONE 'Africa/Nairobi')::TIME AS day_end_time, AVG(t.avg_speed_kmh) AS avg_speed_kmh, MAX(t.max_speed_kmh) AS peak_speed_kmh FROM tracksolid.trips t WHERE DATE(t.start_time AT TIME ZONE 'Africa/Nairobi') = target_date AND t.end_time IS NOT NULL GROUP BY t.imei ), alarm_agg AS ( SELECT a.imei, COUNT(*) AS alarm_count, COUNT(*) FILTER (WHERE a.alarm_type ILIKE '%speed%') AS overspeed_count FROM tracksolid.alarms a WHERE DATE(a.alarm_time AT TIME ZONE 'Africa/Nairobi') = target_date GROUP BY a.imei ) INSERT INTO dwh_gold.fact_daily_fleet_metrics ( day, vehicle_key, total_distance_km, total_trips, total_drive_hours, total_idle_hours, fuel_consumed_l, alarm_count, overspeed_count, day_start_time, day_end_time, avg_speed_kmh, peak_speed_kmh ) SELECT target_date AS day, dv.vehicle_key AS vehicle_key, ROUND(tr.total_distance_km::numeric, 3) AS total_distance_km, tr.total_trips AS total_trips, ROUND((tr.total_drive_seconds / 3600.0)::numeric, 2) AS total_drive_hours, ROUND((tr.total_idle_seconds / 3600.0)::numeric, 2) AS total_idle_hours, ROUND(tr.fuel_consumed_l::numeric, 3) AS fuel_consumed_l, COALESCE(al.alarm_count, 0) AS alarm_count, COALESCE(al.overspeed_count, 0) AS overspeed_count, tr.day_start_time AS day_start_time, tr.day_end_time AS day_end_time, ROUND(tr.avg_speed_kmh::numeric, 2) AS avg_speed_kmh, tr.peak_speed_kmh AS peak_speed_kmh FROM trip_agg tr JOIN dwh_gold.dim_vehicles dv ON dv.imei = tr.imei LEFT JOIN alarm_agg al ON al.imei = tr.imei ON CONFLICT (day, vehicle_key) DO UPDATE SET total_distance_km = EXCLUDED.total_distance_km, total_trips = EXCLUDED.total_trips, total_drive_hours = EXCLUDED.total_drive_hours, total_idle_hours = EXCLUDED.total_idle_hours, fuel_consumed_l = EXCLUDED.fuel_consumed_l, alarm_count = EXCLUDED.alarm_count, overspeed_count = EXCLUDED.overspeed_count, day_start_time = EXCLUDED.day_start_time, day_end_time = EXCLUDED.day_end_time, avg_speed_kmh = EXCLUDED.avg_speed_kmh, peak_speed_kmh = EXCLUDED.peak_speed_kmh; END; $$; COMMENT ON FUNCTION dwh_gold.refresh_daily_metrics(DATE) IS 'Populates or refreshes fact_daily_fleet_metrics for the given date. ' 'Trips and alarms are aggregated in separate CTEs to avoid cartesian ' 'multiplication. Maps IMEI → vehicle_key via dwh_gold.dim_vehicles. ' 'Call nightly: SELECT dwh_gold.refresh_daily_metrics(CURRENT_DATE - 1);'; COMMIT;