Phase 0 of the three-stakeholder analytics redesign: - 08_analytics_config.sql: ops.cost_rates + ops.kpi_targets with seed fuel rates (KES 195/L NBO+MBA, UGX 5200/L KLA) and 6 seed KPI targets (utilisation_pct, idle_pct global+osp-patrol, fuel_kes_per_100km, mttr_hours, alarms_per_100km). Granted SELECT to grafana_ro. Wired into run_migrations.py MIGRATIONS. - import_drivers_csv.py: full rewrite for the new Mitieng CSV (20260427_FSG_Vehicles_mitieng.csv). Snake_case columns, drops _infer_city() plate-prefix logic in favour of reading assigned_city directly. Adds cost_centre, assigned_route, vehicle_category, vehicle_brand, fuel_100km, depot_address. Treats the literal "NULL" string as missing. Reuses clean(), clean_num(), clean_ts(), get_conn(), get_logger() from ts_shared_rev. Special-cases numeric and timestamptz columns in the UPDATE clause. - audit_device_reconciliation.py: read-only audit comparing the CSV against tracksolid.devices. Reports per-account row counts, IMEIs on one side only, and devices on both sides whose metadata is still NULL. - 260427_device_reconciliation.md + 260427_audit_output.txt: Phase 0.2 reconciliation record. First run: DB has 172 devices, CSV has 162, delta +10 (10 IMEIs in DB-only, mostly fireside-account auto-syncs). Importer run with --only-null --apply filled 154 rows; coverage now assigned_city 152/172, cost_centre 150/172. Applied to stage on 2026-04-27 23:35 UTC. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
289 lines
11 KiB
Python
289 lines
11 KiB
Python
"""
|
|
import_drivers_csv.py — Fireside Communications · Driver & Vehicle CSV Import
|
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
One-shot script: reads the snake_case Fireside Group vehicle CSV
|
|
(`20260427_FSG_Vehicles_mitieng.csv`), compares each row against the
|
|
current `tracksolid.devices` values, and updates the DB.
|
|
|
|
The CSV columns mirror the DB schema directly — no inference. Cells with the
|
|
literal string "NULL" are treated as missing.
|
|
|
|
Fields imported (per Phase 0.1 of the Business Analytics redesign plan):
|
|
Identity : driver_name, driver_phone, vehicle_number, vehicle_name,
|
|
vehicle_models, mc_type, device_name
|
|
SIM : sim, iccid, imsi
|
|
Lifecycle : activation_time, expiration
|
|
Business meta : assigned_city, cost_centre, assigned_route,
|
|
vehicle_category, vehicle_brand, fuel_100km, depot_address
|
|
|
|
`depot_geom` (PostGIS Point) is intentionally NOT imported — needs WKT and
|
|
isn't present as coordinates in the CSV. Set it via a follow-up migration
|
|
when geofences are loaded.
|
|
|
|
Usage:
|
|
# Dry-run — shows diff, writes nothing
|
|
python import_drivers_csv.py
|
|
|
|
# Filter to a single IMEI (dry-run)
|
|
python import_drivers_csv.py --imei 862798052707896
|
|
|
|
# Apply all changes to DB
|
|
python import_drivers_csv.py --apply
|
|
|
|
# Only fill fields that are currently NULL in the DB (never overwrite)
|
|
python import_drivers_csv.py --only-null --apply
|
|
|
|
# Use a different CSV
|
|
python import_drivers_csv.py --csv path/to/file.csv
|
|
|
|
Pre-requisite:
|
|
Migrations 02, 05, 06 must be applied (they add the metadata columns).
|
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
"""
|
|
|
|
import argparse
|
|
import csv
|
|
from pathlib import Path
|
|
|
|
from ts_shared_rev import clean, clean_num, clean_ts, get_conn, get_logger
|
|
|
|
log = get_logger("csv_import")
|
|
|
|
# Default CSV: the Mitieng export shipped next to this script.
DEFAULT_CSV_PATH = Path(__file__).parent / "20260427_FSG_Vehicles_mitieng.csv"

# Columns fetched from DB for diff comparison.
# Mirrors the CSV header 1:1 — no inference/renaming happens at import time.
DB_COLS = [
    "imei",
    # Identity
    "driver_name", "driver_phone", "vehicle_number", "vehicle_name",
    "vehicle_models", "mc_type", "device_name",
    # SIM
    "sim", "iccid", "imsi",
    # Lifecycle
    "activation_time", "expiration",
    # Business metadata (Phase 0.1 additions)
    "assigned_city", "cost_centre", "assigned_route",
    "vehicle_category", "vehicle_brand", "fuel_100km", "depot_address",
]

# Driver Name values that are placeholders — skip writing driver_name for these.
# ("identification" rows are skipped entirely in build_update; "ug" rows only
# suppress the driver_name column.)
_DRIVER_SKIP = {"identification", "ug"}

# Columns that need an explicit cast in the UPDATE statement.
_TIMESTAMPTZ_COLS = {"activation_time", "expiration"}
_NUMERIC_COLS = {"fuel_100km"}
|
|
|
|
|
|
def _read(row: dict, col: str) -> str | None:
    """Fetch *col* from a CSV row; literal 'NULL'/'None' (any case) count as missing."""
    value = clean(row.get(col))
    if value is not None and value.upper() not in {"NULL", "NONE"}:
        return value
    return None
|
|
|
|
|
|
def _read_num(row: dict, col: str) -> float | None:
    """Numeric variant of _read(): parse the cell via clean_num, or None if missing."""
    raw = _read(row, col)
    if raw is None:
        return None
    return clean_num(raw)
|
|
|
|
|
|
def _read_ts(row: dict, col: str) -> str | None:
    """Timestamp variant of _read(): normalise the cell via clean_ts, or None if missing."""
    raw = _read(row, col)
    if raw is None:
        return None
    return clean_ts(raw)
|
|
|
|
|
|
def load_csv(csv_path: Path) -> dict[str, dict]:
    """Load the vehicle CSV into a dict keyed by IMEI.

    Rows with an empty ``imei`` cell are ignored.  If the same IMEI appears
    more than once the last occurrence wins (unchanged behaviour), but a
    warning is now logged so the data problem is visible instead of being
    silently swallowed.

    Args:
        csv_path: path to the snake_case vehicle CSV.

    Returns:
        Mapping of IMEI → raw CSV row dict.
    """
    rows: dict[str, dict] = {}
    # utf-8-sig strips the BOM that Excel-exported CSVs frequently carry.
    with open(csv_path, encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            imei = (row.get("imei") or "").strip()
            if not imei:
                continue
            if imei in rows:
                log.warning("Duplicate IMEI %s in CSV — keeping the last row", imei)
            rows[imei] = row
    log.info("CSV loaded: %d rows from %s", len(rows), csv_path.name)
    return rows
|
|
|
|
|
|
def load_db_devices() -> dict[str, dict]:
    """Fetch current device rows from DB, keyed by IMEI."""
    devices_by_imei: dict[str, dict] = {}
    query = f"SELECT {', '.join(DB_COLS)} FROM tracksolid.devices"
    with get_conn() as conn, conn.cursor() as cur:
        cur.execute(query)
        column_names = [desc[0] for desc in cur.description]
        for values in cur.fetchall():
            record = dict(zip(column_names, values))
            devices_by_imei[record["imei"]] = record
    log.info("DB loaded: %d devices", len(devices_by_imei))
    return devices_by_imei
|
|
|
|
|
|
def build_update(csv_row: dict, db_row: dict | None, only_null: bool) -> dict[str, object]:
    """
    Return a dict of column→new_value for fields that need updating.

    When only_null=True, skip any DB column that already has a value.
    The driver_name column is skipped for placeholder-labelled devices,
    and "Identification" rows (header-like placeholders) are dropped whole.
    """
    driver_raw = _read(csv_row, "driver_name") or ""
    is_placeholder = driver_raw.lower() in _DRIVER_SKIP
    if driver_raw.lower() == "identification":
        return {}

    # Column → reader dispatch, in the same order the columns are written.
    readers = {
        # Identity
        "driver_phone": _read,
        "vehicle_number": _read,
        "vehicle_name": _read,
        "vehicle_models": _read,
        "mc_type": _read,
        "device_name": _read,
        # SIM
        "sim": _read,
        "iccid": _read,
        "imsi": _read,
        # Lifecycle
        "activation_time": _read_ts,
        "expiration": _read_ts,
        # Business metadata
        "assigned_city": _read,
        "cost_centre": _read,
        "assigned_route": _read,
        "vehicle_category": _read,
        "vehicle_brand": _read,
        "fuel_100km": _read_num,
        "depot_address": _read,
    }
    proposed: dict[str, object] = {col: fn(csv_row, col) for col, fn in readers.items()}

    if driver_raw and not is_placeholder:
        proposed["driver_name"] = driver_raw

    # Drop None values — no point sending NULL to overwrite NULL.
    proposed = {col: val for col, val in proposed.items() if val is not None}

    if only_null and db_row is not None:
        # only_null: never touch a column that already holds a value in the DB.
        proposed = {col: val for col, val in proposed.items() if db_row.get(col) is None}

    return proposed
|
|
|
|
|
|
def print_diff(imei: str, updates: dict[str, object], db_row: dict | None) -> None:
|
|
"""Pretty-print what will change for one device."""
|
|
if not updates:
|
|
return
|
|
db = db_row or {}
|
|
print(f"\n IMEI {imei}:")
|
|
for col, new_val in sorted(updates.items()):
|
|
old_val = db.get(col)
|
|
if str(old_val) != str(new_val):
|
|
print(f" {col:<20} {str(old_val):<30} → {new_val}")
|
|
|
|
|
|
def _set_clause(col: str) -> str:
    """SQL fragment for `col = ...` honouring per-column casts."""
    # Pick the value expression; every variant is wrapped in the same
    # COALESCE so a missing parameter keeps the existing column value.
    if col in _TIMESTAMPTZ_COLS:
        value_expr = "%s::TIMESTAMPTZ"
    elif col in _NUMERIC_COLS:
        # Parameter is already a float; no NULLIF dance needed.
        value_expr = "%s::NUMERIC"
    else:
        value_expr = "NULLIF(%s, '')"
    return f"{col} = COALESCE({value_expr}, {col})"
|
|
|
|
|
|
def _placeholder(col: str) -> str:
    """SQL fragment for a single VALUES placeholder honouring per-column casts."""
    if col in _TIMESTAMPTZ_COLS:
        return "%s::TIMESTAMPTZ"
    return "%s::NUMERIC" if col in _NUMERIC_COLS else "%s"
|
|
|
|
|
|
def _report_new_device(imei: str, updates: dict[str, object]) -> None:
    """Pretty-print the fields a brand-new (not-yet-in-DB) device will get."""
    print(f"\n  [NEW] IMEI {imei}:")
    for col, new_val in sorted(updates.items()):
        print(f"    {col:<20} → {new_val}")


def _insert_device(cur, imei: str, updates: dict[str, object]) -> None:
    """INSERT a stub device row so incoming alarms/positions don't trip the FK."""
    cols = ["imei"] + list(updates)
    vals = [imei] + list(updates.values())
    placeholders = ["%s"] + [_placeholder(c) for c in updates]
    cur.execute(
        f"INSERT INTO tracksolid.devices ({', '.join(cols)}) "
        f"VALUES ({', '.join(placeholders)}) "
        "ON CONFLICT (imei) DO NOTHING",
        vals,
    )


def _update_device(cur, imei: str, updates: dict[str, object]) -> None:
    """UPDATE an existing device row with the proposed values, bumping updated_at."""
    set_clauses = [_set_clause(c) for c in updates] + ["updated_at = NOW()"]
    params = list(updates.values()) + [imei]
    cur.execute(
        f"UPDATE tracksolid.devices SET {', '.join(set_clauses)} WHERE imei = %s",
        params,
    )


def run(apply: bool, only_null: bool, filter_imei: str | None, csv_path: Path) -> None:
    """Diff the CSV against tracksolid.devices and (optionally) write changes.

    Args:
        apply:       when False (the default) nothing is written — the diff
                     and a summary are printed only.
        only_null:   only fill DB columns that are currently NULL; never
                     overwrite an existing value.
        filter_imei: restrict processing to a single IMEI, or None for all.
        csv_path:    path to the snake_case vehicle CSV.
    """
    csv_rows = load_csv(csv_path)
    db_rows = load_db_devices()

    if filter_imei:
        csv_rows = {k: v for k, v in csv_rows.items() if k == filter_imei}
        if not csv_rows:
            print(f"IMEI {filter_imei} not found in CSV.")
            return

    updated = inserted = skipped = no_change = 0

    with get_conn() as conn:
        with conn.cursor() as cur:
            for imei, csv_row in csv_rows.items():
                db_row = db_rows.get(imei)
                updates = build_update(csv_row, db_row, only_null)

                if not updates:
                    # Distinguish deliberately-skipped "Identification"
                    # placeholder rows from rows with nothing new to add.
                    driver_raw = (_read(csv_row, "driver_name") or "").lower()
                    if driver_raw == "identification":
                        skipped += 1
                    else:
                        no_change += 1
                    continue

                if db_row is None:
                    # Device not yet synced from API — insert a stub row so
                    # incoming alarms / positions don't trip the FK constraint.
                    _report_new_device(imei, updates)
                    if apply:
                        _insert_device(cur, imei, updates)
                    inserted += 1  # counts as "would insert" in dry-run
                    continue

                print_diff(imei, updates, db_row)
                if apply:
                    _update_device(cur, imei, updates)
                updated += 1  # counts as "would update" in dry-run

    mode = "APPLIED" if apply else "DRY-RUN"
    print(f"\n{'='*60}")
    print(f"  {mode} COMPLETE")
    print(f"{'='*60}")
    print(f"  Would update / updated  : {updated}")
    print(f"  Would insert / inserted : {inserted}")
    print(f"  No change needed        : {no_change}")
    print(f"  Skipped (Identification): {skipped}")
    if not apply:
        print("\n  Run with --apply to commit changes.")
|
|
|
|
|
|
if __name__ == "__main__":
    # CLI entry point — dry-run by default; --apply commits to the DB.
    arg_parser = argparse.ArgumentParser(
        description="Import driver/vehicle details from CSV into tracksolid.devices"
    )
    arg_parser.add_argument("--apply", action="store_true",
                            help="Write changes to DB (default: dry-run)")
    arg_parser.add_argument("--only-null", action="store_true",
                            help="Only update fields currently NULL in the DB")
    arg_parser.add_argument("--imei", default=None, help="Limit to a single IMEI")
    arg_parser.add_argument("--csv", default=str(DEFAULT_CSV_PATH),
                            help=f"Path to the CSV (default: {DEFAULT_CSV_PATH.name})")
    opts = arg_parser.parse_args()

    chosen_csv = Path(opts.csv)
    if not chosen_csv.exists():
        log.error("CSV file not found: %s", chosen_csv)
        raise SystemExit(1)

    run(apply=opts.apply, only_null=opts.only_null, filter_imei=opts.imei, csv_path=chosen_csv)
|