tracksolid_timescale_grafan.../import_drivers_csv.py

267 lines
10 KiB
Python
Raw Normal View History

"""
import_drivers_csv.py — Fireside Communications · Driver & Vehicle CSV Import

One-shot script: reads 20260414_FS__Logistics - final_fixed.csv, compares
each row against the current tracksolid.devices values, and updates the DB.

Usage:
    # Dry-run — shows diff, writes nothing
    python import_drivers_csv.py

    # Filter to a single IMEI (dry-run)
    python import_drivers_csv.py --imei 862798052707896

    # Apply all changes to DB
    python import_drivers_csv.py --apply

    # Only fill fields that are currently NULL in the DB (never overwrite)
    python import_drivers_csv.py --only-null --apply

Pre-requisite:
    Migration 06 must be applied first (adds assigned_city / cost_centre columns).
"""
import argparse
import csv
import os
import sys
import time
from datetime import date
from pathlib import Path
from ts_shared_rev import clean, clean_num, clean_ts, get_conn, get_logger
# Logger shared by the loader functions in this script.
log = get_logger("csv_import")

# The input CSV is resolved relative to the directory containing this script.
CSV_PATH = Path(__file__).parent / "20260414_FS__Logistics - final_fixed.csv"

# Columns read back from tracksolid.devices so CSV values can be compared
# against what is currently stored.
DB_COLS = [
    "imei",
    "driver_name",
    "driver_phone",
    "vehicle_number",
    "vehicle_name",
    "vehicle_models",
    "cost_centre",
    "sim",
    "iccid",
    "imsi",
    "mc_type",
    "activation_time",
    "expiration",
    "device_name",
    "assigned_city",
]

# Lower-cased "Driver Name" values that are placeholders rather than real
# people; driver_name is never written for these.
_DRIVER_SKIP = {"identification", "ug"}
def _infer_city(plate: str | None) -> str | None:
    """Derive assigned_city from a license plate prefix.

    [BUG-08] Kenyan plates (K-series) span both Nairobi and Mombasa, and
    the prefix alone is not a reliable indicator: KC-series tends to be
    Coast, KD-series tends to be Nairobi, but there are exceptions in both
    directions. Rather than misclassify Coast vehicles as Nairobi (the
    previous behaviour), return None for any Kenyan plate so they fall
    through to `assigned_city IS NULL`. Analytics views already
    COALESCE(...) those into the `unassigned` bucket; operators can tag
    Mombasa/Nairobi explicitly via the DB or a future onboarding signal
    (e.g. SIM MCC).

    Uganda (UMA / UAG) remains unambiguous -> KLA.

    Args:
        plate: Raw plate text; None or blank is treated as "no plate".

    Returns:
        "KLA" for plates starting with UMA or UAG (case-insensitive, after
        stripping whitespace); None for everything else.
    """
    p = (plate or "").strip().upper()
    if p.startswith(("UMA", "UAG")):
        return "KLA"
    if p.startswith("K"):
        # Warn rather than guess, so the operator knows to tag the city by hand.
        log.warning("Plate %s: Kenyan prefix is ambiguous (NBO vs MBA) — "
                    "leaving assigned_city NULL for manual tagging", p)
    return None
def _clean_date(v: str | None) -> str | None:
    """Validate a calendar date and return it as a canonical YYYY-MM-DD string.

    Args:
        v: Raw CSV cell; None, empty, or whitespace-only means "no date".

    Returns:
        The date in YYYY-MM-DD form, or None when the cell is blank or is
        not a valid ISO date.
    """
    s = (v or "").strip()
    if not s:
        return None
    try:
        # On Python 3.11+ fromisoformat() also accepts other ISO 8601
        # spellings (e.g. "20240412"); re-serialising guarantees the caller
        # always gets the plain YYYY-MM-DD form instead of the raw input.
        return date.fromisoformat(s).isoformat()
    except ValueError:
        return None
    # NOTE(review): callers cast this date-only value to TIMESTAMPTZ; the
    # resulting instant presumably depends on the DB session time zone —
    # confirm that matches the intended local midnight.
def load_csv() -> dict[str, dict]:
    """Load the CSV at CSV_PATH into a dict keyed by IMEI.

    Rows whose "IMEI" cell is missing or blank are ignored. When the same
    IMEI appears more than once the later row wins (as before), but a
    warning is now logged so the overwrite is not silent.

    Returns:
        Mapping of stripped IMEI string -> the csv.DictReader row for it.
    """
    rows: dict[str, dict] = {}
    # utf-8-sig transparently drops a leading BOM if the file has one.
    with open(CSV_PATH, encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            imei = (row.get("IMEI") or "").strip()
            if not imei:
                continue
            if imei in rows:
                log.warning(
                    "CSV has duplicate IMEI %s; the later row overrides the earlier one",
                    imei,
                )
            rows[imei] = row
    log.info("CSV loaded: %d rows from %s", len(rows), CSV_PATH.name)
    return rows
def load_db_devices() -> dict[str, dict]:
    """Read the DB_COLS columns of every tracksolid.devices row.

    Returns:
        Mapping of IMEI -> {column name: value} for that device.
    """
    by_imei: dict[str, dict] = {}
    with get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute(f"SELECT {', '.join(DB_COLS)} FROM tracksolid.devices")
            # Use the names the cursor reports for the result columns.
            headers = [column[0] for column in cur.description]
            for values in cur.fetchall():
                record = dict(zip(headers, values))
                by_imei[record["imei"]] = record
    log.info("DB loaded: %d devices", len(by_imei))
    return by_imei
def build_update(csv_row: dict, db_row: dict | None, only_null: bool) -> dict[str, object]:
    """Work out which device columns the CSV row wants to set.

    Args:
        csv_row: One CSV record, keyed by the CSV header names.
        db_row: The device's current DB values, or None if it has no row yet.
        only_null: When True (and db_row is given), keep only columns whose
            current DB value is None.

    Returns:
        Mapping of column -> new value. Empty for rows whose driver name is
        "identification". Columns whose proposed value is None are omitted.
        driver_name is omitted for placeholder driver labels. Proposed
        values are not compared with the DB values here, except for the
        only_null filter.
    """
    driver_raw = clean(csv_row.get("Driver Name")) or ""
    plate = clean(csv_row.get("License Plate No.")) or ""
    driver_key = driver_raw.lower()

    # "Identification" rows are skipped entirely.
    if driver_key == "identification":
        return {}

    candidates: dict[str, object] = {
        "vehicle_number": clean(plate),
        "vehicle_name": clean(plate),
        "vehicle_models": clean(csv_row.get("Vehicle Model")),
        "cost_centre": clean(csv_row.get("Department")),
        "sim": clean(csv_row.get("SIM")),
        "iccid": clean(csv_row.get("ICCID")),
        "imsi": clean(csv_row.get("IMSI")),
        "mc_type": clean(csv_row.get("Model")),
        "activation_time": _clean_date(csv_row.get("Activated Date", "")),
        "expiration": _clean_date(csv_row.get("Subscription Expiration", "")),
        "driver_phone": clean(csv_row.get("Telephone")),
        "assigned_city": _infer_city(plate),
    }
    if driver_key not in _DRIVER_SKIP:
        candidates["driver_name"] = driver_raw or None

    # A None proposal carries no information, so it is never sent.
    non_null: dict[str, object] = {}
    for column, value in candidates.items():
        if value is None:
            continue
        non_null[column] = value

    if db_row is None or not only_null:
        return non_null

    # only_null mode: leave alone any column the DB already has a value for.
    return {
        column: value
        for column, value in non_null.items()
        if db_row.get(column) is None
    }
def print_diff(imei: str, updates: dict[str, object], db_row: dict | None) -> None:
    """Pretty-print the columns that would actually change for one device.

    Args:
        imei: Device identifier shown in the section header.
        updates: Proposed column -> new value mapping.
        db_row: Current DB values for the device; None is treated as empty.

    Nothing is printed (not even the header) when no proposed value differs
    from the current DB value.
    """
    if not updates:
        return
    current = db_row or {}
    # Collect real differences first so an all-equal row does not emit a
    # dangling "IMEI ...:" header with nothing beneath it.
    changed = [
        (col, current.get(col), new_val)
        for col, new_val in sorted(updates.items())
        if current.get(col) != new_val
    ]
    if not changed:
        return
    print(f"\n IMEI {imei}:")
    for col, old_val, new_val in changed:
        print(f" {col:<20} {str(old_val):<30}{new_val}")
# Columns whose bound parameter is cast to TIMESTAMPTZ in the generated SQL.
_TIMESTAMP_COLS = ("activation_time", "expiration")


def _insert_stub_device(cur, imei: str, updates: dict[str, object]) -> None:
    """Insert a devices row for *imei* holding only the CSV-derived columns."""
    cols = ["imei", *updates]
    vals = [imei, *(str(v) if v is not None else None for v in updates.values())]
    placeholders = [
        "%s::TIMESTAMPTZ" if col in _TIMESTAMP_COLS else "%s" for col in cols
    ]
    # Column names come from build_update()'s fixed key set, not from user
    # input; all values are passed as bound parameters.
    cur.execute(
        f"INSERT INTO tracksolid.devices ({', '.join(cols)}) "
        f"VALUES ({', '.join(placeholders)}) "
        "ON CONFLICT (imei) DO NOTHING",
        vals,
    )


def _update_device(cur, imei: str, updates: dict[str, object]) -> None:
    """UPDATE the existing devices row for *imei* with the proposed values."""
    set_clauses = []
    params = []
    for col, val in updates.items():
        if col in _TIMESTAMP_COLS:
            set_clauses.append(f"{col} = COALESCE(%s::TIMESTAMPTZ, {col})")
        else:
            # NULLIF(..., '') makes an empty string keep the current value.
            set_clauses.append(f"{col} = COALESCE(NULLIF(%s, ''), {col})")
        params.append(str(val) if val is not None else None)
    set_clauses.append("updated_at = NOW()")
    params.append(imei)
    cur.execute(
        f"UPDATE tracksolid.devices SET {', '.join(set_clauses)} WHERE imei = %s",
        params,
    )


def run(apply: bool, only_null: bool, filter_imei: str | None) -> None:
    """Compare the CSV against tracksolid.devices and optionally write changes.

    Args:
        apply: When False (dry-run) only the diff and summary are printed;
            when True the INSERT / UPDATE statements are executed.
        only_null: Passed through to build_update(); restricts updates to
            columns that are currently NULL in the DB.
        filter_imei: If given, process only the CSV row with this IMEI.

    The counters report the same numbers in dry-run and apply mode
    ("would update" vs "updated"). A row counts as an update whenever
    build_update() returns any column for an existing device.
    """
    csv_rows = load_csv()
    db_rows = load_db_devices()
    if filter_imei:
        csv_rows = {k: v for k, v in csv_rows.items() if k == filter_imei}
        if not csv_rows:
            print(f"IMEI {filter_imei} not found in CSV.")
            return

    updated = inserted = skipped = no_change = 0
    with get_conn() as conn:
        with conn.cursor() as cur:
            for imei, csv_row in csv_rows.items():
                db_row = db_rows.get(imei)
                updates = build_update(csv_row, db_row, only_null)

                if not updates:
                    # Either an "Identification" placeholder or nothing to change
                    driver_raw = (csv_row.get("Driver Name") or "").strip().lower()
                    if driver_raw == "identification":
                        skipped += 1
                    else:
                        no_change += 1
                    continue

                if db_row is None:
                    # Device not yet synced from API — insert a stub row now so
                    # incoming alarms / positions don't trip the FK constraint.
                    print(f"\n [NEW] IMEI {imei}:")
                    for col, new_val in sorted(updates.items()):
                        print(f" {col:<20}{new_val}")
                    if apply:
                        _insert_stub_device(cur, imei, updates)
                    inserted += 1
                    continue

                print_diff(imei, updates, db_row)
                if apply:
                    _update_device(cur, imei, updates)
                # Counted in dry-run too, as "would update".
                updated += 1

    mode = "APPLIED" if apply else "DRY-RUN"
    print(f"\n{'='*60}")
    print(f" {mode} COMPLETE")
    print(f"{'='*60}")
    print(f" Would update / updated : {updated}")
    print(f" Would insert / inserted: {inserted}")
    print(f" No change needed : {no_change}")
    print(f" Skipped (Identification): {skipped}")
    if not apply:
        print("\n Run with --apply to commit changes.")
if __name__ == "__main__":
    # Command-line entry point: dry-run unless --apply is given.
    cli = argparse.ArgumentParser(
        description="Import driver/vehicle details from CSV into tracksolid.devices"
    )
    cli.add_argument(
        "--apply",
        action="store_true",
        help="Write changes to DB (default: dry-run)",
    )
    cli.add_argument(
        "--only-null",
        action="store_true",
        help="Only update fields currently NULL in the DB",
    )
    cli.add_argument("--imei", default=None, help="Limit to a single IMEI")
    opts = cli.parse_args()
    run(apply=opts.apply, only_null=opts.only_null, filter_imei=opts.imei)