""" audit_device_reconciliation.py — 162-vs-182 device delta audit ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Phase 0.2 of the Business Analytics redesign. Compares `20260427_FSG_Vehicles_mitieng.csv` (162 rows) to `tracksolid.devices` (182 rows at last check) and reports: 1. Per-account row counts on each side. 2. IMEIs in DB but not in CSV (the unexplained delta — typically auto-synced API rows with no business metadata). 3. IMEIs in CSV but not in DB (should be empty after a successful import). 4. IMEIs present on both sides where DB metadata is still NULL on key fields. Usage: # Read-only audit, prints to stdout. python audit_device_reconciliation.py # Same, but write output to a file (useful for the reconciliation report). python audit_device_reconciliation.py --out 260427_audit_output.txt # Use a different CSV path python audit_device_reconciliation.py --csv path/to/file.csv This script makes no writes — safe to run on prod. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ """ import argparse import csv import sys from collections import Counter from pathlib import Path from ts_shared_rev import get_conn, get_logger log = get_logger("device_audit") DEFAULT_CSV_PATH = Path(__file__).parent / "20260427_FSG_Vehicles_mitieng.csv" # Fields whose NULL-ness on devices that DO appear in CSV would indicate # a stale import. 
META_FIELDS = (
    "assigned_city",
    "cost_centre",
    "assigned_route",
    "vehicle_category",
    "vehicle_brand",
    "fuel_100km",
    "depot_address",
    "driver_name",
    "vehicle_number",
)


def load_csv_index(csv_path: Path) -> dict[str, dict]:
    """Read the vehicle CSV and index its rows by stripped IMEI.

    Rows with a blank IMEI are skipped.  Duplicate IMEIs are
    last-write-wins (matching dict assignment order).

    Args:
        csv_path: Path to the business-metadata CSV (utf-8-sig to
            tolerate an Excel BOM).

    Returns:
        Mapping of IMEI -> raw CSV row dict.
    """
    rows: dict[str, dict] = {}
    with open(csv_path, encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            imei = (row.get("imei") or "").strip()
            if imei:
                rows[imei] = row
    return rows


def load_db_index() -> dict[str, dict]:
    """Fetch every tracksolid.devices row, indexed by IMEI.

    Rows whose IMEI is NULL/blank are skipped: letting ``None`` into the
    key set would make the later ``sorted(db_imeis - csv_imeis)`` raise a
    TypeError (None is not orderable against str).

    Returns:
        Mapping of IMEI -> device record dict (column name -> value).
    """
    cols = (
        "imei", "account", "assigned_city", "city", "cost_centre",
        "assigned_route", "vehicle_category", "vehicle_brand", "fuel_100km",
        "depot_address", "driver_name", "vehicle_number", "device_name",
        "last_synced_at", "created_at",
    )
    devices: dict[str, dict] = {}
    with get_conn() as conn:
        with conn.cursor() as cur:
            # Column list is a hard-coded constant, so the f-string SQL is safe.
            cur.execute(f"SELECT {', '.join(cols)} FROM tracksolid.devices")
            names = [d[0] for d in cur.description]
            for row in cur.fetchall():
                rec = dict(zip(names, row))
                if rec.get("imei"):  # guard: never key the index on None/blank
                    devices[rec["imei"]] = rec
    return devices


def _account_key(row: dict) -> str:
    """Normalise an 'account' value for grouping; blank/None -> '(blank)'."""
    return (row.get("account") or "").strip() or "(blank)"


def _csv_account(row: dict) -> str:
    """Account grouping key for a CSV row (kept for call-site symmetry)."""
    return _account_key(row)


def _db_account(row: dict) -> str:
    """Account grouping key for a DB row (kept for call-site symmetry)."""
    return _account_key(row)


def _is_blank(v) -> bool:
    """True for None, empty/whitespace-only strings, or the literal 'NULL'."""
    if v is None:
        return True
    s = str(v).strip()
    return s == "" or s.upper() == "NULL"


def _find_stale(db_idx: dict[str, dict], imeis) -> list[tuple[str, list[str]]]:
    """Return (imei, blank_field_names) for devices with NULL key metadata.

    Only devices in *imeis* (the CSV/DB intersection) are examined; a hit
    means the import never populated that row's business metadata.
    """
    stale: list[tuple[str, list[str]]] = []
    for imei in sorted(imeis):
        d = db_idx[imei]
        blanks = [f for f in META_FIELDS if _is_blank(d.get(f))]
        if blanks:
            stale.append((imei, blanks))
    return stale


def main() -> int:
    """Run the read-only reconciliation audit and print/save the report.

    Returns:
        0 on success, 1 when the CSV file is missing.
    """
    parser = argparse.ArgumentParser(
        description="Reconcile vehicle CSV against tracksolid.devices")
    parser.add_argument("--csv", default=str(DEFAULT_CSV_PATH))
    parser.add_argument("--out", default=None,
                        help="Write report to this file in addition to stdout")
    args = parser.parse_args()

    csv_path = Path(args.csv)
    if not csv_path.exists():
        log.error("CSV not found: %s", csv_path)
        return 1

    csv_idx = load_csv_index(csv_path)
    db_idx = load_db_index()

    csv_imeis = set(csv_idx)
    db_imeis = set(db_idx)
    only_db = sorted(db_imeis - csv_imeis)
    only_csv = sorted(csv_imeis - db_imeis)
    both = csv_imeis & db_imeis

    csv_accounts = Counter(_csv_account(r) for r in csv_idx.values())
    db_accounts = Counter(_db_account(r) for r in db_idx.values())

    out_lines: list[str] = []

    def w(line: str = "") -> None:
        # Tee every report line: collect for --out AND echo to stdout.
        out_lines.append(line)
        print(line)

    w("=" * 76)
    w(" Device reconciliation — CSV vs tracksolid.devices")
    w("=" * 76)
    w(f" CSV file : {csv_path.name}")
    w(f" CSV row count : {len(csv_idx)}")
    w(f" DB row count : {len(db_idx)}")
    # BUGFIX: the net delta is DB minus CSV row counts. The old code printed
    # len(only_db), which overstates the delta whenever CSV-only IMEIs exist
    # (net = len(only_db) - len(only_csv)).
    w(f" Delta (DB-CSV) : {len(db_idx) - len(csv_idx):+d}")
    w("")

    w("─ Per-account breakdown ─────────────────────────────────────────────────")
    all_accounts = sorted(set(csv_accounts) | set(db_accounts))
    w(f" {'account':<30} {'CSV':>6} {'DB':>6} {'delta':>7}")
    for acct in all_accounts:
        c, d = csv_accounts.get(acct, 0), db_accounts.get(acct, 0)
        w(f" {acct:<30} {c:>6} {d:>6} {d-c:>+7}")
    w("")

    w(f"─ IMEIs in DB but NOT in CSV ({len(only_db)}) ─────────────────────────────")
    if not only_db:
        w(" (none — DB is a strict subset of CSV)")
    else:
        w(f" {'imei':<18} {'account':<22} {'city':<10} {'last_synced_at':<28} {'device_name'}")
        for imei in only_db:
            r = db_idx[imei]
            w(f" {imei:<18} {(r.get('account') or ''):<22} "
              f"{(r.get('assigned_city') or r.get('city') or ''):<10} "
              f"{str(r.get('last_synced_at') or ''):<28} "
              f"{r.get('device_name') or ''}")
    w("")

    w(f"─ IMEIs in CSV but NOT in DB ({len(only_csv)}) ─────────────────────────────")
    if not only_csv:
        w(" (none — every CSV row has a corresponding device row)")
    else:
        w(f" {'imei':<18} {'account':<22} {'assigned_city':<14} {'cost_centre':<14} {'driver_name'}")
        for imei in only_csv:
            r = csv_idx[imei]
            w(f" {imei:<18} {(r.get('account') or ''):<22} "
              f"{(r.get('assigned_city') or ''):<14} "
              f"{(r.get('cost_centre') or ''):<14} "
              f"{r.get('driver_name') or ''}")
    w("")

    # Stale-metadata audit: in both, but DB is still NULL on key fields.
    stale = _find_stale(db_idx, both)
    w(f"─ Devices in both, but DB metadata still NULL ({len(stale)}) ──────────────")
    if not stale:
        w(" (none — import looks complete on intersecting devices)")
    else:
        w(" Likely cause: import_drivers_csv.py has not been re-run with --apply")
        w(" against the new CSV, or rows had 'Identification' placeholders.")
        w("")
        w(f" {'imei':<18} blank_fields")
        for imei, blanks in stale[:30]:  # cap output
            w(f" {imei:<18} {', '.join(blanks)}")
        if len(stale) > 30:
            w(f" ... and {len(stale) - 30} more")
    w("")

    w("─ Suggested next step ───────────────────────────────────────────────────")
    if only_db:
        w(" Inspect the IMEIs above. Decide one of:")
        w(" (a) Prune — delete from tracksolid.devices if they are stale "
          "test/decommissioned units.")
        w(" (b) Leave-as-NULL — keep them as auto-synced API rows; their "
          "metadata stays NULL until added to a future CSV.")
        w(" (c) Addendum — add them to the CSV (or a sidecar CSV) and re-run "
          "import_drivers_csv.py --apply.")
        w(" Document the choice in 260427_device_reconciliation.md.")
    else:
        w(" CSV and DB are reconciled. No further action.")

    if args.out:
        Path(args.out).write_text("\n".join(out_lines), encoding="utf-8")
        print(f"\nReport also written to {args.out}")
    return 0


if __name__ == "__main__":
    sys.exit(main())