206 lines
8.1 KiB
Python
206 lines
8.1 KiB
Python
|
|
"""
|
||
|
|
audit_device_reconciliation.py — 162-vs-182 device delta audit
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Phase 0.2 of the Business Analytics redesign.

Compares `20260427_FSG_Vehicles_mitieng.csv` (162 rows) to `tracksolid.devices`
(182 rows at last check) and reports:
  1. Per-account row counts on each side.
  2. IMEIs in DB but not in CSV (the unexplained delta — typically auto-synced
     API rows with no business metadata).
  3. IMEIs in CSV but not in DB (should be empty after a successful import).
  4. IMEIs present on both sides where DB metadata is still NULL on key fields.

Usage:
    # Read-only audit, prints to stdout.
    python audit_device_reconciliation.py

    # Same, but also write the output to a file (useful for the reconciliation report).
    python audit_device_reconciliation.py --out 260427_audit_output.txt

    # Use a different CSV path
    python audit_device_reconciliation.py --csv path/to/file.csv

This script makes no database writes — safe to run on prod.
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
|
|
"""
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import csv
|
||
|
|
import sys
|
||
|
|
from collections import Counter
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
from ts_shared_rev import get_conn, get_logger
|
||
|
|
|
||
|
|
# Module-level logger, created via the get_logger helper imported from ts_shared_rev.
log = get_logger("device_audit")
|
||
|
|
|
||
|
|
# Default input: the vehicle CSV expected to sit in the same directory as this script.
DEFAULT_CSV_PATH = Path(__file__).with_name("20260427_FSG_Vehicles_mitieng.csv")

# DB columns checked for blank values on devices that ARE present in the CSV;
# a blank here suggests the import is stale for that device.
META_FIELDS = (
    "assigned_city",
    "cost_centre",
    "assigned_route",
    "vehicle_category",
    "vehicle_brand",
    "fuel_100km",
    "depot_address",
    "driver_name",
    "vehicle_number",
)
|
||
|
|
|
||
|
|
|
||
|
|
def load_csv_index(csv_path: Path) -> dict[str, dict]:
    """Index the rows of the vehicle CSV by their ``imei`` column.

    The file is read as UTF-8 with an optional BOM. Rows whose ``imei`` is
    missing or blank after trimming are skipped. When the same IMEI occurs
    more than once, the last row read replaces the earlier one.

    Args:
        csv_path: Location of the CSV file; its header row must name the columns.

    Returns:
        Mapping of trimmed IMEI string to the ``csv.DictReader`` row for it.
    """
    with open(csv_path, encoding="utf-8-sig", newline="") as handle:
        # Pair every record with its trimmed IMEI, then keep the non-empty ones.
        keyed = (
            ((record.get("imei") or "").strip(), record)
            for record in csv.DictReader(handle)
        )
        return {imei: record for imei, record in keyed if imei}
|
||
|
|
|
||
|
|
|
||
|
|
def load_db_index() -> dict[str, dict]:
    """Read every row of ``tracksolid.devices`` into a dict keyed by IMEI.

    Only the columns used by the audit are selected. Each value maps the
    column names reported by the cursor to that row's values. If several
    rows share an IMEI, the last one fetched replaces the earlier ones.

    Returns:
        Mapping of the row's ``imei`` value to its column-name -> value dict.
    """
    wanted = [
        "imei",
        "account",
        "assigned_city",
        "city",
        "cost_centre",
        "assigned_route",
        "vehicle_category",
        "vehicle_brand",
        "fuel_100km",
        "depot_address",
        "driver_name",
        "vehicle_number",
        "device_name",
        "last_synced_at",
        "created_at",
    ]
    # The column list is a fixed literal above, never user input.
    query = f"SELECT {', '.join(wanted)} FROM tracksolid.devices"
    with get_conn() as conn, conn.cursor() as cur:
        cur.execute(query)
        # Key each record by the names the cursor reports for the result set.
        header = [column[0] for column in cur.description]
        records = (dict(zip(header, values)) for values in cur.fetchall())
        return {record["imei"]: record for record in records}
|
||
|
|
|
||
|
|
|
||
|
|
def _csv_account(row: dict) -> str:
    """Return the trimmed ``account`` value of a CSV row, or ``"(blank)"`` if empty."""
    label = (row.get("account") or "").strip()
    return label if label else "(blank)"
|
||
|
|
|
||
|
|
|
||
|
|
def _db_account(row: dict) -> str:
    """Return the trimmed ``account`` value of a DB row, or ``"(blank)"`` if empty."""
    raw = row.get("account")
    # A missing, None or empty value is treated the same as whitespace-only.
    trimmed = raw.strip() if raw else ""
    return trimmed or "(blank)"
|
||
|
|
|
||
|
|
|
||
|
|
def _is_blank(v) -> bool:
    """Tell whether a value counts as missing metadata.

    A value is blank when it is ``None``, or when its string form is empty
    after trimming or spells ``NULL`` in any letter case.
    """
    return v is None or str(v).strip().upper() in ("", "NULL")
|
||
|
|
|
||
|
|
|
||
|
|
def main(argv: "list[str] | None" = None) -> int:
    """Run the read-only CSV-vs-DB device audit and print the report.

    The report lists per-account counts, IMEIs found only in the DB, IMEIs
    found only in the CSV, and devices present on both sides whose DB
    metadata (``META_FIELDS``) is still blank. Every line is printed to
    stdout and, when ``--out`` is given, also written to that file.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read them from ``sys.argv``.

    Returns:
        1 when the CSV path is not an existing file, otherwise 0. Mismatches
        between CSV and DB are reported but do not change the exit status.
    """
    parser = argparse.ArgumentParser(description="Reconcile vehicle CSV against tracksolid.devices")
    parser.add_argument("--csv", default=str(DEFAULT_CSV_PATH))
    parser.add_argument("--out", default=None, help="Write report to this file in addition to stdout")
    args = parser.parse_args(argv)

    csv_path = Path(args.csv)
    # is_file() rather than exists(): a directory passes exists() and would
    # only fail later, inside open().
    if not csv_path.is_file():
        log.error("CSV not found: %s", csv_path)
        return 1

    csv_idx = load_csv_index(csv_path)
    db_idx = load_db_index()

    csv_imeis = set(csv_idx)
    db_imeis = set(db_idx)

    only_db = sorted(db_imeis - csv_imeis)
    only_csv = sorted(csv_imeis - db_imeis)
    both = csv_imeis & db_imeis

    csv_accounts = Counter(_csv_account(r) for r in csv_idx.values())
    db_accounts = Counter(_db_account(r) for r in db_idx.values())

    out_lines: list[str] = []

    def w(line: str = "") -> None:
        # Echo to stdout and remember the line for the optional --out file.
        out_lines.append(line)
        print(line)

    w("=" * 76)
    w(" Device reconciliation — CSV vs tracksolid.devices")
    w("=" * 76)
    w(f" CSV file : {csv_path.name}")
    w(f" CSV row count : {len(csv_idx)}")
    w(f" DB row count : {len(db_idx)}")
    # The true DB-minus-CSV difference. len(only_db) equals it only when no
    # CSV IMEI is missing from the DB.
    w(f" Delta (DB-CSV) : {len(db_idx) - len(csv_idx):+d}")
    w("")

    w("─ Per-account breakdown ─────────────────────────────────────────────────")
    all_accounts = sorted(set(csv_accounts) | set(db_accounts))
    w(f" {'account':<30} {'CSV':>6} {'DB':>6} {'delta':>7}")
    for acct in all_accounts:
        n_csv, n_db = csv_accounts.get(acct, 0), db_accounts.get(acct, 0)
        w(f" {acct:<30} {n_csv:>6} {n_db:>6} {n_db - n_csv:>+7}")
    w("")

    w(f"─ IMEIs in DB but NOT in CSV ({len(only_db)}) ─────────────────────────────")
    if not only_db:
        # "subset", not "strict subset": the two sets may be equal.
        w(" (none — DB is a subset of CSV)")
    else:
        w(f" {'imei':<18} {'account':<22} {'city':<10} {'last_synced_at':<28} {'device_name'}")
        for imei in only_db:
            dev = db_idx[imei]
            w(f" {imei:<18} {(dev.get('account') or ''):<22} "
              f"{(dev.get('assigned_city') or dev.get('city') or ''):<10} "
              f"{str(dev.get('last_synced_at') or ''):<28} "
              f"{dev.get('device_name') or ''}")
    w("")

    w(f"─ IMEIs in CSV but NOT in DB ({len(only_csv)}) ─────────────────────────────")
    if not only_csv:
        w(" (none — every CSV row has a corresponding device row)")
    else:
        w(f" {'imei':<18} {'account':<22} {'assigned_city':<14} {'cost_centre':<14} {'driver_name'}")
        for imei in only_csv:
            veh = csv_idx[imei]
            w(f" {imei:<18} {(veh.get('account') or ''):<22} "
              f"{(veh.get('assigned_city') or ''):<14} "
              f"{(veh.get('cost_centre') or ''):<14} "
              f"{veh.get('driver_name') or ''}")
    w("")

    # Stale-metadata audit: in both, but DB is still NULL on key fields.
    stale: list[tuple[str, list[str]]] = []
    for imei in sorted(both):
        db_row = db_idx[imei]
        blanks = [f for f in META_FIELDS if _is_blank(db_row.get(f))]
        if blanks:
            stale.append((imei, blanks))

    w(f"─ Devices in both, but DB metadata still NULL ({len(stale)}) ──────────────")
    if not stale:
        w(" (none — import looks complete on intersecting devices)")
    else:
        w(" Likely cause: import_drivers_csv.py has not been re-run with --apply")
        w(" against the new CSV, or rows had 'Identification' placeholders.")
        w("")
        w(f" {'imei':<18} blank_fields")
        for imei, blanks in stale[:30]:  # cap output
            w(f" {imei:<18} {', '.join(blanks)}")
        if len(stale) > 30:
            w(f" ... and {len(stale) - 30} more")
    w("")

    w("─ Suggested next step ───────────────────────────────────────────────────")
    if only_db:
        w(" Inspect the IMEIs above. Decide one of:")
        w(" (a) Prune — delete from tracksolid.devices if they are stale "
          "test/decommissioned units.")
        w(" (b) Leave-as-NULL — keep them as auto-synced API rows; their "
          "metadata stays NULL until added to a future CSV.")
        w(" (c) Addendum — add them to the CSV (or a sidecar CSV) and re-run "
          "import_drivers_csv.py --apply.")
        w(" Document the choice in 260427_device_reconciliation.md.")
    if only_csv:
        w(" Some CSV IMEIs have no device row (see above). Check them for typos "
          "or devices not yet synced, then re-run the import.")
    if stale:
        w(" Re-run import_drivers_csv.py --apply to fill the blank metadata "
          "listed above.")
    # Only claim success when all three findings are empty, not just only_db.
    if not (only_db or only_csv or stale):
        w(" CSV and DB are reconciled. No further action.")

    if args.out:
        Path(args.out).write_text("\n".join(out_lines), encoding="utf-8")
        print(f"\nReport also written to {args.out}")

    return 0
|
||
|
|
|
||
|
|
|
||
|
|
# Script entry point: run the audit and use main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|