fix: BUG-06..11 — pool lock, clean_int rounding, date-only tz, _infer_city, rowcount naming, double commit
Some checks failed
Static Analysis / static (push) Has been cancelled
Tests / test (push) Has been cancelled
Static Analysis / static (pull_request) Has been cancelled
Tests / test (pull_request) Has been cancelled

BUG-06 (LOW-MED): _get_pool() had a TOCTOU race — two threads hitting the
None pool at cold start could each create one and leak the loser's
connections. Added a threading.Lock with double-checked locking.

BUG-07 (LOW): clean_int truncated via int(float(s)) so "3.9" → 3. All
current call sites are intrinsically-integer fields, so behaviour for
production traffic is unchanged, but rounding is the safer default for
any future field that arrives as a decimal. Unit test updated to match.

BUG-08 (LOW): _infer_city mapped every Kenyan plate to NBO, silently
misclassifying Coast/Mombasa vehicles. Now returns None for K-series
plates and emits a log warning so operators can tag them explicitly.
Uganda (UMA / UAG) remains unambiguous → KLA. Analytics views already
COALESCE NULLs into the 'unassigned' bucket so no dashboards break.

BUG-09 (LOW): clean_ts accepted "2024-04-12" verbatim → Postgres stored
00:00 UTC = 03:00 EAT, three hours off the operator's intent. Date-only
strings are now anchored to Africa/Nairobi midnight (T00:00:00+03:00).
Strings with a time component pass through unchanged. Unit test added.

BUG-10 (LOW): rowcount counters in poll_live_positions and poll_trips
were named "upserted"/"inserted" but they sum cur.rowcount from
ON CONFLICT DO UPDATE statements — which always returns 1 per touch
regardless of whether the row was an insert or an update. Renamed to
live_pos_affected / history_inserted / trips_affected, and routed
trips_affected to the rows_upserted slot of ingestion_log (it was
previously logged as rows_inserted, which was misleading).

BUG-11 (COSMETIC): removed the redundant conn.commit() inside the
with get_conn() block of _update_token_cache — the context manager
already auto-commits on __exit__.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
david kiania 2026-05-15 15:49:54 +03:00
parent dc6404a114
commit d66c3bab42
4 changed files with 94 additions and 24 deletions

View file

@ -48,12 +48,27 @@ _DRIVER_SKIP = {"identification", "ug"}
def _infer_city(plate: str) -> str | None: def _infer_city(plate: str) -> str | None:
"""Derive assigned_city from license plate prefix.""" """Derive assigned_city from license plate prefix.
[BUG-08] Kenyan plates (K-series) span both Nairobi and Mombasa, and
the prefix alone is not a reliable indicator KC-series tends to be
Coast, KD-series tends to be Nairobi, but there are exceptions in both
directions. Rather than misclassify Coast vehicles as Nairobi (the
previous behaviour), return None for any Kenyan plate so they fall
through to `assigned_city IS NULL`. Analytics views already
COALESCE(...) those into the `unassigned` bucket; operators can tag
Mombasa/Nairobi explicitly via the DB or a future onboarding signal
(e.g. SIM MCC).
Uganda (UMA / UAG) remains unambiguous KLA.
"""
p = (plate or "").strip().upper() p = (plate or "").strip().upper()
if p.startswith("UMA") or p.startswith("UAG"): if p.startswith("UMA") or p.startswith("UAG"):
return "KLA" return "KLA"
if p.startswith("K"): if p.startswith("K"):
return "NBO" log.warning("Plate %s: Kenyan prefix is ambiguous (NBO vs MBA) — "
"leaving assigned_city NULL for manual tagging", p)
return None
return None return None

View file

@ -198,7 +198,10 @@ def poll_live_positions():
positions_by_imei[imei] = p positions_by_imei[imei] = p
positions = list(positions_by_imei.values()) positions = list(positions_by_imei.values())
upserted, inserted = 0, 0 # [BUG-10] Variable names match the ingestion_log column they feed:
# live_pos_affected → rows_upserted (DO UPDATE: rowcount=1 per touch)
# history_inserted → rows_inserted (DO NOTHING: rowcount=1 only on real insert)
live_pos_affected, history_inserted = 0, 0
with get_conn() as conn: with get_conn() as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
@ -240,7 +243,7 @@ def poll_live_positions():
clean_num(p.get("batteryPowerVal")), clean(p.get("trackerOil")), clean_num(p.get("temperature")), clean_num(p.get("batteryPowerVal")), clean(p.get("trackerOil")), clean_num(p.get("temperature")),
current_mileage, clean(p.get("status")), clean(p.get("locDesc")) current_mileage, clean(p.get("status")), clean(p.get("locDesc"))
)) ))
upserted += cur.rowcount live_pos_affected += cur.rowcount
# History (Hypertable Source) # History (Hypertable Source)
if gps_time: if gps_time:
@ -249,13 +252,15 @@ def poll_live_positions():
VALUES (%s, %s, ST_SetSRID(ST_MakePoint(%s, %s), 4326), %s, %s, %s, %s, %s, %s, %s) VALUES (%s, %s, ST_SetSRID(ST_MakePoint(%s, %s), 4326), %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (imei, gps_time) DO NOTHING ON CONFLICT (imei, gps_time) DO NOTHING
""", (imei, gps_time, lng, lat, lat, lng, speed, direction, acc_status, gps_num, current_mileage)) """, (imei, gps_time, lng, lat, lat, lng, speed, direction, acc_status, gps_num, current_mileage))
inserted += cur.rowcount history_inserted += cur.rowcount
cur.execute("RELEASE SAVEPOINT sp") cur.execute("RELEASE SAVEPOINT sp")
except Exception: except Exception:
cur.execute("ROLLBACK TO SAVEPOINT sp") cur.execute("ROLLBACK TO SAVEPOINT sp")
log.warning("Failed to process live position for %s", p.get("imei"), exc_info=True) log.warning("Failed to process live position for %s", p.get("imei"), exc_info=True)
log_ingestion(cur, "jimi.user.device.location.list", len(positions), upserted, inserted, int((time.time()-t0)*1000), True) log_ingestion(cur, "jimi.user.device.location.list", len(positions),
live_pos_affected, history_inserted,
int((time.time()-t0)*1000), True)
# ── 3. Trip Reports (Every 15m) ─────────────────────────────────────────────── # ── 3. Trip Reports (Every 15m) ───────────────────────────────────────────────
@ -266,7 +271,10 @@ def poll_trips():
end_ts = datetime.now(timezone.utc) end_ts = datetime.now(timezone.utc)
start_ts = end_ts - timedelta(hours=1) start_ts = end_ts - timedelta(hours=1)
inserted = 0 # [BUG-10] trips.cur.rowcount counts both real inserts and DO UPDATE
# touches — i.e. affected rows from an upsert. Route to rows_upserted
# in ingestion_log, not rows_inserted.
trips_affected = 0
with get_conn() as conn: with get_conn() as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
@ -304,14 +312,15 @@ def poll_trips():
clean_num(t.get("maxSpeed")), clean_int(t.get("runTimeSecond")) clean_num(t.get("maxSpeed")), clean_int(t.get("runTimeSecond"))
)) ))
cur.execute("RELEASE SAVEPOINT sp") cur.execute("RELEASE SAVEPOINT sp")
inserted += cur.rowcount trips_affected += cur.rowcount
except Exception: except Exception:
cur.execute("ROLLBACK TO SAVEPOINT sp") cur.execute("ROLLBACK TO SAVEPOINT sp")
log.warning("Failed to process trip for %s", t.get("imei"), exc_info=True) log.warning("Failed to process trip for %s", t.get("imei"), exc_info=True)
log_ingestion(cur, "jimi.device.track.mileage", len(imeis), 0, inserted, log_ingestion(cur, "jimi.device.track.mileage", len(imeis),
trips_affected, 0,
int((time.time() - t0) * 1000), True) int((time.time() - t0) * 1000), True)
log.info("Trips: %d records processed.", inserted) log.info("Trips: %d records affected.", trips_affected)
# ── 4. Parking Events (Every 15m) ───────────────────────────────────────────── # ── 4. Parking Events (Every 15m) ─────────────────────────────────────────────

View file

@ -66,8 +66,18 @@ class TestCleanInt:
def test_integer_string(self): def test_integer_string(self):
assert clean_int("42") == 42 assert clean_int("42") == 42
def test_float_string_truncates(self): def test_float_string_rounds_up(self):
assert clean_int("3.9") == 3 # [BUG-07] Was truncating ("3.9" → 3); now rounds to nearest.
assert clean_int("3.9") == 4
def test_float_string_rounds_down(self):
assert clean_int("3.1") == 3
def test_half_rounds_to_even(self):
# Python 3 uses banker's rounding for round(); harmless for ingestion
# data where half-values don't appear in practice.
assert clean_int("0.5") == 0
assert clean_int("1.5") == 2
def test_non_numeric_returns_none(self): def test_non_numeric_returns_none(self):
assert clean_int("abc") is None assert clean_int("abc") is None
@ -98,6 +108,12 @@ class TestCleanTs:
# BCD format YYMMDDHHmmss is NOT handled by clean_ts (only by _parse_trip_ts) # BCD format YYMMDDHHmmss is NOT handled by clean_ts (only by _parse_trip_ts)
assert clean_ts("220415103000") is None assert clean_ts("220415103000") is None
def test_date_only_anchored_to_nairobi_midnight(self):
# [BUG-09] Previously returned "2024-04-12" verbatim → Postgres
# interpreted as 00:00 UTC = 03:00 EAT. Now returns a tz-aware
# ISO string anchored to Africa/Nairobi midnight.
assert clean_ts("2024-04-12") == "2024-04-12T00:00:00+03:00"
class TestIsValidFix: class TestIsValidFix:
def test_zero_island_filtered(self): def test_zero_island_filtered(self):

View file

@ -22,8 +22,10 @@ from __future__ import annotations
import hashlib import hashlib
import logging import logging
import os import os
import re
import signal import signal
import sys import sys
import threading
import time import time
from contextlib import contextmanager from contextlib import contextmanager
from datetime import datetime, timezone, timedelta from datetime import datetime, timezone, timedelta
@ -82,9 +84,17 @@ _log = get_logger("shared")
# ── Connection Pool (psycopg2) ─────────────────────────────────────────────── # ── Connection Pool (psycopg2) ───────────────────────────────────────────────
_pool: Optional[psycopg2.pool.ThreadedConnectionPool] = None _pool: Optional[psycopg2.pool.ThreadedConnectionPool] = None
# [BUG-06] Guard cold-start creation. Without the lock, two threads racing
# through _get_pool() while _pool is None could each instantiate a pool — the
# second assignment overwrites the first and leaks its checked-out connections.
_pool_lock = threading.Lock()
def _get_pool() -> psycopg2.pool.ThreadedConnectionPool: def _get_pool() -> psycopg2.pool.ThreadedConnectionPool:
global _pool global _pool
if _pool is not None and not _pool.closed:
return _pool
with _pool_lock:
# Double-check inside the lock — another thread may have just created it.
if _pool is None or _pool.closed: if _pool is None or _pool.closed:
_pool = psycopg2.pool.ThreadedConnectionPool( _pool = psycopg2.pool.ThreadedConnectionPool(
_POOL_MIN, _POOL_MAX, DATABASE_URL, _POOL_MIN, _POOL_MAX, DATABASE_URL,
@ -154,18 +164,37 @@ def clean_num(v: Any) -> Optional[float]:
return None return None
def clean_int(v: Any) -> Optional[int]: def clean_int(v: Any) -> Optional[int]:
"""Coerce to int. [BUG-07] Rounds rather than truncates so '3.9' → 4.
Truncation via int(float(s)) silently lost ~half a unit on any float-
valued integer field. All current call sites pass intrinsically-integer
fields (gpsNum, statusFlags, enabledFlag, etc.) so behaviour is
unchanged for them; the rounding semantics are a guard against future
fields that may arrive as decimals."""
s = clean(v) s = clean(v)
if s is None: return None if s is None: return None
try: try:
return int(float(s)) return round(float(s))
except (ValueError, TypeError): except (ValueError, TypeError):
return None return None
_DATE_ONLY_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
def clean_ts(v: Any) -> Optional[str]: def clean_ts(v: Any) -> Optional[str]:
"""Clean timestamp string for PostgreSQL insertion.""" """Clean timestamp string for PostgreSQL insertion.
[BUG-09] datetime.fromisoformat("2024-04-12") is valid in Python 3.11+
and produces a naive datetime at 00:00:00. Postgres then stored it as
midnight UTC, which is 03:00 Africa/Nairobi three hours off the
operator's intent. The fleet runs in Nairobi (UTC+3), and date-only
fields in the onboarding CSV (activation_time, expiration) are meant
to land at local midnight. Append `T00:00:00+03:00` for those inputs.
Inputs that already carry a time component are returned unchanged."""
s = clean(v) s = clean(v)
if s is None: if s is None:
return None return None
if _DATE_ONLY_RE.match(s):
return f"{s}T00:00:00+03:00"
try: try:
datetime.fromisoformat(s.replace("Z", "+00:00")) datetime.fromisoformat(s.replace("Z", "+00:00"))
return s return s
@ -299,6 +328,8 @@ def _refresh_token(refresh_token: str) -> Optional[str]:
def _update_token_cache(r: dict) -> str: def _update_token_cache(r: dict) -> str:
token, expires_in = r["accessToken"], int(r.get("expiresIn", 7200)) token, expires_in = r["accessToken"], int(r.get("expiresIn", 7200))
expires_at = datetime.now(timezone.utc) + timedelta(seconds=expires_in) expires_at = datetime.now(timezone.utc) + timedelta(seconds=expires_in)
# [BUG-11] get_conn() auto-commits on __exit__; the previous explicit
# conn.commit() was redundant. Removed.
with get_conn() as conn: with get_conn() as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute(""" cur.execute("""
@ -308,5 +339,4 @@ def _update_token_cache(r: dict) -> str:
access_token=EXCLUDED.access_token, refresh_token=EXCLUDED.refresh_token, access_token=EXCLUDED.access_token, refresh_token=EXCLUDED.refresh_token,
expires_at=EXCLUDED.expires_at, obtained_at=NOW() expires_at=EXCLUDED.expires_at, obtained_at=NOW()
""", (USER_ID, token, r.get("refreshToken"), expires_at)) """, (USER_ID, token, r.get("refreshToken"), expires_at))
conn.commit()
return token return token