fix: BUG-06..11 — pool lock, clean_int rounding, date-only tz, _infer_city, rowcount naming, double commit
BUG-06 (LOW-MED): _get_pool() had a TOCTOU race — two threads hitting the None pool at cold start could each create one and leak the loser's connections. Added a threading.Lock with double-checked locking. BUG-07 (LOW): clean_int truncated via int(float(s)) so "3.9" → 3. All current call sites are intrinsically-integer fields, so behaviour for production traffic is unchanged, but rounding is the safer default for any future field that arrives as a decimal. Unit test updated to match. BUG-08 (LOW): _infer_city mapped every Kenyan plate to NBO, silently misclassifying Coast/Mombasa vehicles. Now returns None for K-series plates and emits a log warning so operators can tag them explicitly. Uganda (UMA / UAG) remains unambiguous → KLA. Analytics views already COALESCE NULLs into the 'unassigned' bucket so no dashboards break. BUG-09 (LOW): clean_ts accepted "2024-04-12" verbatim → Postgres stored 00:00 UTC = 03:00 EAT, three hours off the operator's intent. Date-only strings are now anchored to Africa/Nairobi midnight (T00:00:00+03:00). Strings with a time component pass through unchanged. Unit test added. BUG-10 (LOW): rowcount counters in poll_live_positions and poll_trips were named "upserted"/"inserted" but they sum cur.rowcount from ON CONFLICT DO UPDATE statements — which always returns 1 per touch regardless of whether the row was an insert or an update. Renamed to live_pos_affected / history_inserted / trips_affected, and routed trips_affected to the rows_upserted slot of ingestion_log (it was previously logged as rows_inserted, which was misleading). BUG-11 (COSMETIC): removed the redundant conn.commit() inside the with get_conn() block of _update_token_cache — the context manager already auto-commits on __exit__. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
dc6404a114
commit
d66c3bab42
4 changed files with 94 additions and 24 deletions
|
|
@ -48,12 +48,27 @@ _DRIVER_SKIP = {"identification", "ug"}
|
|||
|
||||
|
||||
def _infer_city(plate: str) -> str | None:
|
||||
"""Derive assigned_city from license plate prefix."""
|
||||
"""Derive assigned_city from license plate prefix.
|
||||
|
||||
[BUG-08] Kenyan plates (K-series) span both Nairobi and Mombasa, and
|
||||
the prefix alone is not a reliable indicator — KC-series tends to be
|
||||
Coast, KD-series tends to be Nairobi, but there are exceptions in both
|
||||
directions. Rather than misclassify Coast vehicles as Nairobi (the
|
||||
previous behaviour), return None for any Kenyan plate so they fall
|
||||
through to `assigned_city IS NULL`. Analytics views already
|
||||
COALESCE(...) those into the `unassigned` bucket; operators can tag
|
||||
Mombasa/Nairobi explicitly via the DB or a future onboarding signal
|
||||
(e.g. SIM MCC).
|
||||
|
||||
Uganda (UMA / UAG) remains unambiguous → KLA.
|
||||
"""
|
||||
p = (plate or "").strip().upper()
|
||||
if p.startswith("UMA") or p.startswith("UAG"):
|
||||
return "KLA"
|
||||
if p.startswith("K"):
|
||||
return "NBO"
|
||||
log.warning("Plate %s: Kenyan prefix is ambiguous (NBO vs MBA) — "
|
||||
"leaving assigned_city NULL for manual tagging", p)
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -198,7 +198,10 @@ def poll_live_positions():
|
|||
positions_by_imei[imei] = p
|
||||
|
||||
positions = list(positions_by_imei.values())
|
||||
upserted, inserted = 0, 0
|
||||
# [BUG-10] Variable names match the ingestion_log column they feed:
|
||||
# live_pos_affected → rows_upserted (DO UPDATE: rowcount=1 per touch)
|
||||
# history_inserted → rows_inserted (DO NOTHING: rowcount=1 only on real insert)
|
||||
live_pos_affected, history_inserted = 0, 0
|
||||
|
||||
with get_conn() as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
|
@ -240,7 +243,7 @@ def poll_live_positions():
|
|||
clean_num(p.get("batteryPowerVal")), clean(p.get("trackerOil")), clean_num(p.get("temperature")),
|
||||
current_mileage, clean(p.get("status")), clean(p.get("locDesc"))
|
||||
))
|
||||
upserted += cur.rowcount
|
||||
live_pos_affected += cur.rowcount
|
||||
|
||||
# History (Hypertable Source)
|
||||
if gps_time:
|
||||
|
|
@ -249,13 +252,15 @@ def poll_live_positions():
|
|||
VALUES (%s, %s, ST_SetSRID(ST_MakePoint(%s, %s), 4326), %s, %s, %s, %s, %s, %s, %s)
|
||||
ON CONFLICT (imei, gps_time) DO NOTHING
|
||||
""", (imei, gps_time, lng, lat, lat, lng, speed, direction, acc_status, gps_num, current_mileage))
|
||||
inserted += cur.rowcount
|
||||
history_inserted += cur.rowcount
|
||||
cur.execute("RELEASE SAVEPOINT sp")
|
||||
except Exception:
|
||||
cur.execute("ROLLBACK TO SAVEPOINT sp")
|
||||
log.warning("Failed to process live position for %s", p.get("imei"), exc_info=True)
|
||||
|
||||
log_ingestion(cur, "jimi.user.device.location.list", len(positions), upserted, inserted, int((time.time()-t0)*1000), True)
|
||||
log_ingestion(cur, "jimi.user.device.location.list", len(positions),
|
||||
live_pos_affected, history_inserted,
|
||||
int((time.time()-t0)*1000), True)
|
||||
|
||||
# ── 3. Trip Reports (Every 15m) ───────────────────────────────────────────────
|
||||
|
||||
|
|
@ -266,7 +271,10 @@ def poll_trips():
|
|||
|
||||
end_ts = datetime.now(timezone.utc)
|
||||
start_ts = end_ts - timedelta(hours=1)
|
||||
inserted = 0
|
||||
# [BUG-10] trips.cur.rowcount counts both real inserts and DO UPDATE
|
||||
# touches — i.e. affected rows from an upsert. Route to rows_upserted
|
||||
# in ingestion_log, not rows_inserted.
|
||||
trips_affected = 0
|
||||
|
||||
with get_conn() as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
|
@ -304,14 +312,15 @@ def poll_trips():
|
|||
clean_num(t.get("maxSpeed")), clean_int(t.get("runTimeSecond"))
|
||||
))
|
||||
cur.execute("RELEASE SAVEPOINT sp")
|
||||
inserted += cur.rowcount
|
||||
trips_affected += cur.rowcount
|
||||
except Exception:
|
||||
cur.execute("ROLLBACK TO SAVEPOINT sp")
|
||||
log.warning("Failed to process trip for %s", t.get("imei"), exc_info=True)
|
||||
|
||||
log_ingestion(cur, "jimi.device.track.mileage", len(imeis), 0, inserted,
|
||||
log_ingestion(cur, "jimi.device.track.mileage", len(imeis),
|
||||
trips_affected, 0,
|
||||
int((time.time() - t0) * 1000), True)
|
||||
log.info("Trips: %d records processed.", inserted)
|
||||
log.info("Trips: %d records affected.", trips_affected)
|
||||
|
||||
# ── 4. Parking Events (Every 15m) ─────────────────────────────────────────────
|
||||
|
||||
|
|
|
|||
|
|
@ -66,8 +66,18 @@ class TestCleanInt:
|
|||
def test_integer_string(self):
|
||||
assert clean_int("42") == 42
|
||||
|
||||
def test_float_string_truncates(self):
|
||||
assert clean_int("3.9") == 3
|
||||
def test_float_string_rounds_up(self):
|
||||
# [BUG-07] Was truncating ("3.9" → 3); now rounds to nearest.
|
||||
assert clean_int("3.9") == 4
|
||||
|
||||
def test_float_string_rounds_down(self):
|
||||
assert clean_int("3.1") == 3
|
||||
|
||||
def test_half_rounds_to_even(self):
|
||||
# Python 3 uses banker's rounding for round(); harmless for ingestion
|
||||
# data where half-values don't appear in practice.
|
||||
assert clean_int("0.5") == 0
|
||||
assert clean_int("1.5") == 2
|
||||
|
||||
def test_non_numeric_returns_none(self):
|
||||
assert clean_int("abc") is None
|
||||
|
|
@ -98,6 +108,12 @@ class TestCleanTs:
|
|||
# BCD format YYMMDDHHmmss is NOT handled by clean_ts (only by _parse_trip_ts)
|
||||
assert clean_ts("220415103000") is None
|
||||
|
||||
def test_date_only_anchored_to_nairobi_midnight(self):
|
||||
# [BUG-09] Previously returned "2024-04-12" verbatim → Postgres
|
||||
# interpreted as 00:00 UTC = 03:00 EAT. Now returns a tz-aware
|
||||
# ISO string anchored to Africa/Nairobi midnight.
|
||||
assert clean_ts("2024-04-12") == "2024-04-12T00:00:00+03:00"
|
||||
|
||||
|
||||
class TestIsValidFix:
|
||||
def test_zero_island_filtered(self):
|
||||
|
|
|
|||
|
|
@ -22,8 +22,10 @@ from __future__ import annotations
|
|||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
|
@ -82,9 +84,17 @@ _log = get_logger("shared")
|
|||
# ── Connection Pool (psycopg2) ───────────────────────────────────────────────
|
||||
|
||||
_pool: Optional[psycopg2.pool.ThreadedConnectionPool] = None
|
||||
# [BUG-06] Guard cold-start creation. Without the lock, two threads racing
|
||||
# through _get_pool() while _pool is None could each instantiate a pool — the
|
||||
# second assignment overwrites the first and leaks its checked-out connections.
|
||||
_pool_lock = threading.Lock()
|
||||
|
||||
def _get_pool() -> psycopg2.pool.ThreadedConnectionPool:
|
||||
global _pool
|
||||
if _pool is not None and not _pool.closed:
|
||||
return _pool
|
||||
with _pool_lock:
|
||||
# Double-check inside the lock — another thread may have just created it.
|
||||
if _pool is None or _pool.closed:
|
||||
_pool = psycopg2.pool.ThreadedConnectionPool(
|
||||
_POOL_MIN, _POOL_MAX, DATABASE_URL,
|
||||
|
|
@ -154,18 +164,37 @@ def clean_num(v: Any) -> Optional[float]:
|
|||
return None
|
||||
|
||||
def clean_int(v: Any) -> Optional[int]:
|
||||
"""Coerce to int. [BUG-07] Rounds rather than truncates so '3.9' → 4.
|
||||
|
||||
Truncation via int(float(s)) silently lost ~half a unit on any float-
|
||||
valued integer field. All current call sites pass intrinsically-integer
|
||||
fields (gpsNum, statusFlags, enabledFlag, etc.) so behaviour is
|
||||
unchanged for them; the rounding semantics are a guard against future
|
||||
fields that may arrive as decimals."""
|
||||
s = clean(v)
|
||||
if s is None: return None
|
||||
try:
|
||||
return int(float(s))
|
||||
return round(float(s))
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
_DATE_ONLY_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
|
||||
|
||||
def clean_ts(v: Any) -> Optional[str]:
|
||||
"""Clean timestamp string for PostgreSQL insertion."""
|
||||
"""Clean timestamp string for PostgreSQL insertion.
|
||||
|
||||
[BUG-09] datetime.fromisoformat("2024-04-12") is valid in Python 3.11+
|
||||
and produces a naive datetime at 00:00:00. Postgres then stored it as
|
||||
midnight UTC, which is 03:00 Africa/Nairobi — three hours off the
|
||||
operator's intent. The fleet runs in Nairobi (UTC+3), and date-only
|
||||
fields in the onboarding CSV (activation_time, expiration) are meant
|
||||
to land at local midnight. Append `T00:00:00+03:00` for those inputs.
|
||||
Inputs that already carry a time component are returned unchanged."""
|
||||
s = clean(v)
|
||||
if s is None:
|
||||
return None
|
||||
if _DATE_ONLY_RE.match(s):
|
||||
return f"{s}T00:00:00+03:00"
|
||||
try:
|
||||
datetime.fromisoformat(s.replace("Z", "+00:00"))
|
||||
return s
|
||||
|
|
@ -299,6 +328,8 @@ def _refresh_token(refresh_token: str) -> Optional[str]:
|
|||
def _update_token_cache(r: dict) -> str:
|
||||
token, expires_in = r["accessToken"], int(r.get("expiresIn", 7200))
|
||||
expires_at = datetime.now(timezone.utc) + timedelta(seconds=expires_in)
|
||||
# [BUG-11] get_conn() auto-commits on __exit__; the previous explicit
|
||||
# conn.commit() was redundant. Removed.
|
||||
with get_conn() as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""
|
||||
|
|
@ -308,5 +339,4 @@ def _update_token_cache(r: dict) -> str:
|
|||
access_token=EXCLUDED.access_token, refresh_token=EXCLUDED.refresh_token,
|
||||
expires_at=EXCLUDED.expires_at, obtained_at=NOW()
|
||||
""", (USER_ID, token, r.get("refreshToken"), expires_at))
|
||||
conn.commit()
|
||||
return token
|
||||
Loading…
Reference in a new issue