2026-04-12 18:38:20 +00:00
|
|
|
"""Unit tests for ts_shared_rev data cleaning helpers."""
|
|
|
|
|
import sys
|
|
|
|
|
import os
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
# Make ts_shared_rev importable: three dirname() hops from this file reach the
# directory two levels above this file's own folder; put it first on sys.path.
_import_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
sys.path.insert(0, _import_root)

# Provide placeholder settings before ts_shared_rev is imported. setdefault()
# only fills in names that are not already present in the environment.
for _env_name, _env_value in (
    ("TRACKSOLID_APP_KEY", "test_key"),
    ("TRACKSOLID_APP_SECRET", "test_secret"),
    ("TRACKSOLID_USER_ID", "test_user"),
    ("TRACKSOLID_PWD_MD5", "test_md5"),
    ("DATABASE_URL", "postgresql://test:test@localhost:5432/test"),
):
    os.environ.setdefault(_env_name, _env_value)
|
|
|
|
|
|
|
|
|
|
from ts_shared_rev import clean, clean_num, clean_int, clean_ts, is_valid_fix
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestClean:
    """Checks for ``clean``: missing, blank, textual and non-string inputs."""

    def test_none_returns_none(self):
        """``None`` in, ``None`` out."""
        outcome = clean(None)
        assert outcome is None

    def test_empty_string_returns_none(self):
        """An empty string is reported as ``None``."""
        outcome = clean("")
        assert outcome is None

    def test_whitespace_only_returns_none(self):
        """A string holding only whitespace is reported as ``None``."""
        outcome = clean(" ")
        assert outcome is None

    def test_normal_string_preserved(self):
        """Ordinary text comes back unchanged."""
        text = "hello"
        assert clean(text) == "hello"

    def test_strips_whitespace(self):
        """Surrounding whitespace is removed from the result."""
        padded = " hello "
        assert clean(padded) == "hello"

    def test_non_string_converted(self):
        """A non-string value comes back as its string form."""
        outcome = clean(123)
        assert outcome == "123"

    def test_zero_preserved(self):
        """Integer zero yields the string "0" rather than ``None``."""
        outcome = clean(0)
        assert outcome == "0"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestCleanNum:
    """Checks for ``clean_num``: numeric parsing and rejection of bad input."""

    def test_valid_float_string(self):
        """A decimal string parses to the matching float."""
        parsed = clean_num("3.14")
        assert parsed == pytest.approx(3.14)

    def test_valid_integer_string(self):
        """An integer-looking string parses to a float of the same value."""
        parsed = clean_num("42")
        assert parsed == pytest.approx(42.0)

    def test_non_numeric_returns_none(self):
        """Text that is not a number yields ``None``."""
        parsed = clean_num("abc")
        assert parsed is None

    def test_none_returns_none(self):
        """``None`` in, ``None`` out."""
        parsed = clean_num(None)
        assert parsed is None

    def test_empty_string_returns_none(self):
        """An empty string yields ``None``."""
        parsed = clean_num("")
        assert parsed is None

    def test_numeric_value_passthrough(self):
        """A value that is already a float keeps its value."""
        parsed = clean_num(45.5)
        assert parsed == pytest.approx(45.5)

    def test_negative_value(self):
        """A leading minus sign is honoured."""
        parsed = clean_num("-1.5")
        assert parsed == pytest.approx(-1.5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestCleanInt:
    """Checks for ``clean_int``: integer parsing, rounding and bad input."""

    def test_integer_string(self):
        """A plain integer string parses to that integer."""
        parsed = clean_int("42")
        assert parsed == 42

    def test_float_string_rounds_up(self):
        """A decimal string above the midpoint goes to the next integer."""
        # [BUG-07] clean_int used to truncate ("3.9" → 3); it now rounds to
        # the nearest integer.
        parsed = clean_int("3.9")
        assert parsed == 4

    def test_float_string_rounds_down(self):
        """A decimal string below the midpoint goes to the lower integer."""
        parsed = clean_int("3.1")
        assert parsed == 3

    def test_half_rounds_to_even(self):
        """Exact halves land on the even neighbour."""
        # Python 3 uses banker's rounding for round(); harmless for ingestion
        # data where half-values don't appear in practice.
        for raw, expected in (("0.5", 0), ("1.5", 2)):
            assert clean_int(raw) == expected

    def test_non_numeric_returns_none(self):
        """Text that is not a number yields ``None``."""
        parsed = clean_int("abc")
        assert parsed is None

    def test_none_returns_none(self):
        """``None`` in, ``None`` out."""
        parsed = clean_int(None)
        assert parsed is None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestCleanTs:
    """Checks for ``clean_ts``: accepted timestamp shapes and rejected input."""

    def test_valid_iso_timestamp(self):
        """A "date time" string comes back exactly as given."""
        raw = "2024-04-12 08:00:00"
        assert clean_ts(raw) == "2024-04-12 08:00:00"

    def test_valid_iso_with_timezone(self):
        """An ISO string with a trailing "Z" is accepted (not ``None``)."""
        raw = "2024-04-12T08:00:00Z"
        assert clean_ts(raw) is not None

    def test_garbage_returns_none(self):
        """Text that is not a timestamp yields ``None``."""
        outcome = clean_ts("not-a-date")
        assert outcome is None

    def test_none_returns_none(self):
        """``None`` in, ``None`` out."""
        outcome = clean_ts(None)
        assert outcome is None

    def test_empty_string_returns_none(self):
        """An empty string yields ``None``."""
        outcome = clean_ts("")
        assert outcome is None

    def test_bcd_format_returns_none(self):
        """A twelve-digit BCD-style string is rejected."""
        # BCD format YYMMDDHHmmss is NOT handled by clean_ts (only by
        # _parse_trip_ts).
        outcome = clean_ts("220415103000")
        assert outcome is None

    def test_date_only_anchored_to_nairobi_midnight(self):
        """A bare date gains a midnight time and a +03:00 offset."""
        # [BUG-09] Previously returned "2024-04-12" verbatim → Postgres
        # interpreted as 00:00 UTC = 03:00 EAT. Now returns a tz-aware
        # ISO string anchored to Africa/Nairobi midnight.
        outcome = clean_ts("2024-04-12")
        assert outcome == "2024-04-12T00:00:00+03:00"
|
|
|
|
|
|
2026-04-12 18:38:20 +00:00
|
|
|
|
|
|
|
|
class TestIsValidFix:
    """Checks for ``is_valid_fix``: which lat/lng pairs count as usable."""

    def test_zero_island_filtered(self):
        """The (0.0, 0.0) pair is rejected."""
        verdict = is_valid_fix(0.0, 0.0)
        assert verdict is False

    def test_valid_nairobi_coords(self):
        """A Nairobi coordinate pair is accepted."""
        lat, lng = -1.2921, 36.8219
        assert is_valid_fix(lat, lng) is True

    def test_none_lat_returns_false(self):
        """A missing latitude is rejected."""
        verdict = is_valid_fix(None, 36.8219)
        assert verdict is False

    def test_none_lng_returns_false(self):
        """A missing longitude is rejected."""
        verdict = is_valid_fix(-1.2921, None)
        assert verdict is False

    def test_out_of_range_lat(self):
        """A latitude of 91.0 is rejected."""
        verdict = is_valid_fix(91.0, 36.8219)
        assert verdict is False

    def test_out_of_range_lng(self):
        """A longitude of 181.0 is rejected."""
        verdict = is_valid_fix(-1.2921, 181.0)
        assert verdict is False

    def test_valid_extreme_coords(self):
        """The boundary pair (90.0, 180.0) is still accepted."""
        verdict = is_valid_fix(90.0, 180.0)
        assert verdict is True

    def test_string_coords_accepted(self):
        """Coordinates supplied as numeric strings are accepted."""
        lat, lng = "-1.2921", "36.8219"
        assert is_valid_fix(lat, lng) is True
|