From f94d14864fe1c6df796997ccaaa79f50f5b4e562 Mon Sep 17 00:00:00 2001 From: David Kiania Date: Fri, 1 May 2026 22:12:07 +0300 Subject: [PATCH] feat(trips): add --skip-geocode flag to backfill script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The historical trips table is much larger than the spec assumed (7,634 rows on prod, not the 8 the CLAUDE.md snapshot suggested). Reverse-geocoding all of them via Nominatim's 1 req/sec TOS throttle would take ~4¼ hours end-to-end. --skip-geocode bypasses the Nominatim calls entirely. Geometry, plate, and idle backfills run in minutes; addresses stay NULL on historical rows and will only be populated for future trips by poll_trips. Co-Authored-By: Claude Opus 4.7 --- backfill_trips_enrichment.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/backfill_trips_enrichment.py b/backfill_trips_enrichment.py index a619b9e..810e0de 100644 --- a/backfill_trips_enrichment.py +++ b/backfill_trips_enrichment.py @@ -27,6 +27,12 @@ Usage: # Limit to trips since a date (UTC) python backfill_trips_enrichment.py --since 2026-04-01 --apply + + # Skip Nominatim reverse-geocoding (geometry/plate/idle only — runs in + # minutes instead of hours when backfilling thousands of rows). Addresses + # remain NULL for these rows: future poll_trips cycles fill addresses + # only for new trips, never retroactively. 
+ python backfill_trips_enrichment.py --skip-geocode --apply ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ """ @@ -100,9 +106,12 @@ def _load_plates_cache(cur) -> dict[str, str]: return {imei: plate for imei, plate in cur.fetchall()} -def run(apply: bool, filter_imei: str | None, since: str | None) -> None: +def run(apply: bool, filter_imei: str | None, since: str | None, + skip_geocode: bool = False) -> None: t0 = time.time() enriched = degenerate = no_fixes = failed = 0 + if skip_geocode: + log.info("Reverse-geocoding disabled (--skip-geocode). Addresses will stay NULL.") with get_conn() as conn: with conn.cursor() as cur: @@ -149,8 +158,11 @@ def run(apply: bool, filter_imei: str | None, since: str | None) -> None: trip_id, imei, waypoints_count, ) - start_address = reverse_geocode(start_lat, start_lng) - end_address = reverse_geocode(end_lat, end_lng) + if skip_geocode: + start_address = end_address = None + else: + start_address = reverse_geocode(start_lat, start_lng) + end_address = reverse_geocode(end_lat, end_lng) vehicle_plate = existing_plate or plates.get(imei) log.info( @@ -207,9 +219,11 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( description="Backfill route_geom / start_geom / end_geom / addresses on tracksolid.trips." 
) - parser.add_argument("--apply", action="store_true", help="Write changes to DB (default: dry-run)") - parser.add_argument("--imei", default=None, help="Limit to a single IMEI") - parser.add_argument("--since", default=None, help="Only trips with start_time >= YYYY-MM-DD (UTC)") + parser.add_argument("--apply", action="store_true", help="Write changes to DB (default: dry-run)") + parser.add_argument("--imei", default=None, help="Limit to a single IMEI") + parser.add_argument("--since", default=None, help="Only trips with start_time >= YYYY-MM-DD (UTC)") + parser.add_argument("--skip-geocode", action="store_true", help="Skip Nominatim reverse-geocoding (fast path for large backfills)") args = parser.parse_args() - run(apply=args.apply, filter_imei=args.imei, since=args.since) + run(apply=args.apply, filter_imei=args.imei, since=args.since, + skip_geocode=args.skip_geocode)