feat(trips): add --skip-geocode flag to backfill script
Some checks failed
Static Analysis / static (push) Has been cancelled
Tests / test (push) Has been cancelled

The historical trips table is much larger than the spec assumed (7,634
rows on prod, not the 8 the CLAUDE.md snapshot suggested). Reverse-geocoding
all of them (two lookups per trip: start and end) under Nominatim's
1 req/sec usage-policy limit would take ~4¼ hours end-to-end.

--skip-geocode bypasses the Nominatim calls entirely. Geometry, plate, and
idle backfills run in minutes; addresses stay NULL on historical rows and
will only be populated for future trips by poll_trips.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
David Kiania 2026-05-01 22:12:07 +03:00
parent 144dedee90
commit f94d14864f

View file

@ -27,6 +27,12 @@ Usage:
# Limit to trips since a date (UTC) # Limit to trips since a date (UTC)
python backfill_trips_enrichment.py --since 2026-04-01 --apply python backfill_trips_enrichment.py --since 2026-04-01 --apply
# Skip Nominatim reverse-geocoding (geometry/plate/idle only — runs in
# minutes instead of hours when backfilling thousands of rows). Addresses
# remain NULL for these rows: future poll_trips cycles fill addresses
# only for new trips, never retroactively.
python backfill_trips_enrichment.py --skip-geocode --apply
""" """
@ -100,9 +106,12 @@ def _load_plates_cache(cur) -> dict[str, str]:
return {imei: plate for imei, plate in cur.fetchall()} return {imei: plate for imei, plate in cur.fetchall()}
def run(apply: bool, filter_imei: str | None, since: str | None) -> None: def run(apply: bool, filter_imei: str | None, since: str | None,
skip_geocode: bool = False) -> None:
t0 = time.time() t0 = time.time()
enriched = degenerate = no_fixes = failed = 0 enriched = degenerate = no_fixes = failed = 0
if skip_geocode:
log.info("Reverse-geocoding disabled (--skip-geocode). Addresses will stay NULL.")
with get_conn() as conn: with get_conn() as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
@ -149,6 +158,9 @@ def run(apply: bool, filter_imei: str | None, since: str | None) -> None:
trip_id, imei, waypoints_count, trip_id, imei, waypoints_count,
) )
if skip_geocode:
start_address = end_address = None
else:
start_address = reverse_geocode(start_lat, start_lng) start_address = reverse_geocode(start_lat, start_lng)
end_address = reverse_geocode(end_lat, end_lng) end_address = reverse_geocode(end_lat, end_lng)
vehicle_plate = existing_plate or plates.get(imei) vehicle_plate = existing_plate or plates.get(imei)
@ -210,6 +222,8 @@ if __name__ == "__main__":
parser.add_argument("--apply", action="store_true", help="Write changes to DB (default: dry-run)") parser.add_argument("--apply", action="store_true", help="Write changes to DB (default: dry-run)")
parser.add_argument("--imei", default=None, help="Limit to a single IMEI") parser.add_argument("--imei", default=None, help="Limit to a single IMEI")
parser.add_argument("--since", default=None, help="Only trips with start_time >= YYYY-MM-DD (UTC)") parser.add_argument("--since", default=None, help="Only trips with start_time >= YYYY-MM-DD (UTC)")
parser.add_argument("--skip-geocode", action="store_true", help="Skip Nominatim reverse-geocoding (fast path for large backfills)")
args = parser.parse_args() args = parser.parse_args()
run(apply=args.apply, filter_imei=args.imei, since=args.since) run(apply=args.apply, filter_imei=args.imei, since=args.since,
skip_geocode=args.skip_geocode)