feat(trips): add --skip-geocode flag to backfill script
The historical trips table is much larger than the spec assumed (7,634 rows on prod, not the 8 the CLAUDE.md snapshot suggested). Reverse-geocoding all of them via Nominatim's 1 req/sec TOS throttle — two lookups per trip, one each for the start and end points — would take ~4¼ hours end-to-end. --skip-geocode bypasses the Nominatim calls entirely. Geometry, plate, and idle backfills run in minutes; addresses stay NULL on historical rows and will only be populated for future trips by poll_trips. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
144dedee90
commit
f94d14864f
1 changed file with 21 additions and 7 deletions
|
|
@ -27,6 +27,12 @@ Usage:
|
|||
|
||||
# Limit to trips since a date (UTC)
|
||||
python backfill_trips_enrichment.py --since 2026-04-01 --apply
|
||||
|
||||
# Skip Nominatim reverse-geocoding (geometry/plate/idle only — runs in
|
||||
# minutes instead of hours when backfilling thousands of rows). Addresses
|
||||
# remain NULL for these rows; future poll_trips cycles populate addresses
|
||||
# only for new trips, not retroactively.
|
||||
python backfill_trips_enrichment.py --skip-geocode --apply
|
||||
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
"""
|
||||
|
||||
|
|
@ -100,9 +106,12 @@ def _load_plates_cache(cur) -> dict[str, str]:
|
|||
return {imei: plate for imei, plate in cur.fetchall()}
|
||||
|
||||
|
||||
def run(apply: bool, filter_imei: str | None, since: str | None) -> None:
|
||||
def run(apply: bool, filter_imei: str | None, since: str | None,
|
||||
skip_geocode: bool = False) -> None:
|
||||
t0 = time.time()
|
||||
enriched = degenerate = no_fixes = failed = 0
|
||||
if skip_geocode:
|
||||
log.info("Reverse-geocoding disabled (--skip-geocode). Addresses will stay NULL.")
|
||||
|
||||
with get_conn() as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
|
@ -149,8 +158,11 @@ def run(apply: bool, filter_imei: str | None, since: str | None) -> None:
|
|||
trip_id, imei, waypoints_count,
|
||||
)
|
||||
|
||||
start_address = reverse_geocode(start_lat, start_lng)
|
||||
end_address = reverse_geocode(end_lat, end_lng)
|
||||
if skip_geocode:
|
||||
start_address = end_address = None
|
||||
else:
|
||||
start_address = reverse_geocode(start_lat, start_lng)
|
||||
end_address = reverse_geocode(end_lat, end_lng)
|
||||
vehicle_plate = existing_plate or plates.get(imei)
|
||||
|
||||
log.info(
|
||||
|
|
@ -207,9 +219,11 @@ if __name__ == "__main__":
|
|||
parser = argparse.ArgumentParser(
|
||||
description="Backfill route_geom / start_geom / end_geom / addresses on tracksolid.trips."
|
||||
)
|
||||
parser.add_argument("--apply", action="store_true", help="Write changes to DB (default: dry-run)")
|
||||
parser.add_argument("--imei", default=None, help="Limit to a single IMEI")
|
||||
parser.add_argument("--since", default=None, help="Only trips with start_time >= YYYY-MM-DD (UTC)")
|
||||
parser.add_argument("--apply", action="store_true", help="Write changes to DB (default: dry-run)")
|
||||
parser.add_argument("--imei", default=None, help="Limit to a single IMEI")
|
||||
parser.add_argument("--since", default=None, help="Only trips with start_time >= YYYY-MM-DD (UTC)")
|
||||
parser.add_argument("--skip-geocode", action="store_true", help="Skip Nominatim reverse-geocoding (fast path for large backfills)")
|
||||
args = parser.parse_args()
|
||||
|
||||
run(apply=args.apply, filter_imei=args.imei, since=args.since)
|
||||
run(apply=args.apply, filter_imei=args.imei, since=args.since,
|
||||
skip_geocode=args.skip_geocode)
|
||||
|
|
|
|||
Loading…
Reference in a new issue