Compare commits
No commits in common. "716b72367524d1dcb0a3c1d53e30a5c51175ef8c" and "85b171543adbb44541badf239996d6490b23c8e0" have entirely different histories.
716b723675
...
85b171543a
53 changed files with 0 additions and 4103 deletions
39
.env.example
39
.env.example
|
|
@ -1,39 +0,0 @@
|
|||
# Postgres
|
||||
POSTGRES_DB=fleet
|
||||
POSTGRES_USER=fleet
|
||||
POSTGRES_PASSWORD=change-me
|
||||
|
||||
# App connection (via pgbouncer in prod, direct in dev is also fine)
|
||||
DATABASE_URL=postgresql://fleet:change-me@pgbouncer:6432/fleet
|
||||
|
||||
# Auth
|
||||
JWT_SECRET=replace-with-64-bytes-of-random
|
||||
JWT_ACCESS_TTL_MIN=15
|
||||
JWT_REFRESH_TTL_DAYS=30
|
||||
|
||||
# Tracksolid push (shared token Tracksolid POSTs in form body)
|
||||
TRACKSOLID_PUSH_TOKEN=set-from-tracksolid-console
|
||||
|
||||
# Tracksolid polled API
|
||||
TRACKSOLID_API_BASE_URL=https://eu-open.tracksolidpro.com/route/rest
|
||||
# JSON array of {account_id, app_key, secret} — populated from current TARGETS env
TRACKSOLID_PROD_ACCOUNTS=
|
||||
TRACKSOLID_SANDBOX_ACCOUNT_ID=
|
||||
TRACKSOLID_SANDBOX_APP_KEY=
|
||||
TRACKSOLID_SANDBOX_SECRET=
|
||||
|
||||
# Geocoding (P2)
|
||||
NOMINATIM_BASE_URL=
|
||||
MAPBOX_TOKEN=
|
||||
|
||||
# Alerting (D9)
|
||||
NTFY_BASE_URL=https://ntfy.rahamafresh.com
|
||||
NTFY_TOPIC=fleet-slo-breach
|
||||
NTFY_TOKEN=
|
||||
|
||||
# Runtime
|
||||
# dev|prod — dev enables uvicorn --reload
APP_MODE=dev
|
||||
# gateway|worker|cron — selected per container
APP_ROLE=gateway
|
||||
APP_LOG_LEVEL=INFO
|
||||
|
||||
# Build-time (injected by CI)
|
||||
APP_GIT_SHA=local
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
name: build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
env:
|
||||
REGISTRY: repo.rahamafresh.com
|
||||
IMAGE_NAME: fleet-platform
|
||||
|
||||
jobs:
|
||||
lint-test:
|
||||
runs-on: docker
|
||||
container:
|
||||
image: python:3.12-slim
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install deps
|
||||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install .[dev]
|
||||
|
||||
- name: ruff
|
||||
run: ruff check .
|
||||
|
||||
- name: mypy
|
||||
run: mypy app
|
||||
|
||||
- name: pytest
|
||||
run: pytest
|
||||
|
||||
build-push:
|
||||
needs: lint-test
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
runs-on: docker
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Forgejo registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ secrets.REGISTRY_USERNAME }}
|
||||
password: ${{ secrets.REGISTRY_TOKEN }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
build-args: |
|
||||
GIT_SHA=${{ github.sha }}
|
||||
tags: |
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||
24
.gitignore
vendored
24
.gitignore
vendored
|
|
@ -1,24 +0,0 @@
|
|||
__pycache__/
|
||||
*.py[cod]
|
||||
*.egg-info/
|
||||
.pytest_cache/
|
||||
.ruff_cache/
|
||||
.mypy_cache/
|
||||
.coverage
|
||||
htmlcov/
|
||||
|
||||
.venv/
|
||||
venv/
|
||||
|
||||
.env
|
||||
.env.local
|
||||
*.local
|
||||
|
||||
dist/
|
||||
build/
|
||||
|
||||
.DS_Store
|
||||
.idea/
|
||||
.vscode/
|
||||
|
||||
db/schema.sql.tmp
|
||||
|
|
@ -1,527 +0,0 @@
|
|||
# Fleet Platform — Greenfield Architecture
|
||||
|
||||
**Date:** 2026-05-22 (rev. b — incorporates engineering review of 2026-05-22)
|
||||
**Author posture:** Senior systems architect
|
||||
**Status:** Design document. Not a migration plan. Not a port.
|
||||
|
||||
|
||||
---
|
||||
|
||||
## 2. Architectural principles
|
||||
|
||||
These are the load-bearing decisions. Everything below is derived from them.
|
||||
|
||||
### 2.1 The system is an event log with derived state
|
||||
|
||||
Every inbound signal — Jimicloud push, polled API response, manual import, ops entry — is appended to an immutable log (`events.raw`). The raw payload is preserved verbatim with metadata (`received_at`, `source`, `signature`). A separate parser produces typed events in `events.parsed`. Projectors read `events.parsed` and update "current state" tables (`state.live_positions`, `state.trips`, etc.) deterministically.
|
||||
|
||||
This is event sourcing kept lightweight: no Kafka, no Debezium, no separate event store. The log lives in Postgres as a hypertable. Projectors are SQL functions invoked by triggers or scheduled jobs.
|
||||
|
||||
What this fixes structurally:
|
||||
- Race conditions in write paths — projectors are single-writer per state table.
|
||||
- Contract drift — re-parse from `events.raw` with corrected logic, no upstream re-fetch.
|
||||
- Time-travel debugging — "what did Jimi send at 03:47 on Tuesday?" is a `SELECT`.
|
||||
- Replay — rebuild any state table from the log.
|
||||
|
||||
### 2.2 Contracts are typed, in code, and continuously verified
|
||||
|
||||
Every endpoint (Jimicloud → us, us → dashboard, us → routing) has a Pydantic model in the codebase, version-pinned per upstream API minor revision. A scheduled contract-check job hits a sandbox account daily and asserts the shape still matches. Drift produces an alert, not a `NULL` in production.
|
||||
|
||||
### 2.3 The platform is one codebase, one image, three container roles
|
||||
|
||||
One FastAPI application, one Git repository, one Docker image. The same image runs in three container roles selected by entrypoint command — `platform-gateway` (push receivers + dashboard API + routing endpoint + auth), `platform-worker` (parser + projectors + geocode worker + matview refresher), `platform-cron` (contract checker + OSM loader + scheduled polls + SLO measurement). Configuration and database connection are shared; process memory is not.
|
||||
|
||||
This is the same pattern Sidekiq / Celery / Rails apps use: single codebase, single deployment artefact, multiple runtime roles. It is not microservices — there is no service mesh, no inter-service contracts, no separate repos, no per-service teams. There is one PR review surface and one image to roll back. What it does provide is **fate isolation**: a heavy historical query that triggers an OOM in the worker role does not take down the gateway; an OSM loader that pegs CPU in the cron role does not stall webhook receivers. The original "one process" design conflated single-codebase (correct) with single-process (an avoidable failure mode).
|
||||
|
||||
Microservices solve organisational scaling problems we don't have. Container-role separation solves the fate-sharing problem we do have. Two developers and one fleet do not need a service mesh; they do need the gateway to keep responding to Jimi pushes while a 90-day report runs.
|
||||
|
||||
### 2.4 Lifecycle state and operational state are orthogonal and explicit
|
||||
|
||||
Devices have a **lifecycle state machine** (`provisioned → active → suspended → decommissioned`) stored explicitly in `domain.devices.lifecycle`. Vehicles have an **operational state** (`moving | parked | offline | unknown`) derived from the latest fix — never stored, always computed.
|
||||
|
||||
### 2.5 SLOs are first-class, not implicit thresholds
|
||||
|
||||
Service-level objectives live in `slo.targets`, one row per metric, with the current threshold value. Dashboards render against them. Grafana monitors against them. Changing a threshold is one `UPDATE`, not a scavenger hunt through SQL + JS.
|
||||
|
||||
### 2.6 Deploys are by image tag, not by branch
|
||||
|
||||
CI builds an image on push to `main`, tags it `<registry>/fleet-platform:<git-sha>` plus a moving `:latest`. Coolify deploys by tag. "What's deployed?" is `docker inspect`.
|
||||
|
||||
### 2.7 Dashboards are thin renderers over a typed read model
|
||||
|
||||
Each dashboard polls one endpoint that returns a complete, render-ready payload. The endpoint's shape is the contract. All state logic, palette assignment, plate-tail extraction, EAT formatting, KPI counting happens server-side. The HTML/JS is a render layer.
|
||||
|
||||
### 2.8 Projection is event-driven, not poll-driven
|
||||
|
||||
Parsers and projectors do not run on fixed schedules. The gateway, after inserting into `events.raw`, issues a Postgres `NOTIFY events_raw_new` in the same transaction. The worker role holds long-lived `LISTEN` connections and wakes immediately, parses the new row, writes `events.parsed`, issues `NOTIFY events_parsed_new`, and the relevant projector wakes and updates `state.*`. Because `LISTEN` is session-level state, these listener connections must connect to Postgres directly rather than through pgbouncer's transaction-mode pool, which does not support `LISTEN`. The chain is parse → project in tens of milliseconds, not stacked polling windows.
|
||||
|
||||
APScheduler is retained — but only for things that are genuinely time-triggered: the daily contract checker, the monthly OSM loader, the per-minute SLO measurement, the 60s polled-ingest sweep. Anything that should fire on data arrival fires on data arrival.
|
||||
|
||||
This structurally avoids the "parser every 10s + projector every 10s = up-to-20s of internal lag before the SLO timer even starts" problem. The freshness budget is spent on Jimi's transport, not on our scheduler.
|
||||
|
||||
### 2.9 External enrichment is asynchronous and replayable
|
||||
|
||||
Nominatim, Mapbox, weather, traffic — all run as separate projectors reading `events.parsed`, writing to side tables. Non-critical. Can be down for an hour without affecting the platform. Can be re-run without re-fetching upstream.
|
||||
|
||||
---
|
||||
|
||||
## 3. The system, at a glance
|
||||
|
||||
```
|
||||
Browser
|
||||
│
|
||||
┌──────────────────┴──────────────────┐
|
||||
│ live history routes │ 3 HTML pages
|
||||
│ │ importing fleet-core.js
|
||||
└──────────────────┬──────────────────┘
|
||||
│ HTTPS · JWT (mandatory)
|
||||
api.rahamafresh.com
|
||||
│
|
||||
┌──────────────┴──────────────┐
|
||||
│ platform-gateway │ ← one image,
|
||||
│ (FastAPI) │ three roles
|
||||
│ /push/* │
|
||||
│ /api/views/* │
|
||||
│ /api/routes │
|
||||
│ /api/auth/token │
|
||||
│ │
|
||||
│ gateway contract: │
|
||||
│ HMAC-verify + INSERT │
|
||||
│ events.raw + NOTIFY │
|
||||
│ + 200 OK. Nothing else. │
|
||||
└──────────────┬──────────────┘
|
||||
│
|
||||
┌──────────────┴──────────────┐
|
||||
│ platform-worker │ ← same image,
|
||||
│ (FastAPI workers) │ worker role
|
||||
│ LISTEN events_raw_new │
|
||||
│ → parser → events.parsed │
|
||||
│ → projectors → state.* │
|
||||
│ geocode_worker │
|
||||
│ matview_refresh │
|
||||
│ map_match (geo) │
|
||||
└──────────────┬──────────────┘
|
||||
│
|
||||
┌──────────────┴──────────────┐
|
||||
│ platform-cron │ ← same image,
|
||||
│ (APScheduler) │ cron role
|
||||
│ polls: live/trips/ │
|
||||
│ parking/track/devices/ │
|
||||
│ stale (60s / 10m) │
|
||||
│ contract_check (daily) │
|
||||
│ osm_loader (monthly) │
|
||||
│ slo_measurement (1m) │
|
||||
│ hr_sync (3h) │
|
||||
└──────────────┬──────────────┘
|
||||
│
|
||||
pgbouncer (txn mode)
|
||||
│
|
||||
┌──────────────┴──────────────┐
|
||||
│ TimescaleDB-HA │
|
||||
│ + PostGIS + pgRouting │
|
||||
│ │
|
||||
│ events / state / domain │
|
||||
│ geo / serve / slo / auth │
|
||||
└──────────────┬──────────────┘
|
||||
│
|
||||
┌─────────┴─────────┐
|
||||
│ db_backup → rustfs│
|
||||
│ grafana │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
One codebase. One image. Three container roles. One database. One repo. One branch. One image tag is what production is running.
|
||||
|
||||
The three roles do not share a Python process. They share configuration, database connection string, and Pydantic models. The gateway can keep returning 200 OK to Jimi while the worker is busy parsing a backlog, and the cron role can run a contract check that consumes CPU without affecting either. A failure in one role does not take the others down.
|
||||
|
||||
---
|
||||
|
||||
## 4. Tech stack
|
||||
|
||||
| Layer | Choice | Why this |
|
||||
|---|---|---|
|
||||
| **Language** | Python 3.12 | Team already knows it. No second language to operate. |
|
||||
| **Web framework** | FastAPI | Typed (Pydantic), async-native, OpenAPI for free, fast enough. |
|
||||
| **In-process scheduler** | APScheduler | Sturdier successor to the `schedule` library; cron syntax + persistence. |
|
||||
| **Database** | PostgreSQL 16 + TimescaleDB 2.15 | Hypertables and continuous aggregates are real value. |
|
||||
| **GIS** | PostGIS 3 | Required. Non-negotiable. |
|
||||
| **Routing** | pgRouting | In-database A\*. One connection string. Upgrade path to OSRM exists. |
|
||||
| **Connection pool** | pgbouncer (transaction mode) | Works. |
|
||||
| **Migrations** | dbmate | Single static binary, forward-only SQL, diff-able `schema.sql`. |
|
||||
| **Auth** | JWT (FastAPI native) + bcrypt | Short-lived tokens for dashboards. HMAC for webhooks. |
|
||||
| **Rate limiting** | slowapi | FastAPI-native. |
|
||||
| **Frontend** | Vanilla JS, ES modules, no bundler | Three HTML pages, one shared `fleet-core.js`. |
|
||||
| **Map library** | MapLibre GL JS 4.x | Already in current dashboards. Free for OSM/Carto basemaps. |
|
||||
| **Basemap** | Carto Voyager (default), Mapbox dark-v11 (scaffolded) | No mandatory token. |
|
||||
| **Reverse geocoding** | Nominatim primary, Mapbox fallback | Asynchronous, queued, never blocks ingest. |
|
||||
| **Edge / proxy** | Coolify-managed Traefik → nginx:alpine → rustfs / fleet-platform | Continues current pattern. We know it works. |
|
||||
| **Object store** | rustfs | Already deployed. Hosts static dashboards + DB backups. |
|
||||
| **Observability** | Grafana + Postgres views + container logs (structlog JSON) | One Grafana instance reads `slo.*` directly. No separate metrics stack. |
|
||||
| **CI/CD** | Forgejo Actions (GitHub Actions-compatible workflows) → image registry → Coolify image-tag deploy | Build on push to `main`, tag with git SHA. |
|
||||
| **OSM data** | Geofabrik Kenya + Uganda extracts, monthly refresh | Reproducible, atomic swap via `ALTER SCHEMA RENAME`. |
|
||||
| **Secrets** | `.env` (dev) + Coolify env vars (prod) | Nothing in git. `.env.example` documents required keys. |
|
||||
|
||||
Explicitly **not** in the stack: message queue (Redis/RabbitMQ/SQS), separate event store (Kafka/Debezium), n8n for service-layer work, frontend framework (React/Vue/Svelte), ORM (we use psycopg + plain SQL instead).
|
||||
|
||||
---
|
||||
|
||||
## 5. Schema — layered by purpose, not by feature
|
||||
|
||||
```
|
||||
events — immutable log; the source of truth
|
||||
events.raw — hypertable, append-only, partition by received_at
|
||||
events.parsed — hypertable, derived from events.raw by parser
|
||||
|
||||
state — derived projections (rebuildable from events)
|
||||
state.devices — current device metadata
|
||||
state.live_positions — one row per device, latest fix
|
||||
state.position_history — hypertable, full timeline
|
||||
state.trips — closed trips
|
||||
state.parking_events
|
||||
state.alarms — hypertable
|
||||
state.obd_readings — hypertable
|
||||
state.heartbeats — hypertable
|
||||
state.device_events — hypertable (login/logout)
|
||||
state.fuel_readings — hypertable
|
||||
state.temperature_readings — hypertable
|
||||
state.lbs_readings — hypertable
|
||||
state.geocoded_positions — async enrichment side table
|
||||
|
||||
domain — business entities + lifecycle
|
||||
domain.accounts — Jimi sub-accounts (FK target)
|
||||
domain.vehicles — vehicle identity (plate, model, depot)
|
||||
domain.devices — device <-> vehicle mapping with lifecycle
|
||||
domain.drivers
|
||||
domain.cost_centres
|
||||
domain.assigned_cities
|
||||
|
||||
geo — PostGIS + pgRouting + OSM
|
||||
geo.ways — pgRouting topology
|
||||
geo.ways_vertices_pgr
|
||||
geo.segment_observations — hypertable, map-matched fixes
|
||||
geo.cagg_segment_speed_band — CAGG (segment_id, dow, hod) → avg speed
|
||||
geo.pois — Fireside HQ + future depots
|
||||
|
||||
slo — explicit service level objectives
|
||||
slo.targets — one row per SLO with current threshold
|
||||
slo.measurements — hypertable, computed every minute
|
||||
slo.v_current_status — view: is each SLO currently met?
|
||||
|
||||
serve — dashboard + API contracts (SQL functions)
|
||||
serve.normalize_plate(text)
|
||||
serve.fn_live_view(filters jsonb)
|
||||
serve.fn_history_view(filters jsonb)
|
||||
serve.fn_route(origin, dest, depart_at)
|
||||
serve.fn_dispatch_view()
|
||||
serve.v_fleet_overview
|
||||
|
||||
ops — dispatch / tickets / CRM-ish
|
||||
ops.tickets
|
||||
ops.dispatch_log
|
||||
ops.cost_rates
|
||||
ops.service_log
|
||||
ops.odometer_readings
|
||||
|
||||
auth — service auth
|
||||
auth.accounts
|
||||
auth.tokens
|
||||
```
|
||||
|
||||
**Read this schema top-down:** events are immutable truth, state is derived, domain owns business identity, geo owns geometry, slo owns commitments, serve owns the dashboard contract, ops owns workflow, auth owns access. Each schema has one reason to exist.
|
||||
|
||||
**Where the round-6 dedup logic lives:** in `serve.fn_live_view` and `serve.v_live_dedup`. One function, one place. Changing the rule changes one place.
|
||||
|
||||
**Where SLO thresholds live:** `slo.targets`. Not scattered constants.
|
||||
|
||||
---
|
||||
|
||||
## 6. The ingest pipeline
|
||||
|
||||
```
|
||||
Jimicloud push ─────┐
|
||||
Jimicloud poll ─────┼─→ events.raw ─NOTIFY→ parser ─→ events.parsed ─NOTIFY→ projectors ─→ state.*
|
||||
CSV import ─────┤ ↘
|
||||
Ops action ─────┘ geocode_worker ─→ state.geocoded_positions
|
||||
```
|
||||
|
||||
**The gateway contract is minimal and inviolable.** The `platform-gateway` role does, per request, exactly: (1) HMAC signature verification, (2) one `INSERT` into `events.raw`, (3) one `NOTIFY events_raw_new`, (4) return `200 OK`. No Pydantic parsing, no PostGIS calculation, no geocoding, no projector work. All of that happens in the worker role, downstream. p95 < 100 ms is achievable with ~10x headroom under this contract; the Jimi-side timeout is never the constraint.
|
||||
|
||||
This is what makes Postgres-as-queue safe at this scale: the gateway's per-request work is constant-time and CPU-trivial. The event-loop-starvation risk that a co-tenanted parser/projector would create is eliminated by container-role separation (§2.3), not by introducing a separate buffer.
|
||||
|
||||
**`events.raw` is the contract.** Every gateway endpoint writes here first, verbatim. No parsing at the gateway. If parsing is wrong, we re-parse later. The raw row is the immutable record that "we received this" — no in-memory buffer sits between Jimi and durable storage.
|
||||
|
||||
**The parser is versioned.** `events.raw.parser_version` records which parser handled each row. If `alertTypeId` becomes `alertId`, we bump the parser, re-parse affected rows. `events.raw` is untouched.
|
||||
|
||||
**Projectors are single-writer per state table.** One projector owns `state.live_positions`. It reads `events.parsed` of kind `position_fix`, applies dedup, writes the upsert. No other code writes there. Race conditions between the 60 s sweep, alarm cross-feed, and stale rescue cease to exist because they all produce events the projector orders monotonically.
|
||||
|
||||
**Parser and projectors wake on NOTIFY, not on a timer.** Workers hold long-lived `LISTEN events_raw_new` / `LISTEN events_parsed_new` connections. New raw row → parser wakes within milliseconds → writes parsed row → projector wakes within milliseconds → updates state. There is no stacked polling delay between stages. A timer-based fallback (every 5 s) catches the rare case of a missed `NOTIFY` (e.g. connection blip); under normal operation the timer never fires because the listener already drained the row. For workers competing for the same queue, draining uses `SELECT … FOR UPDATE SKIP LOCKED` so multiple worker instances can scale out without double-processing.
|
||||
|
||||
**The contract checker runs daily.** Calls each Jimi endpoint against a sandbox account, validates response against the current Pydantic model, alerts on drift.
|
||||
|
||||
**Polling workers go through `events.raw`.** The cron role's `poll_live_positions` calls `jimi.user.device.location.list`, persists each device's payload to `events.raw`, ACKs. The parser and projector handle the rest. Polls and pushes are indistinguishable downstream of `events.raw`.
|
||||
|
||||
**Backfill / replay is trivial.** "Re-run the projector for trips between 2026-01-01 and 2026-03-01" is a SQL statement.
|
||||
|
||||
**On not using Redis as an ingest buffer.** A common reflex at this point is to put Redis between the gateway and Postgres to absorb spikes. We are not doing that, for the following reasons:
|
||||
|
||||
- The gateway's per-request work under the contract above is single-digit milliseconds against a healthy Postgres. The 100 ms Jimi timeout has ~10x headroom. There is no current latency problem to solve.
|
||||
- Redis would create a window in which an event has been acknowledged to Jimi but is not yet in `events.raw`. That weakens the "every signal is captured immutably before it is interpreted" invariant from §2.1: the durable record would become "Redis OR Postgres," not "Postgres". On a Redis crash without `appendfsync always`, in-flight events are lost with no trace.
|
||||
- Replay semantics become more complicated: `events.raw` ceases to be a complete record of inbound traffic until after Redis has drained. Reasoning about "did we receive that?" requires checking two systems.
|
||||
- Operational surface grows: one more container to monitor, back up, tune memory on, and reason about during incidents.
|
||||
|
||||
If the gateway's own workload — not its co-tenants' — ever exceeds what a synchronous Postgres `INSERT` can handle (a roughly 10x-from-today problem, per §10.3 P1 in the PRD), we revisit. Tracked as open question Q7 in §15.
|
||||
|
||||
---
|
||||
|
||||
## 7. The serving layer
|
||||
|
||||
Three dashboard endpoints; the two view endpoints share one shape:
|
||||
|
||||
```
|
||||
GET /api/views/live?filters=... → {summary, geojson, slo_status}
|
||||
GET /api/views/history?filters=... → {summary, geojson, slo_status}
|
||||
POST /api/routes → {route_geojson, eta_sec, distance_m, observed_basis}
|
||||
```
|
||||
|
||||
Each endpoint maps 1:1 to a SQL function in `serve.fn_*`. The Pydantic response model is the contract.
|
||||
|
||||
**Filters are a single `jsonb` parameter** (cost_centre, assigned_city, vehicle_numbers[], date_range). Adding a filter is one SQL change, no API signature break.
|
||||
|
||||
**Auth:** JWT mandatory on every endpoint, read and write — including dashboard reads, which are no longer public. The legacy public-read posture is not preserved (see §15 Q1, closed). `/api/auth/token` issues short-lived JWTs from `auth.accounts` credentials. Dashboards request a token on load, cache locally, refresh before expiry. Scopes: `read:fleet` (all dashboard reads), `write:ops` (driver assignments, alarm acks, service log entries, admin actions on operational records), `admin:fleet` (lifecycle transitions, device provisioning, audit access). Push endpoints use HMAC shared secret per source (Jimi push, WhatsApp fuel microservice, HR extract).
|
||||
|
||||
**Rationale for closing Q1 in favour of authenticated:** by Phase 4 the platform carries driver shift start-locations (home-area information), driver names and phone numbers from the HR extract, customer-site visit patterns through trip endpoints, and plate-to-cost-centre mappings that reveal commercial relationships. This is no longer the same security posture as "anonymous vehicle dots on a public map" — even ignoring the original case for not exposing live fleet positions to competitors, the dataset has grown into one that cannot be public-read responsibly.
|
||||
|
||||
**Rate limits:** dashboards 60 req/min/IP, routes 10 req/min/IP, push 1000 req/min.
|
||||
|
||||
---
|
||||
|
||||
## 8. Frontend — three pages, one renderer
|
||||
|
||||
```
|
||||
fleet-core.js ES module, ~600 lines, MapLibre as only dependency
|
||||
initMap(elementId, opts)
|
||||
renderView(payload) ← the universal renderer
|
||||
initFilters(state, onChange)
|
||||
poiLayer(map, pois)
|
||||
costCentrePalette(name) → colour
|
||||
normalisePlate(s)
|
||||
apiFetch(path, params)
|
||||
clockEAT(elementId)
|
||||
authClient (token cache + refresh)
|
||||
|
||||
index-live.html ~100 lines, polls /api/views/live every 15 s
|
||||
index-history.html ~100 lines, fetches /api/views/history on form submit
|
||||
index-routes.html ~100 lines, click-and-route, calls /api/routes
|
||||
```
|
||||
|
||||
The renderer is dumb: take a `{summary, geojson, slo_status}` payload, populate KPIs, replace the GeoJSON source, render SLO badges. It doesn't know what "OFFLINE" means; the server attaches the right `style_class` to each feature. The renderer paints.
|
||||
|
||||
This structurally fixes the "1,400-line dashboard with embedded business logic" problem.
|
||||
|
||||
---
|
||||
|
||||
## 9. Routing layer (geo)
|
||||
|
||||
OSM Kenya + Uganda extracts loaded by `osm_loader` (monthly cron) into a staging schema. `osm2pgrouting` produces `geo.ways` + `geo.ways_vertices_pgr`. Staging → live is `ALTER SCHEMA … RENAME TO …`.
|
||||
|
||||
**Edge weights are hybrid:**
|
||||
1. `map_match` projector reads `events.parsed` of kind `position_fix`, finds nearest `geo.ways` segment within 30 m, writes `(segment_id, observed_at, speed_kmh)` to `geo.segment_observations`.
|
||||
2. A CAGG rolls it into `geo.cagg_segment_speed_band` keyed by `(segment_id, dow, hod)`.
|
||||
3. `serve.fn_route` calls `pgr_aStar` with a cost function that reads the CAGG for the departure hour-of-day, falls back to OSM `maxspeed`, finally to a global average.
|
||||
|
||||
**Response includes `observed_basis`** — which segments came from observed data, which from tags, which from fallback. Dispatch can see "this route is 80% observed-data based". Trust calibration belongs in the response.
|
||||
|
||||
**Active re-routing is out of scope for v1.** Endpoint is "suggest a route at this departure time".
|
||||
|
||||
---
|
||||
|
||||
## 10. SLOs and observability
|
||||
|
||||
```sql
|
||||
slo.targets:
|
||||
metric | threshold | window
|
||||
─────────────────────────────┼───────────┼────────
|
||||
fix_freshness_pct_60s | 95 | 5 min
|
||||
trip_lag_p95_sec | 600 | 1 h
|
||||
route_p95_ms | 500 | 5 min
|
||||
parser_lag_p95_sec | 30 | 5 min
|
||||
contract_drift_days | 1 | 1 d
|
||||
```
|
||||
|
||||
`slo.measurements` is a hypertable populated every minute by the `slo_measurement` job in the `platform-cron` role. `slo.v_current_status` exposes the live state. Grafana dashboards and alerts read directly from `slo.*`.
|
||||
|
||||
Dashboards display SLO-aware status: "Fleet below freshness SLO: 3 vehicles" instead of "3 vehicles OFFLINE 24h+".
|
||||
|
||||
Logs ship to container stdout (Coolify aggregates) as structured JSON (`structlog`).
|
||||
|
||||
---
|
||||
|
||||
## 11. Expected benefits over current architecture and functionality
|
||||
|
||||
This is the section that justifies the rebuild. Each row is a concrete, measurable improvement.
|
||||
|
||||
### 11.1 Reliability and correctness
|
||||
|
||||
| Pain in current system | What changes | Why |
|
||||
|---|---|---|
|
||||
| ~10 `[FIX-MNN]` hot-patches per year for write-path races, contract drift, dedup logic | Each category is structurally impossible | Single-writer projectors + versioned parser + one-place dedup rule |
|
||||
| Silent data loss when Jimi renames a field (weeks to detect) | Drift caught within 24 h | Daily contract checker against sandbox API |
|
||||
| `STALE_GPS_MS=10min` / `OFFLINE=24h` / `freshness magic` scattered across 12+ places | One row per threshold in `slo.targets` | SLO-first design |
|
||||
| "OFFLINE" mixes broken device, parked vehicle, expired subscription, decommissioned | Lifecycle and operational state are separate | `domain.devices.lifecycle` + computed operational state |
|
||||
| Production runs from a non-`main` branch; cherry-picks needed | One branch. Image-tag deploys | CI builds tagged image. Coolify pulls by tag |
|
||||
| Nominatim slowdown stalls trip ingest | Geocoding is its own worker, never blocks | Async projector pattern |
|
||||
| No way to replay a dropped event | Every event is in `events.raw`; re-parse anytime | Event sourcing |
|
||||
| Heavy historical query / OOM in one component can crash everything (single-process fate sharing) | A heavy query in the worker role does not affect the gateway; a runaway cron does not affect either | Container-role separation (same image, gateway / worker / cron) |
|
||||
| Internal stage-to-stage lag (parser poll + projector poll = up to 20s before SLO timer starts) | Parser and projector wake on `NOTIFY` within milliseconds of the upstream write | Event-driven projection chain |
|
||||
| Live dashboard publicly readable (legacy posture) | All endpoints require JWT; scope-gated reads | Mandatory auth from day one |
|
||||
|
||||
### 11.2 Operations
|
||||
|
||||
| Pain in current system | What changes | Why |
|
||||
|---|---|---|
|
||||
| Three Docker images, three rebuild cycles per shared-helper change | One image, one rebuild — same image runs in three container roles | Single FastAPI codebase consolidates ingest + API + workers; runtime roles separated by entrypoint |
|
||||
| ~7 containers (webhook_receiver, ingest_movement, ingest_events, timescale, grafana, pgbouncer, db_backup) | 8 containers: `db`, `pgbouncer`, `platform-gateway`, `platform-worker`, `platform-cron`, `dashboard-proxy`, `grafana`, `db_backup` | Three runtime roles from one image replace three independent Python services; fate isolation gained, build complexity unchanged |
|
||||
| Coolify per-service redeploy required for shared-helper changes | One redeploy ships everything | One image |
|
||||
| "What's deployed?" requires `git log` + Coolify UI + container exec | `docker inspect` returns the image tag, which is the git SHA | Image-tag deploys |
|
||||
| Rollback is `git revert + rebuild + redeploy` (~5 minutes) | Rollback is `coolify deploy :<prev-tag>` (~30 seconds) | Pre-built images in registry |
|
||||
| Schema migrations are bespoke Python with no formal "down" path | dbmate handles `up`/`down`, generates `schema.sql` snapshot for PR review | Standard tool |
|
||||
| n8n workflow JSON holds dashboard contracts; not code-reviewable | Pydantic models in code; OpenAPI generated; PRs review contracts | Contracts in code |
|
||||
| Grafana queries assemble metrics ad-hoc | `slo.*` schema is the metrics layer; Grafana is a thin renderer | Pre-aggregated SLO measurements |
|
||||
|
||||
### 11.3 Development velocity
|
||||
|
||||
| Pain in current system | What changes | Why |
|
||||
|---|---|---|
|
||||
| Adding a third dashboard means re-implementing palette + POI + EAT clock + map setup | `fleet-core.js` exports those primitives; new dashboard is ~100 lines of HTML | Shared renderer |
|
||||
| Dedup logic change (round 6) required modifying SQL CTE + JS `vehicleState()` | Dedup change is one SQL function | Server-side state computation |
|
||||
| Trip enrichment (FIX-M20) required modifying `poll_trips` + adding migration + adjusting webhook handler | Enrichment is a new projector reading existing events | Decoupled enrichment |
|
||||
| Source code is image-baked in dev; typo fix requires rebuild | Bind-mounted in dev; baked in prod (via `APP_MODE` build arg) | Dev/prod parity without dev pain |
|
||||
| Test suite must use mock DB because shared module assumes pool exists at import time | Tests use real Postgres (docker-compose), shared module is lazy-init | Cleaner module boundaries |
|
||||
| Adding multi-account support required retrofitting `TARGETS` env var across all polling code | Multi-account is a NOT NULL FK from commit one | Designed-in, not bolted-on |
|
||||
|
||||
### 11.4 New capabilities not present today
|
||||
|
||||
| Capability | What it enables |
|
||||
|---|---|
|
||||
| **Event replay** | "Re-build the last 90 days of trips with corrected enrichment logic" is a SQL statement |
|
||||
| **Time-travel debug** | "What payload did Jimi send for IMEI X at 03:47 on Tuesday?" is `SELECT * FROM events.raw WHERE imei = ...` |
|
||||
| **Routing (A\* with time-banded weights)** | Dispatch can suggest a route at a given departure time, with `observed_basis` showing trust level per segment |
|
||||
| **SLO-driven alerting** | Grafana alert when fix-freshness falls below 95% during business hours; dashboards render SLO state, not arbitrary thresholds |
|
||||
| **Lifecycle state machine** | Decommissioned devices don't appear in operational dashboards. Suspended devices show a distinct visual state. No more "OFFLINE 24h+" sweep including retired vehicles |
|
||||
| **Versioned parsers** | When Jimi changes a field name, bump parser version, backfill `events.parsed`, no data lost |
|
||||
| **Daily contract check** | Upstream API drift caught next day, not next quarter |
|
||||
| **`observed_basis` in route responses** | Operator trust calibration: "this ETA is based on 80% observed data" vs "mostly OSM tags" |
|
||||
| **One-place dedup rule** | Future fleet expansion (e.g. adding a third device class) is a one-line change to `serve.fn_live_view` |
|
||||
|
||||
### 11.5 Quantified expectations (best-effort estimates)
|
||||
|
||||
These are forecasts based on the architectural changes, not measured. Re-baseline after Phase G.
|
||||
|
||||
- **Mean time to detect API contract drift:** ~90 days today → **<24 hours** (contract checker).
|
||||
- **Mean time to detect data freshness regression:** unbounded today → **5 minutes** (SLO alerting).
|
||||
- **Rollback time:** ~5 minutes today (rebuild + redeploy) → **~30 seconds** (image-tag swap).
|
||||
- **Add a new dashboard:** ~2 weeks today (re-implement scaffolding) → **~2 days** (~100 lines of HTML against existing renderer).
|
||||
- **Add a new ingest source (e.g. a 4th sub-account or a new push type):** ~3-5 days today (touch 3 Python files + migration + n8n) → **~half a day** (new gateway endpoint + new parser + new projector — each a single file).
|
||||
- **Reproduce a production data issue locally:** Hours to days today (re-fetch from Jimi, hope it returns the same data) → **Minutes** (`pg_dump events.raw`, restore, replay).
|
||||
- **Cold-start a new dev:** ~1 day today (figure out which container does what) → **<10 minutes** (`git clone && docker compose up`).
|
||||
|
||||
### 11.6 What does NOT improve
|
||||
|
||||
Honest pushback — not everything gets better.
|
||||
|
||||
- **Raw write throughput:** identical. Both systems are nowhere near Postgres limits at ~80 vehicles.
|
||||
- **Map rendering performance:** identical. MapLibre is the same library.
|
||||
- **Geocoding latency:** identical. Nominatim is still rate-limited.
|
||||
- **Jimi API rate limits:** unchanged. Their problem, not ours.
|
||||
- **Operator UI learning curve:** dashboards look familiar but the SLO terminology is new; expect a brief training window.
|
||||
|
||||
---
|
||||
|
||||
## 12. Phased rollout
|
||||
|
||||
The architectural shift is large; the rollout is incremental. Each phase produces a verifiable artefact.
|
||||
|
||||
| Phase | Weeks | Deliverable | DoD |
|
||||
|---|---|---|---|
|
||||
| **A. Foundation** | 1-2 | Repo, docker-compose, all schemas, dbmate, FastAPI `/health`. CI builds image, Coolify deploys `:latest` | `curl /health` returns DB connectivity from a tagged image |
|
||||
| **B. Event log + parser** | 3 | `/push/*` endpoints write `events.raw`. Parser worker drains to `events.parsed`. Pydantic models versioned. Contract checker scheduled | Replayed historical Jimi push lands in both `raw` and `parsed` |
|
||||
| **C. Projectors** | 4-5 | Each `state.*` table has a projector. Multi-account from commit one. Polling workers write `events.raw` only | 24-h soak; `slo.v_current_status` all green; no duplicate fixes |
|
||||
| **D. Serve layer** | 6 | `serve.fn_*` functions. Geocode worker. Matview refresh inside scheduler | Every dashboard endpoint returns valid Pydantic JSON |
|
||||
| **E. Dashboards** | 7-8 | `fleet-core.js` + three HTML pages | Feature parity for a chosen 30-day test window |
|
||||
| **F. Routing** | 9-11 | OSM loader, `geo.ways`, map-match projector, `cagg_segment_speed_band`, `fn_route`, `index-routes.html` | <500 ms p95 routing endpoint |
|
||||
| **G. Cutover** | 12 | Push mirror forwards events to both old and new for 7 days. DNS cut. 48 h hot-standby. Old stack decommissioned | 7 days post-cutover with no rollback |
|
||||
|
||||
**Realistic: 12 weeks for two devs** to ship feature parity + routing v1 + the architectural invariants.
|
||||
|
||||
---
|
||||
|
||||
## 13. Deployment
|
||||
|
||||
**Containers:** `db` (TimescaleDB-HA + PostGIS + pgRouting), `pgbouncer`, `platform-gateway` (FastAPI, gateway role), `platform-worker` (FastAPI, worker role — parser + projectors + geocoder + matview refresh + map-match), `platform-cron` (FastAPI, cron role — polls + contract check + OSM loader + SLO measurement + HR sync), `dashboard-proxy` (nginx → rustfs), `grafana`, `db_backup`. Eight containers. The three `platform-*` containers run the same image with different entrypoint commands.
|
||||
|
||||
**Image strategy:** CI builds on push to `main`, tags `<registry>/fleet-platform:<sha>` and `:latest`. Coolify deploys by tag. `docker inspect` answers "what's running". Rollback is `coolify deploy :<prev-tag>` and is the same operation for all three roles.
|
||||
|
||||
**Migrations:** `dbmate up` runs on the `platform-worker` container start (only) before FastAPI boots. Forward-only. `schema.sql` is `dbmate dump`, committed, PR-reviewed. The other two roles wait on a startup probe that confirms migration completion before they start serving traffic.
|
||||
|
||||
**Healthchecks:**
|
||||
- `db`: `pg_isready`
|
||||
- `pgbouncer`: `pg_isready -p 6432`
|
||||
- `platform-gateway`: `GET /health/gateway` (DB conn + last successful HMAC verify)
|
||||
- `platform-worker`: `GET /health/worker` (DB conn + last parser run age + LISTEN connection alive)
|
||||
- `platform-cron`: `GET /health/cron` (DB conn + last scheduled-job tick age)
|
||||
- `dashboard-proxy`: nginx `/healthz`
|
||||
- `grafana`: `/api/health`
|
||||
- `db_backup`: touchfile updated by cron
|
||||
|
||||
**Secrets:** `.env` in dev, Coolify env vars in prod. `.env.example` lists every key.
|
||||
|
||||
**Domains:** `api.rahamafresh.com` (fleet-platform), `live.rahamafresh.com`, `fleetintelligence.rahamafresh.com`, `routes.rahamafresh.com`, `grafana.rahamafresh.com`.
|
||||
|
||||
**Backups:** rustfs sidecar (existing pattern). Add weekly `--schema=events` slice + monthly `--schema=geo` slice for fast partial restore.
|
||||
|
||||
**Local dev:** `git clone && cp .env.example .env && docker compose up`. Source bind-mounted in dev mode (driven by `APP_MODE=dev`); `uvicorn --reload` picks up edits. Build is for prod.
|
||||
|
||||
---
|
||||
|
||||
## 14. What we explicitly drop
|
||||
|
||||
- **n8n for dashboard contracts.** Service-layer logic is in code.
|
||||
- **Three independent Python processes with a shared module imported into each.** Replaced by one codebase running in three container roles from the same image. The fate-sharing failure mode goes away; the rebuild-three-images-for-one-change failure mode goes away. Operationally it is three containers, but architecturally it is one service.
|
||||
- **`reporting` and `tracksolid` schemas as a mental model.** Replaced by `events / state / domain / geo / serve / slo / ops / auth`.
|
||||
- **`enabled_flag=1` magic.** Replaced by a `state.active_devices` view.
|
||||
- **Magic-number thresholds scattered through SQL/JS.** Replaced by rows in `slo.targets`.
|
||||
- **Synchronous Nominatim in the write path.** Replaced by an async projector.
|
||||
- **Manual branch-to-prod mapping.** Replaced by image tags.
|
||||
- **Per-feature bolt-on tables.** New domain entities go in `domain` or a new schema with one reason to exist.
|
||||
- **Public-read dashboards.** Replaced by JWT-required reads from day one.
|
||||
- **Poll-driven internal stages.** Replaced by `LISTEN/NOTIFY`-driven parse → project; APScheduler retained only for genuinely time-triggered jobs.
|
||||
|
||||
---
|
||||
|
||||
## 15. Open architectural questions
|
||||
|
||||
Decisions the team needs to make before / during execution:
|
||||
|
||||
1. **Auth posture.** ~~Dashboards public-read (current) or login-gated?~~ **Closed: login-gated.** All endpoints require JWT from day one. Public-read is not preserved; the dataset has grown into one that cannot be public-read responsibly (see §7 rationale). Three scopes: `read:fleet`, `write:ops`, `admin:fleet`.
|
||||
2. **Routing v1 scope.** Suggest-route only, or proactive deviation alerts? v1 = suggest. v2 = active. *(Note: routing is scope-deferred from the PRD into a companion project; this question is preserved for that project's reference.)*
|
||||
3. **SLO targets.** Actual numbers? Freshness < 60 s? 90 s? 120 s? Pick before Phase G.
|
||||
4. **n8n retention.** Drop entirely, or keep for cross-system orchestration (Slack alerts, CRM bridges)? Default = drop unless a concrete workflow needs it.
|
||||
5. **Image registry.** `ghcr.io` (free, GitHub) or self-hosted (`registry.rahamafresh.com`)? Affects CI complexity.
|
||||
6. **Analytics layer.** Out of scope here. If longitudinal reporting becomes a need, design as a separate concern reading from `state.position_history` / `state.trips` — not folded into the operational stack.
|
||||
7. **Redis ingest buffer — re-evaluate trigger.** Not adopted in v1 (see §6 rationale: weakens immutability invariant, adds failure surface, no current latency problem). Re-evaluate when *any* of the following becomes true: (a) gateway p95 latency exceeds 50 ms sustained for a week against the contracted "HMAC + INSERT + 200 OK" workload; (b) Postgres `INSERT` rate against `events.raw` approaches the chunk-write throughput ceiling on the current VPS class; (c) push receiver concurrency exceeds 200 in-flight requests during a normal hour. Until then, container-role separation provides the fate isolation; `LISTEN/NOTIFY` provides the wake-on-arrival pattern.
|
||||
|
||||
---
|
||||
|
||||
## 16. Verification — done when
|
||||
|
||||
1. Every dashboard URL has a working equivalent with feature parity for a chosen 30-day test window.
|
||||
2. `slo.v_current_status` shows all SLOs green for 7 consecutive days post-cutover.
|
||||
3. `events.raw` can be replayed to rebuild `state.*` from scratch within an hour. (Demonstrate by truncating `state.live_positions` and re-projecting.)
|
||||
4. The contract checker has caught at least one synthetic API change in staging, then run green for 7 days in production.
|
||||
5. `git log origin/main` is the source of truth for what Coolify runs.
|
||||
6. The old three-repo stack is archived; the `webhook_receiver` in the old stack receives no traffic.
|
||||
7. Routing endpoint returns a valid LineString in <500 ms p95.
|
||||
8. A new dev clones the repo, `docker compose up`, working local stack in <10 minutes.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -1,730 +0,0 @@
|
|||
# Product Requirements Document — Fleet Telematics Platform (Rebuild)
|
||||
|
||||
|
||||
| **Document** | PRD v1.1 |
|
||||
| **Date** | 2026-05-22 (rev. b — incorporates engineering review of 2026-05-22) |
|
||||
| **Status** | Draft for approval |
|
||||
| **Author posture** | Senior product manager |
|
||||
|
||||
---
|
||||
|
||||
## 0. How to read this document
|
||||
|
||||
This PRD describes **what** the fleet platform must do for the business and **why**, in product terms. It pairs with the architecture document which describes **how** engineering will build it. PRD phases are organised by **user-visible value delivered**, not by engineering layers. Engineering's 12-week rollout (Phases A–G in the architecture doc) is the build sequence that *produces* the product phases described here; the mapping is given in §13.
|
||||
|
||||
A reader should be able to:
|
||||
- Approve or reject the scope of any phase without reading the architecture doc.
|
||||
- Trace any requirement to a user persona and a measurable success criterion.
|
||||
- Identify what is **deliberately out of scope** so we don't relitigate it later.
|
||||
|
||||
---
|
||||
|
||||
## 1. Executive summary
|
||||
|
||||
We operate a fleet of ~80 vehicles across Kenya and Uganda, instrumented with Jimicloud/Tracksolid trackers (GT06E, X3, AT4 families) and JC400P dashcams — roughly 180 devices in total. The current platform, built incrementally across three Git repositories over roughly twelve months, has reached the limits of incremental patching: data races, contract-drift surprises, silent data loss, branch divergence between production and `main`, and dashboards whose business logic is encoded in 1,400-line HTML files.
|
||||
|
||||
The product is sound. The architecture is the problem.
|
||||
|
||||
This PRD proposes a **greenfield rebuild** of the platform on a unified architecture (one repo, one service, one database, one branch, image-tag deploys, event-sourced ingest, explicit SLOs, lifecycle-aware device state). Built in four product phases over approximately nine to ten weeks, the new platform delivers feature parity with today's system by Phase 3 and unlocks driver KPIs, behaviour scoring, service-cycle tracking, and finance/executive intelligence in Phase 4.
|
||||
|
||||
The business case rests on three pillars:
|
||||
|
||||
1. **Operational reliability** — SLO-driven monitoring replaces the current pattern of "find out from a dispatcher that the dashboard is wrong". Mean time to detect contract drift drops from ~90 days to <24 hours.
|
||||
2. **Development velocity** — adding a new dashboard, new ingest source, or new business rule becomes a one-day change in one place, not a three-process, three-deploy rebuild.
|
||||
3. **Driver and service intelligence** — first-class shift reporting (ACC sign-on / sign-off with geocoded location), driver-behaviour scoring, and km-based service-cycle tracking, all of which the current platform structurally cannot provide.
|
||||
|
||||
The rebuild does **not** require buying new hardware, changing telematics vendors, retraining operators, or pausing current operations. The current stack runs in parallel during build and for 48 hours after cutover as hot-standby.
|
||||
|
||||
**Scope boundary — routing and ticket-driven dispatch.** Suggest-route capabilities and ticket-allocation (vehicle-to-ServiceNow-ticket) are **explicitly out of scope of this PRD** and are tracked as a separate companion project. The reasoning: those capabilities are driven by external inputs (client tickets raised in ServiceNow) and constitute a distinct product domain — ticket lifecycle, allocation rules, and route optimisation — that deserves its own PRD, its own stakeholders, and its own technology choices. This platform provides the telematics foundation that any future routing project will consume; it does not deliver routing itself.
|
||||
|
||||
---
|
||||
|
||||
## 2. Background and problem statement
|
||||
|
||||
### 2.1 What we have today
|
||||
|
||||
- **Source-of-truth telematics:** Jimicloud Pro / Tracksolid Pro APIs, multi-account (TARGETS env var holds N sub-account credentials).
|
||||
- **Ingest path:** Three Python containers (`webhook_receiver`, `ingest_movement`, `ingest_events`) plus n8n workflows, writing to a TimescaleDB instance with PostGIS.
|
||||
- **Serving path:** Two static HTML dashboards (live + historical) talking to the database via n8n-proxied endpoints; a Grafana instance for ad-hoc operational queries.
|
||||
- **Deployment:** Coolify on a single VPS, Traefik termination, rustfs as the static-asset and backup target.
|
||||
- **Production branch:** `quality-program-2026-04-12`. `main` exists but has drifted. The most recent fix (FIX-M21) had to be cherry-picked manually.
|
||||
|
||||
### 2.2 What hurts
|
||||
|
||||
The current platform has been kept healthy by a stream of point fixes labelled FIX-M01 through FIX-M21 (and FIX-E01 through FIX-E06 on the alarm side). These fixes share recurring patterns:
|
||||
|
||||
| Pattern | Examples |
|
||||
|---|---|
|
||||
| Upstream API field rename caught months late | FIX-E06 (`alarmType` → `alertTypeId`), FIX-M11 (distance unit drift) |
|
||||
| Race conditions across multiple writers to the same "current state" table | FIX-M21 (time-guarded upsert for cross-feed + sweep + rescue) |
|
||||
| Bad data slipping past at ingest time | FIX-M03 (zero-island fixes), FIX-M12 (BCD timestamps) |
|
||||
| Dedup logic rewritten in production six times | Rounds 1–6 of `reporting.v_live_positions` |
|
||||
| Synchronous external calls in the write path | Nominatim reverse-geocode inside `poll_trips` |
|
||||
| "Just one more flag" in queries | `WHERE enabled_flag=1` repeated ~40 times across SQL |
|
||||
|
||||
These are not bugs. They are **categories** of latent failure that the architecture invites. A fresh codebase on the same architecture will, on a long enough timeline, produce the same categories.
|
||||
|
||||
### 2.3 What we want instead
|
||||
|
||||
A platform where:
|
||||
- Every signal is captured immutably before it is interpreted, so contract drift becomes a re-parse, not a data loss.
|
||||
- There is one writer per state table, so write-path race conditions cannot occur.
|
||||
- SLO thresholds are data, not constants scattered across SQL and JS.
|
||||
- Lifecycle state (provisioned / active / suspended / decommissioned) is separate from operational state (moving / parked / offline).
|
||||
- Dashboards are render-only; business logic lives in the API.
|
||||
- "What's in production?" is answerable from `docker inspect`, not by guessing which branch was deployed.
|
||||
|
||||
---
|
||||
|
||||
## 3. Goals, non-goals, and guiding principles
|
||||
|
||||
### 3.1 Product goals
|
||||
|
||||
| # | Goal | Measure |
|
||||
|---|---|---|
|
||||
| G1 | Reduce mean time to detect upstream contract drift | From ~90 days today to <24 hours |
|
||||
| G2 | Eliminate write-path race conditions on current-state tables | Zero retroactive guards added after Phase 2 |
|
||||
| G3 | Achieve and monitor an explicit fix-freshness SLO | ≥95% of active devices have a fix within 90 s, measured continuously |
|
||||
| G4 | Reduce time to add a new dashboard | From ~5 days today to <1 day |
|
||||
| G5 | Reduce time to add a new ingest source | From ~3 days today to <0.5 day |
|
||||
| G6 | Make "what's in production?" trivially answerable | One command returns the running image SHA |
|
||||
| G7 | Separate lifecycle from operational state in all dashboards | Decommissioned devices do not appear in operational views |
|
||||
| G8 | Keep the driver roster current as a first-class operational tool | ≥95% of active vehicles have a non-null currently-assigned driver; reassignment-for-leave flow used in production without spreadsheet fallback |
|
||||
| G9 | Surface vehicles due for service before they become overdue | Service-due dashboard is the canonical "what's due this week" source; service spreadsheet retired |
|
||||
| G10 | Make driver KPIs (shift start/end + behaviour) data-driven, not anecdote-driven | Per-driver shift records with geocoded start/end, behaviour score published weekly, used in at least one operational decision per ops manager per month |
|
||||
|
||||
### 3.2 Non-goals
|
||||
|
||||
The following are explicitly **out of scope** for this rebuild. Each can be a future initiative; none gates Phase 4.
|
||||
|
||||
- Replacing Jimicloud / Tracksolid as the telematics provider.
|
||||
- Replacing the hardware (GT06E, X3, AT4, JC400P).
|
||||
- Multi-region deployment or HA across regions.
|
||||
- A native mobile app (the dashboards are responsive web; native is a separate product).
|
||||
- A customer-facing tracking portal (current platform is internal-ops only; that stays).
|
||||
- Migration of historical raw data beyond what is required for analytics continuity (we will migrate the last 90 days of parsed events; older data stays in the legacy DB as a read-only archive).
|
||||
- Real-time video streaming from the JC400P cameras (the current platform does not do this either).
|
||||
- Driver-side mobile app for trip-tagging, dispatch acknowledgement, or proof-of-delivery.
|
||||
- Customer-facing APIs (the platform's API is internal; partner integrations are a future phase).
|
||||
- **Routing, route suggestion, ETA prediction, multi-stop optimisation.** These are scoped as a separate companion project because they are downstream of ticket allocation, which is itself downstream of ServiceNow tickets raised by external clients. The companion project owns the ticket-lifecycle integration, the allocation policy, the routing engine choice (pgRouting, OSRM, Valhalla, or third-party API), and the dispatch UX. This platform's responsibility is to expose the telematics primitives that project will consume (live vehicle position, vehicle assignment to driver, segment-of-road observed speeds if needed) — not to ship routing itself.
|
||||
- **Ticket-driven dispatch / ServiceNow integration.** Same reasoning as above. The dispatch-decision audit trail in this platform (if retained at all — see §14 Q16) is a thin local log, not the dispatch system. The companion project will define how tickets, allocation, dispatch state, and proof of completion flow.
|
||||
|
||||
### 3.3 Guiding principles
|
||||
|
||||
These principles inform every requirement that follows.
|
||||
|
||||
1. **Event log first, derived state second.** No write path mutates "current state" directly. Every signal lands in an immutable log; projectors derive state from it.
|
||||
2. **Contracts are typed and verified.** Pydantic models for every endpoint, daily contract-check against sandbox upstreams.
|
||||
3. **One codebase, one database, one repo — fate-isolated runtime roles.** Microservices are a scaling answer for organisations we don't have. But running gateway + workers + cron in a single Python process is a fate-sharing failure mode we don't need. The platform is one codebase that runs in three container roles (gateway / worker / cron) from the same image. Architecturally one service; operationally three containers that can fail independently.
|
||||
4. **Lifecycle ≠ operational state.** Devices have a lifecycle (provisioned/active/suspended/decommissioned); vehicles have an operational state (moving/parked/offline/unknown). They are computed and stored separately.
|
||||
5. **SLOs are first-class data.** Thresholds live in `slo.targets`, not in constants.
|
||||
6. **Image-tag deploys, not branch deploys.** CI builds and tags; production runs a specific tag.
|
||||
7. **Dashboards are thin renderers.** All business logic server-side; the JS draws what the API tells it to draw.
|
||||
8. **External enrichment is async.** Nominatim, Mapbox, future enrichers never block ingest. Internal stage-to-stage continuation is event-driven (`LISTEN/NOTIFY`), not poll-driven, so we don't manufacture our own delay budget.
|
||||
9. **Secure by default.** Every endpoint requires authentication from day one. Public-read access to operational data — even "just the live map" — is not a posture the rebuilt platform inherits from the legacy system.
|
||||
|
||||
---
|
||||
|
||||
## 4. Target users and personas
|
||||
|
||||
The platform serves five primary personas. Each persona's needs drive specific phases.
|
||||
|
||||
### 4.1 Dispatcher — "Where are my vehicles right now?"
|
||||
|
||||
- **Day-in-the-life:** Monitors a wall of live vehicle positions. Coordinates drivers around incidents via phone/WhatsApp. Confirms arrival at customer sites. Handles "the customer says the truck never showed up" calls.
|
||||
- **Tools used today:** `live.rahamafresh.com` (live dashboard), Slack, phone, WhatsApp with drivers.
|
||||
- **Pain points today:** OFFLINE markers that turn out to be "device powered off two weeks ago, decommissioned", not "in trouble right now". Limited ability to coordinate from a single screen — relies on phone for the actual dispatch conversation.
|
||||
- **Phase that primarily serves them:** Phase 1 (live tracking), Phase 2 (historical playback for retrospective queries). Suggest-route tooling is delivered by the separate companion project.
|
||||
- **Key KPIs:** Fix freshness, time-to-detect device anomaly, accuracy of vehicle-at-site confirmation.
|
||||
|
||||
### 4.2 Operations manager — "Is the fleet healthy this week?"
|
||||
|
||||
- **Day-in-the-life:** Reviews weekly fleet performance. Identifies vehicles with degraded telematics. Triages device replacements. Reconciles trips against driver timesheets and customer deliveries.
|
||||
- **Tools used today:** Historical dashboard (`fleetintelligence.rahamafresh.com`), Grafana, ad-hoc CSV exports.
|
||||
- **Pain points today:** No SLO view ("am I above or below promise?"). The "OFFLINE 24h+" KPI mixes decommissioned vehicles with broken ones. Trip reports are slow to query.
|
||||
- **Phase that primarily serves them:** Phase 2 (historical + trip analytics), Phase 3 (operations tooling).
|
||||
- **Key KPIs:** Trip ingest lag, SLO attainment, decommissioned-device hygiene.
|
||||
|
||||
### 4.3 Fleet administrator — "Provision, retire, reassign, service."
|
||||
|
||||
The fleet administrator owns three running rosters: **devices**, **driver assignments**, and **service schedule**. These three rosters are the operational backbone of the fleet team's week.
|
||||
|
||||
- **Day-in-the-life:**
|
||||
- *Devices.* Activates new devices, suspends devices for non-payment, decommissions retired vehicles, reconciles invoices against active devices.
|
||||
- *Driver assignments.* Assigns a primary driver per vehicle (drawing from the HR-synced driver list — names, phones, and employment status auto-arrive). Reassigns vehicles when a driver is on leave, off sick, on training, or otherwise off-roster. Maintains a historical log of who drove what when (for incident attribution and HR reconciliation).
|
||||
- *Service schedule.* Tracks distance covered per vehicle against the service-interval policy (default 5,000 km between services). Flags vehicles due or overdue for service. Records completed services so the running total resets. The km figure auto-corrects from driver-submitted odometer readings at each fuel submission, so the running total is more accurate than the GPS-only figure alone.
|
||||
- **Tools used today:** Spreadsheet for driver-to-vehicle mapping (frequently stale). Separate spreadsheet for service log (km readings entered manually from odometer photos). Occasionally direct DB access. Tracksolid web console for device-side admin.
|
||||
- **Pain points today:**
|
||||
- `devices.enabled_flag=1` is set everywhere and nowhere; the lifecycle is implicit.
|
||||
- Driver-to-vehicle mapping lives in a sheet — when a driver goes on leave, the sheet is rarely updated, and trips end up attributed to the wrong person or to nobody.
|
||||
- Service tracking is reactive: vehicles get serviced when something fails or when someone notices the odometer photo looks high, not on a planned schedule.
|
||||
- Driver phone numbers and status (terminated, on leave) drift between HR's record and operational reality — the platform doesn't see HR changes until someone manually copies them across.
|
||||
- **Phase that primarily serves them:** Phase 3 (device admin + driver-roster UI + service-due dashboard).
|
||||
- **Key KPIs:** Time to provision a new device, time to complete a driver reassignment, percentage of trips attributed to a named driver, vehicles serviced on-schedule vs late.
|
||||
|
||||
### 4.4 Finance / cost-centre owner — "What did this cost?"
|
||||
|
||||
- **Day-in-the-life:** Allocates fleet costs by cost centre, by assigned city, by customer. Reviews fuel anomalies, idle time, after-hours usage.
|
||||
- **Tools used today:** CSV exports, manual spreadsheet work.
|
||||
- **Pain points today:** Cost-centre tagging is partial. After-hours-usage queries require a SQL specialist. Fuel/temperature data is captured but not surfaced.
|
||||
- **Phase that primarily serves them:** Phase 4 (analytics, fuel/temperature surfaces, cost-centre allocation views).
|
||||
- **Key KPIs:** Cost-allocation completeness, anomaly detection lead time.
|
||||
|
||||
### 4.5 Executive / sponsor — "Is the platform delivering?"
|
||||
|
||||
- **Day-in-the-life:** Monthly review of fleet performance. Capex decisions on devices and vehicles. Quarterly conversations with the telematics vendor.
|
||||
- **Tools used today:** Manual decks built from spreadsheets.
|
||||
- **Pain points today:** Numbers are hand-curated and slow. No durable executive view.
|
||||
- **Phase that primarily serves them:** Phase 4 (executive summary view, SLO attainment dashboard).
|
||||
- **Key KPIs:** Monthly fleet uptime, cost-per-km trend, incident count.
|
||||
|
||||
### 4.6 Secondary actors
|
||||
|
||||
- **Drivers** are *subjects* of the platform, not users. They interact via the vehicle and via voice/chat with dispatch; no driver-facing app is in scope.
|
||||
- **The telematics vendor (Jimicloud)** is an upstream system, not a user. We track their API contracts via the contract checker.
|
||||
- **Customers** are not platform users. Customer-facing tracking is out of scope.
|
||||
|
||||
---
|
||||
|
||||
## 5. Phases overview
|
||||
|
||||
The product is delivered in **four phases over approximately nine to ten weeks**, each phase shipping a self-contained increment of value. Each phase has a hard deliverable, a measurable success criterion, and a defined scope cut. No phase blocks production usage of earlier phases.
|
||||
|
||||
| Phase | Theme | Weeks | Primary persona | Headline deliverable |
|
||||
|---|---|---|---|---|
|
||||
| **P1** | Foundation + live tracking | 1–3 | Dispatcher | A working live-position dashboard, deployed, on the new architecture, against a parallel data source |
|
||||
| **P2** | Historical + trip analytics | 4–6 | Operations manager | Historical playback + trip reports with feature parity for a chosen 30-day test window |
|
||||
| **P3** | Operations tooling + cutover | 7–8 | Operations manager + Fleet admin | SLO dashboards, device-lifecycle admin UI, **driver-roster + reassignment UI**, **service-due dashboard**, alarm console, legacy decommission |
|
||||
| **P4** | Intelligence + driver KPIs | 9+ | Operations manager + Finance + Executive + HR | **Driver shift reporting (ACC sign-on / sign-off + geocoded location)**, **driver-behaviour scoring**, cost-centre allocation, fuel/temperature surfaces, executive monthly view |
|
||||
|
||||
Phases 1–3 are committed scope for this rebuild. Phase 4's driver KPI work is committed; the remaining Phase 4 surfaces (cost, anomalies, executive view) are committed in principle with details confirmed at end of Phase 3.
|
||||
|
||||
Routing and ticket-driven dispatch are **not part of this PRD** (see §3.2 non-goals and §1 scope boundary) — they are tracked separately.
|
||||
|
||||
---
|
||||
|
||||
## 6. Phase 1 — Foundation and live tracking (weeks 1–3)
|
||||
|
||||
### 6.1 Objective
|
||||
|
||||
Establish the platform's architectural foundation and ship a live-position dashboard that demonstrates feature parity with today's `live.rahamafresh.com`, running in parallel against the same telematics sources. By end of Phase 1, the new platform is reachable, deployable, monitored, and shows live vehicle positions correctly.
|
||||
|
||||
### 6.2 Why this is Phase 1
|
||||
|
||||
Live tracking is the highest-traffic, highest-stakes use case (dispatch makes decisions on it every minute). It exercises the full stack — push receiver, parser, projector, serve function, dashboard renderer — at the smallest scope. If the architecture works for live tracking, the rest follows. If it doesn't, we discover that in week 3, not week 12.
|
||||
|
||||
### 6.3 User stories
|
||||
|
||||
- **U1.1 — Dispatcher sees current positions.** As a dispatcher, I can open the live dashboard and see every active vehicle plotted on a map with its current location, last-update time, vehicle plate, and operational state (moving / parked / offline / unknown), refreshed every 15 seconds.
|
||||
- **U1.2 — Dispatcher filters by cost centre and city.** As a dispatcher, I can filter the live view by cost centre and assigned city without page reload.
|
||||
- **U1.3 — Dispatcher distinguishes broken from dormant.** As a dispatcher, I can tell at a glance which "offline" vehicles are decommissioned (and therefore not my problem) versus which are unexpectedly silent.
|
||||
- **U1.4 — Ops sees SLO breach.** As an ops manager, I can see at any time what percentage of active devices have a fix within the freshness SLO (default 90 s) and which vehicles are below SLO right now.
|
||||
- **U1.5 — Engineer replays an event.** As an engineer, I can take a raw Jimi payload from `events.raw`, re-parse it with the current parser, and confirm what `state.live_positions` would have been written, without re-fetching from Jimi.
|
||||
|
||||
### 6.4 Functional requirements
|
||||
|
||||
- **F1.1 — Multi-account ingest.** Receive push events from all Jimi sub-accounts (current TARGETS list) via HMAC-signed webhooks. Identify each event by `(account_id, imei)` from day one — no retroactive multi-tenancy.
|
||||
- **F1.2 — Polled ingest.** Poll `jimi.user.device.location.list` per account on a 60-second cadence (catch-up on startup, ongoing). Poll `jimi.user.device.location.get` for stale IMEIs every 10 minutes.
|
||||
- **F1.3 — Immutable event log + minimal-gateway contract.** Every push and every poll response writes a row to `events.raw` with verbatim payload, source, signature, `received_at`, `parser_version`. The push-receiving gateway performs HMAC verify + INSERT + `NOTIFY events_raw_new` + 200 OK, and nothing else, per request. No parsing, no PostGIS, no projector work on the push path. Parser and projector work happens in the worker container role (architecture §2.3, §6).
|
||||
- **F1.4 — Versioned parser, event-driven.** A parser worker holds a `LISTEN events_raw_new` connection, drains new rows to `events.parsed` on arrival (typically within milliseconds), applying Pydantic-typed transformations. Parser version is recorded per row. Re-parsing is a SQL statement. A 5-second timer-based sweep catches the rare missed-NOTIFY case; under normal operation it is a no-op.
|
||||
- **F1.5 — Single-writer projector, event-driven.** One projector holds a `LISTEN events_parsed_new` connection and updates `state.live_positions` on arrival of each `position_fix` event. Ordering invariant: process events in `occurred_at` order; never overwrite a newer fix with an older one. Stage-to-stage lag (parser → projector) is bounded by NOTIFY propagation, not by polling intervals.
|
||||
- **F1.6 — Dedup rule applied once.** The tracker-first dedup rule (tracker mc_type priority → 24h freshness gate → fall back to camera if all trackers stale → intra-type tiebreak by most-recent fix → activation_time tiebreak) is implemented in **one** SQL function (`serve.fn_live_view`) and **one** Pydantic projection. No client-side dedup.
|
||||
- **F1.7 — Live API endpoint.** `GET /api/views/live?filters=…` returns a render-ready payload: `{summary: {…KPIs…}, geojson: {…vehicles…}, slo_status: {…breaches…}}`.
|
||||
- **F1.8 — Live dashboard.** A static HTML page (`index-live.html`) imports `fleet-core.js`, authenticates against `/api/auth/token`, calls the live endpoint every 15 s with a JWT, renders KPIs and a MapLibre map. No business logic in JS. No anonymous access path; users sign in before the map renders.
|
||||
- **F1.9 — SLO measurement.** A `slo_measurement` worker computes fix-freshness every 60 s and writes to `slo.measurements`. Grafana dashboards render against `slo.*`.
|
||||
- **F1.10 — Contract checker.** A daily job calls each Jimi endpoint against a sandbox account, validates the response against the current Pydantic model, alerts on drift.
|
||||
- **F1.11 — Device lifecycle.** `domain.devices.lifecycle` is NOT NULL with values `provisioned | active | suspended | decommissioned`. The live view shows only `active` devices.
|
||||
- **F1.12 — Parallel deployment.** The new platform runs alongside the old; both receive Jimi pushes. The new dashboard is reachable at `live-v2.rahamafresh.com` and requires a JWT from day one (no public-read on the new platform — see §3.3 principle 9, §15 Q1). The old dashboard remains canonical for dispatch until end of Phase 3.
|
||||
|
||||
### 6.5 Non-functional requirements
|
||||
|
||||
- **NFR1.1 — Fix freshness.** ≥95% of active devices have a fix within 90 s during business hours (07:00–19:00 EAT).
|
||||
- **NFR1.2 — Push receiver latency.** p95 < 100 ms (Jimi-side measurable timeout).
|
||||
- **NFR1.3 — Live endpoint latency.** p95 < 300 ms, p99 < 800 ms.
|
||||
- **NFR1.4 — Parser lag.** p95 of `received_at` to `events.parsed` insertion: < 30 s.
|
||||
- **NFR1.5 — Availability.** 99.5% monthly for push receiver and live endpoint. (Higher targets in P3.)
|
||||
- **NFR1.6 — Security.** All inbound webhooks HMAC-verified. All dashboard endpoints require a valid JWT — read endpoints included, no anonymous access path. Public-read posture from the legacy platform is not preserved (§15 Q1 closed, §3.3 principle 9).
|
||||
- **NFR1.7 — Observability.** Every request logged in structured JSON with `event_id`, `imei`, `endpoint`, `parser_version`, `latency_ms`.
|
||||
|
||||
### 6.6 Success criteria
|
||||
|
||||
Phase 1 is done when, in continuous production observation for 7 days:
|
||||
|
||||
1. The new platform receives 100% of Jimi pushes the old platform receives (per ingest-log comparison).
|
||||
2. The new live dashboard renders the same vehicle positions as the old, within ±15 s.
|
||||
3. `slo.v_current_status` shows fix-freshness SLO ≥95%.
|
||||
4. Zero retroactive write-path guards have been added (no FIX-M21 equivalent).
|
||||
5. The contract checker has run green for 7 consecutive days.
|
||||
6. A demonstration of replay (truncate `state.live_positions`, re-project from `events.parsed`, confirm restored) succeeds in <30 minutes.
|
||||
|
||||
### 6.7 Out of scope for Phase 1
|
||||
|
||||
Historical playback, trips, parking events, alarms, fuel, temperature, OBD, dispatcher write-actions, routing, mobile-responsive polish (the dashboard works on mobile but is not polished), authentication beyond JWT scaffolding.
|
||||
|
||||
---
|
||||
|
||||
## 7. Phase 2 — Historical and trip analytics (weeks 4–6)
|
||||
|
||||
### 7.1 Objective
|
||||
|
||||
Ship the historical-track and trip-report capabilities at feature parity with today's `fleetintelligence.rahamafresh.com`, on the new architecture. By end of Phase 2, ops managers can reproduce any analysis they do today against the new platform.
|
||||
|
||||
### 7.2 Why this is Phase 2
|
||||
|
||||
Live tracking gives dispatch what they need. Historical + trip analytics gives ops management what they need. These two cover ~90% of current platform usage. Building Phase 2 immediately after Phase 1 means the same architectural muscles are exercised on a wider surface (CAGGs, longer time ranges, larger result sets, more filters), without the team switching away for a month and then having to rebuild context.
|
||||
|
||||
### 7.3 User stories
|
||||
|
||||
- **U2.1 — Ops manager plays back a track.** As an ops manager, I can select a vehicle and a 24-hour window and see the vehicle's track animated on a map, with speed and direction overlaid.
|
||||
- **U2.2 — Ops manager reviews trips.** As an ops manager, I can see all trips for a vehicle, plate, cost centre, or assigned city in a date range, with start/end time, start/end address, distance, duration, idle time, and max speed.
|
||||
- **U2.3 — Ops manager exports.** As an ops manager, I can export any historical view to CSV for downstream finance reconciliation.
|
||||
- **U2.4 — Ops manager reviews parking events.** As an ops manager, I can see parking events with start/end time and address, filter by duration (>1h, >8h, overnight).
|
||||
- **U2.5 — Finance reconciles by cost centre.** As a finance owner, I can see total distance and trip count per cost centre for a billing period.
|
||||
- **U2.6 — Engineer back-computes.** As an engineer, I can re-run the trip projector against `events.parsed` for any date range without re-fetching from Jimi.
|
||||
|
||||
### 7.4 Functional requirements
|
||||
|
||||
- **F2.1 — Trip projector.** A projector reads `events.parsed` of kinds `trip_open`, `trip_close`, and `position_fix` and writes `state.trips` with start/end time, start/end position, distance, duration, idle time, max speed.
|
||||
- **F2.2 — Parking projector.** A projector derives parking events from position-fix streams (speed=0 for >5 min) and writes `state.parking_events`.
|
||||
- **F2.3 — Geocoding worker.** A separate worker drains a `geocode_queue` table (positions needing addresses) and writes `state.geocoded_positions`. Nominatim primary, Mapbox fallback. Never blocks ingest.
|
||||
- **F2.4 — Historical API endpoints.** `GET /api/views/history?filters=…` returns `{summary, geojson, slo_status}` for a date range. `GET /api/views/history/animation?…` returns time-stamped position frames for playback.
|
||||
- **F2.5 — Trip API endpoint.** `GET /api/views/trips?filters=…` returns trip records. Filters: vehicle, plate, cost centre, assigned city, date range, min duration, min distance.
|
||||
- **F2.6 — Parking API endpoint.** `GET /api/views/parking?filters=…` returns parking-event records in a shape similar to the trip endpoint (F2.5).
|
||||
- **F2.7 — Continuous aggregates.** TimescaleDB CAGGs for daily and weekly trip rollups per cost centre, refreshed every 15 min.
|
||||
- **F2.8 — Historical dashboard.** `index-history.html` page with form-driven filter UI, playback control, KPI tiles. Shares `fleet-core.js` with the live dashboard.
|
||||
- **F2.9 — CSV export.** Every dashboard view has a "Download CSV" action that exports the current filtered result.
|
||||
- **F2.10 — Migration of recent history.** The last 90 days of trips and positions from the legacy DB are imported into `events.raw` (synthesised events with `source=legacy_import`), then re-parsed and projected on the new platform. Older data remains in the legacy DB as a read-only archive available to ops via Grafana.
|
||||
|
||||
### 7.5 Non-functional requirements
|
||||
|
||||
- **NFR2.1 — Historical API latency.** p95 < 1.5 s for a 24-hour vehicle track; < 3 s for a 7-day cost-centre rollup.
|
||||
- **NFR2.2 — Trip ingest lag.** Trips closed in Jimi appear in `state.trips` within 600 s (SLO `trip_lag`).
|
||||
- **NFR2.3 — Geocoding hit rate.** ≥80% of trip endpoints have a non-null address within 24 h of the trip closing.
|
||||
- **NFR2.4 — CSV export.** Up to 100,000 rows in <30 s.
|
||||
|
||||
### 7.6 Success criteria
|
||||
|
||||
Phase 2 is done when, for a chosen 30-day test window:
|
||||
|
||||
1. The new historical dashboard renders the same trip records as the legacy one (sample 100 trips, 100% match on start time, end time, distance ±1%, addresses 90% match or none-vs-something).
|
||||
2. Trip lag SLO is met for 7 consecutive days.
|
||||
3. A 90-day backfill of trips from the legacy DB has been imported, re-parsed, and projected successfully.
|
||||
4. Ops manager has signed off on parity for their workflow.
|
||||
|
||||
### 7.7 Out of scope for Phase 2
|
||||
|
||||
Active alarms console (Phase 3), driver-behaviour scoring (Phase 4). Routing and ticket-driven dispatch are out of scope of this PRD entirely (companion project).
|
||||
|
||||
---
|
||||
|
||||
## 8. Phase 3 — Operations tooling and cutover (weeks 7–8)
|
||||
|
||||
### 8.1 Objective
|
||||
|
||||
Equip ops managers and fleet administrators with first-class tools for the work they currently do in spreadsheets, Slack, and direct database access. By end of Phase 3, the platform is the canonical home for fleet operations, and the legacy platform is decommissioned.
|
||||
|
||||
### 8.2 Why this is Phase 3
|
||||
|
||||
Phases 1 and 2 ship feature parity for live and historical views. Phase 3 ships the **operational hygiene** that the legacy platform was missing — SLO visibility, alarm triage, device lifecycle UI, dispatcher workflow. This is where the rebuild starts to be visibly better than what it replaces, and it ends with the legacy stack going dark.
|
||||
|
||||
### 8.3 User stories
|
||||
|
||||
- **U3.1 — Ops sees SLO health.** As an ops manager, I can see a dashboard of all platform SLOs (fix freshness, trip lag, parser lag, contract drift) with current value, threshold, and trend over the last 7 days.
|
||||
- **U3.2 — Ops triages alarms.** As an ops manager, I can see an alarm console listing recent alarms (panic button, speeding, geofence breach, etc.) with vehicle, plate, time, location, and ack-status. I can acknowledge and add a note.
|
||||
- **U3.3 — Admin provisions a device.** As a fleet administrator, I can add a new device to the platform: enter IMEI, vehicle plate, cost centre, assigned city, set lifecycle to `provisioned`. On first valid push from that device, transition to `active`.
|
||||
- **U3.4 — Admin suspends a device.** As a fleet administrator, I can suspend a device (e.g. subscription expired) and the device disappears from operational views but remains visible to admin views.
|
||||
- **U3.5 — Admin decommissions a device.** As a fleet administrator, I can decommission a device permanently. It vanishes from all operational views.
|
||||
- **U3.6 — Admin audits.** As a fleet administrator, I can see an audit log of all lifecycle transitions for any device.
|
||||
- **U3.8 — Admin assigns a primary driver.** As a fleet administrator, I can assign a primary driver to a vehicle with an effective-from date and an optional end date. The assignment is the default attribution for all trips and behaviour scoring on that vehicle.
|
||||
- **U3.9 — Admin reassigns a vehicle for leave or absence.** As a fleet administrator, when a driver goes on leave (annual leave, sick leave, training, suspension, etc.), I can mark them off-roster for a date range and assign a temporary substitute driver to their vehicle for that range. When the date range ends, the primary driver automatically resumes attribution — no second action required.
|
||||
- **U3.10 — Admin sees vehicles without an assigned driver.** As a fleet administrator, I can see a list of vehicles that are currently active but have no driver assigned (or whose assigned driver is currently off-roster with no substitute). This list is the daily "needs reassignment" worklist.
|
||||
- **U3.11 — Admin sees driver history.** As a fleet administrator, I can see the historical sequence of drivers for any vehicle, and the historical sequence of vehicles for any driver. Useful for incident investigation, HR reconciliation, and customer queries.
|
||||
- **U3.12 — Admin sees service-due dashboard.** As a fleet administrator, I can see every active vehicle with its odometer-since-last-service running total, the service-interval policy applied to it (default 5,000 km), and how many km remain until next service. The view sorts by urgency: overdue first, then due-soon (within 500 km), then comfortable.
|
||||
- **U3.13 — Admin records a completed service.** As a fleet administrator, I can record a completed service: vehicle, date, service type, odometer reading at service, optional notes, optional cost. The running total resets at that reading.
|
||||
- **U3.14 — Admin sets a per-vehicle service interval.** As a fleet administrator, I can override the default 5,000 km service interval for a specific vehicle or for a whole vehicle class (e.g., heavy trucks at 10,000 km) so the service-due math reflects the vehicle's actual maintenance plan.
|
||||
|
||||
### 8.4 Functional requirements
|
||||
|
||||
- **F3.1 — SLO dashboard.** A page (`/ops/slos`) rendering all rows from `slo.targets` with current state from `slo.measurements`. Green/amber/red badges with thresholds visible.
|
||||
- **F3.2 — Alarm projector.** Projector reads `events.parsed` of kind `alarm` and writes `state.alarms` with `ack_status`, `ack_by`, `ack_note`.
|
||||
- **F3.3 — Alarm console.** A page (`/ops/alarms`) listing recent alarms with filters (severity, vehicle, date range), ack action, and note field.
|
||||
- **F3.4 — Device admin UI.** A page (`/admin/devices`) with table view, lifecycle transitions, audit log.
|
||||
- **F3.5 — Lifecycle audit log.** `domain.devices_audit` table records every lifecycle transition with `actor`, `at`, `from_lifecycle`, `to_lifecycle`, `reason`.
|
||||
- **F3.8 — Authentication.** JWT-based login for ops and admin pages. Two scopes: `read:fleet`, `admin`. Dispatcher and ops manager = `read:fleet`. Fleet admin = `admin`. (A `write:dispatch` scope is intentionally not introduced here; ticket-driven dispatch is the companion project's domain.)
|
||||
- **F3.9 — Cutover plan.** Push events are mirrored to both old and new platforms for 7 days. DNS is then cut over to the new platform. The legacy push receiver continues to accept events as standby for 48 h after the DNS cutover, then is decommissioned.
|
||||
- **F3.10 — Driver model.** `domain.drivers` holds driver identity (`driver_id` UUID, `full_name`, `employee_ref`, `phone`, `status`). `status ∈ {active, on_leave, suspended, terminated}`. `domain.driver_status_log` records every status transition with `actor`, `at`, `from`, `to`, `reason`, `effective_from`, `effective_to`.
|
||||
- **F3.11 — Driver assignment model.** `domain.driver_assignments` is the time-ranged source of truth for which driver was on which vehicle: `(assignment_id, vehicle_id, driver_id, role, effective_from, effective_to, reason, created_by, created_at)`. `role ∈ {primary, substitute}`. Effective ranges may not overlap for the same `(vehicle_id, role)`. The "currently assigned driver" for a vehicle at time `t` is: substitute whose range covers `t` if one exists, otherwise primary whose range covers `t`, otherwise NULL.
|
||||
- **F3.12 — Assignment lookup function.** `serve.fn_driver_at(vehicle_id, at_time) → driver_id` returns the assigned driver at a point in time. Used by every projector that attributes activity to a driver (trips, shifts, behaviour scoring).
|
||||
- **F3.13 — Driver-roster admin UI.** Page `/admin/drivers` lists drivers with current status and current vehicle. Page `/admin/assignments` provides the calendar-style reassignment UI (driver-on-leave date range → choose substitute → save).
|
||||
- **F3.14 — Service policy model.** `domain.service_policies` defines the service interval per vehicle class (`vehicle_class`, `interval_km`, `description`). Default policy: `interval_km = 5000`. A vehicle inherits its class's policy unless overridden in `domain.vehicles.service_interval_km_override`.
|
||||
- **F3.15 — Service event model.** `ops.service_log` records completed services: `(service_id, vehicle_id, serviced_at, service_type, odometer_km_at_service, cost, notes, recorded_by, recorded_at)`. Once recorded, the running-total math resets at this row.
|
||||
- **F3.16 — Service-due projector.** A projector computes per-vehicle `km_since_last_service` from `state.position_history` (sum of segment distances since `serviced_at`) and writes `state.service_status (vehicle_id, last_serviced_at, km_at_last_service, current_odometer_km, km_since_last_service, interval_km, km_to_next_service, status)`. `status ∈ {overdue, due_soon, ok}` derived from `km_to_next_service` versus a configurable buffer (default 500 km for due_soon).
|
||||
- **F3.17 — Service-due API endpoint.** `GET /api/views/service?filters=…` returns the service worklist payload `{summary: {overdue_count, due_soon_count, ok_count}, vehicles: [{vehicle_id, plate, ...service_status fields}]}` sorted by urgency.
|
||||
- **F3.18 — Service-due dashboard.** A page (`/ops/service`) renders the service worklist with overdue (red) at top, due-soon (amber) next, ok (green) collapsed. Each row links to the vehicle's service-log history. A "Record service" action opens a form that creates an `ops.service_log` row.
|
||||
|
||||
**Fuel / refuelling ingest from the existing WhatsApp microservice**
|
||||
|
||||
The company already operates a WhatsApp-based fuelling microservice: drivers send a message containing a photo of the odometer, litres added, fuel station, and time. That microservice continues to own the driver-facing WhatsApp interaction. This platform consumes its output as a new ingest source — same event-sourcing pattern as Jimi pushes.
|
||||
|
||||
- **F3.19 — Fuel submission ingest endpoint.** `POST /push/fuel` accepts a payload from the existing microservice with `(submission_id, imei_or_plate, driver_phone, odometer_km, litres, fuel_station_name, fuel_station_geom?, photo_ref, submitted_at, signature)`. HMAC-verified against a shared secret. Payload written verbatim to `events.raw` with `source = 'whatsapp_fuel'`. Parser produces a typed `fuel_submission` event in `events.parsed` keyed by the platform's `vehicle_id` (resolved from IMEI or normalised plate) and `driver_id` (resolved from `driver_phone`).
|
||||
- **F3.20 — Fuel / odometer projector.** Reads `events.parsed` of kind `fuel_submission` and writes:
|
||||
- `state.fuel_log`: `(fuel_id, vehicle_id, driver_id, fuel_station_name, fuel_station_geom, odometer_km_submitted, litres, photo_ref, submitted_at)`.
|
||||
- `state.odometer_readings`: appends `(reading_id, vehicle_id, source, odometer_km, observed_at, confidence)` where `source = 'driver_submission'` and `confidence` is computed from the agreement between submitted km and platform-derived (GPS-summed) km at the same timestamp.
|
||||
- **F3.21 — Odometer-truth view and variance-gated reset.** `serve.fn_odometer_status(vehicle_id) → (gps_derived_km, last_submitted_km, last_submitted_at, divergence_km, confidence)` exposes both sources side-by-side. Confidence is computed from the variance between the submitted km delta and the GPS-derived km delta over the same interval, normalised: `variance = |Δodometer_submitted - Δdistance_gps| / Δdistance_gps`. If `variance ≤ 5%`, the reading is `confidence = high`. If `variance > 5%`, the reading is `confidence = low`.
|
||||
|
||||
**High-confidence readings** reset the service-due clock: the service-due projector (F3.16) consumes the submitted km as the new baseline for `km_since_last_service`. The fuel submission is accepted into `state.fuel_log` normally.
|
||||
|
||||
**Low-confidence readings are quarantined**, not used. The submission is still written to `state.fuel_log` (we don't lose the record) and the reading is still appended to `state.odometer_readings` with `source = 'driver_submission'`, `confidence = 'low'` (we don't lose audit trail), but `state.service_status` is **not** updated from a low-confidence reading. A row is written to `ops.admin_alerts` of kind `odometer_variance_exceeded` with `vehicle_id`, `variance_pct`, `submitted_km`, `gps_derived_km`, and the photo reference; the fleet administrator reviews against the photo and either confirms (promoting the reading to high-confidence via an explicit admin action that resets the clock) or rejects (the reading stays in audit but never updates service status).
|
||||
|
||||
Rationale: an unverified manual entry — typo, fraud, or honest mistake — should not silently corrupt service-due math. A reading that looks wrong against the GPS-derived figure stays in the audit trail but doesn't get to move the service clock until a human has looked at the photo.
|
||||
|
||||
**HR roster sync — `domain.drivers` stays current automatically**
|
||||
|
||||
Driver identity, phone numbers, and employment status are mastered in the HR system, not in this platform. We sync via the platform's existing event-sourcing pattern rather than via FDW + materialised view, so HR data follows the same `events.raw → parsed → projected` lifecycle as Jimi and fuel submissions. Cadence is 3 hours — HR data is not minute-by-minute volatile, and a force-refresh action handles the rare urgent case.
|
||||
|
||||
- **F3.22 — HR driver sync worker.** Scheduled every 3 hours. Pulls the HR extract (table, view, or API — Q19) and writes one row per driver verbatim to `events.raw` with `source = 'hr_extract'`. Parser produces `hr_driver_snapshot` events in `events.parsed` with normalised fields (E.164 phone, trimmed name, validated `employee_ref`, status in `{active, on_leave, suspended, terminated}`). Projector applies them to `domain.drivers` (upsert by `employee_ref`).
|
||||
- **F3.23 — Force-refresh HR.** A fleet-admin action triggers an immediate HR pull outside the schedule. Used when an urgent termination or suspension needs to take effect inside the 3-hour window. Each force-refresh is logged in an audit table modelled on `domain.devices_audit`, so we know why a refresh fired and who fired it.
|
||||
- **F3.24 — HR sync staleness metric.** `slo.targets` row `hr_sync_lag` measures "minutes since last successful HR sync". SLO threshold: ≤ 240 min (3 h cadence + 1 h buffer) during business hours. Breach surfaces in the SLO dashboard like any other.
|
||||
- **F3.25 — Quarantined odometer review action.** A fleet-admin page (`/admin/odometer-review`) lists open `odometer_variance_exceeded` alerts with submitted km, GPS-derived km, variance percentage, photo, driver, and submission time. The admin either **confirms** (the reading is promoted to `confidence = high` retroactively, `state.service_status` is updated with this reading as the baseline, and the alert is closed with `resolution = confirmed`) or **rejects** (the reading stays as `confidence = low` in `state.odometer_readings`, the alert is closed with `resolution = rejected`, optional reason captured). Resolution is logged in `domain.devices_audit`-style audit with actor, timestamp, resolution, and reason. No bulk-action option in v1; per-row review is the point.
|
||||
|
||||
### 8.5 Non-functional requirements
|
||||
|
||||
- **NFR3.1 — Availability.** 99.9% monthly for the platform after cutover.
|
||||
- **NFR3.2 — Auth latency.** Token-issue endpoint p95 < 200 ms.
|
||||
- **NFR3.3 — Audit completeness.** Every lifecycle transition has an audit row. Audit rows are never deleted.
|
||||
|
||||
### 8.6 Success criteria
|
||||
|
||||
Phase 3 is done when:
|
||||
|
||||
1. Legacy `live.rahamafresh.com` and `fleetintelligence.rahamafresh.com` are no longer the canonical dashboards. DNS points to the new platform.
|
||||
2. Ops manager and fleet administrator have signed off on workflow parity + improvements.
|
||||
3. All SLOs in `slo.v_current_status` have been green for 7 consecutive days.
|
||||
4. The contract checker has detected at least one synthetic upstream change in staging (proof it works).
|
||||
5. Zero data loss during cutover (verified by comparing `events.raw` counts in the mirror window).
|
||||
6. ≥95% of active vehicles have a non-null `serve.fn_driver_at(vehicle_id, now())` — i.e. the driver roster is functionally complete, not aspirational.
|
||||
7. The service-due dashboard is the canonical source for "what's due this week" — confirmed by fleet admin having retired the service spreadsheet for two consecutive weeks.
|
||||
8. The reassignment-for-leave flow has been used in production at least 3 times without ops needing to fall back to the spreadsheet.
|
||||
9. ≥80% of fuel submissions from the WhatsApp microservice land in `events.raw` within 60 s of submission (per microservice telemetry vs platform `received_at`).
|
||||
10. ≥90% of active vehicles have at least one driver-submitted odometer reading within the last 30 days. Of those readings, ≥75% pass the high-confidence variance gate (F3.21) on first submission. Quarantined readings have a median admin-review-and-resolve time ≤ 3 working days.
|
||||
11. HR sync `hr_sync_lag` SLO is green (≤ 240 min) for 95% of business hours across 7 consecutive days; force-refresh has been used at least once and verified to take effect within 60 s.
|
||||
|
||||
### 8.7 Out of scope for Phase 3
|
||||
|
||||
Driver-behaviour scoring (Phase 4), customer-facing portal (out of scope entirely). Routing and ticket-driven dispatch are out of scope of this PRD entirely (companion project).
|
||||
|
||||
---
|
||||
|
||||
## 9. Phase 4 — Intelligence and driver KPIs (weeks 9+)
|
||||
|
||||
### 9.1 Objective
|
||||
|
||||
Convert the data the platform now collects cleanly into business intelligence:
|
||||
|
||||
- **Driver KPIs** — shift sign-on and sign-off detection (first ACC_ON of the day, last ACC_OFF) with geocoded location at each, total drive time per shift, idle time, and a composite **driver-behaviour score** built from speeding, harsh-acceleration, and harsh-braking events.
|
||||
- **Cost allocation** — distance, fuel, idle time, and after-hours usage per cost centre and per assigned city.
|
||||
- **Anomaly surfaces** — fuel and temperature anomalies surfaced for ops triage.
|
||||
- **Executive view** — a one-page monthly summary backed by data, not hand curation.
|
||||
|
||||
By end of Phase 4, the **ops manager has a daily driver-performance worklist**, the **HR-adjacent attribution** (who drove what when, where they signed on, where they signed off) is in the platform and not in a spreadsheet, **finance has self-service cost allocation**, and **executives consume a monthly view that builds itself**.
|
||||
|
||||
Driver KPIs are the headline of Phase 4. They depend on the P3 driver-roster work being complete and accurate — the platform cannot attribute a shift to "Driver X" if the assignment data isn't there.
|
||||
|
||||
### 9.2 Why this is Phase 4
|
||||
|
||||
Driver KPIs depend on three earlier-phase prerequisites being in place: (a) the event log carries ACC_ON/ACC_OFF cleanly (P1), (b) trips and idle minutes are projected (P2), and (c) the driver roster is current and trustworthy (P3). Putting driver KPIs earlier would mean attributing shifts to the wrong people; putting them later would mean we've left obvious value on the table after the platform is otherwise complete.
|
||||
|
||||
Driver KPIs and service tracking together close the loop on the fleet team's three rosters: device roster (P3), driver roster (P3), service roster (P3) → driver performance (P4) → cost allocation (P4) → executive summary (P4). The data flows from operational hygiene into business intelligence in one direction.
|
||||
|
||||
**Phase 4 scope.** Driver shift reporting and behaviour scoring are **committed**. Cost allocation, fuel/temperature anomaly surfaces, and the executive view are **committed in principle, detailed at end of Phase 3** based on what the data and the team have taught us by then. Stretch items (deviation alerts, timesheet export) are post-rebuild backlog.
|
||||
|
||||
### 9.3 User stories
|
||||
|
||||
Driver-related stories are committed scope. The remaining stories are committed in principle and are refined at end of Phase 3.
|
||||
|
||||
**Driver KPIs — shift reporting (committed)**
|
||||
|
||||
- **U4.1 — Ops sees when each driver started work.** As an ops manager, I can see, for any date, every driver's shift start time and shift start location (geocoded address). Shift start is defined as the first ACC_ON event of the day for the driver's assigned vehicle. If a driver did not sign on, the row is present with status `no_shift`.
|
||||
- **U4.2 — Ops sees when each driver finished work.** As an ops manager, for the same date, I can see every driver's shift end time and shift end location (geocoded address). Shift end is the last ACC_OFF event of the day for that driver's assigned vehicle, after their shift-start ACC_ON. If a shift is still open at the time of viewing (driver currently working), status is `open`.
|
||||
- **U4.3 — Ops sees total drive time and idle time per shift.** As an ops manager, for any shift I can see total driving minutes (ACC_ON with speed > 0), total idle minutes (ACC_ON with speed = 0), and total stopped-engine-on minutes within the shift window.
|
||||
- **U4.4 — Ops sees a driver's week.** As an ops manager, I can pick a driver and a week and see the seven-day timeline of shifts with start/end times, distance, and behaviour score per shift.
|
||||
|
||||
**Driver KPIs — behaviour scoring (committed)**
|
||||
|
||||
- **U4.5 — Ops scores drivers on quality of driving.** As an ops manager, I can see a per-driver behaviour score for any window. The score is a composite of: speeding events (sustained position-fix speed above a configurable fleet threshold, or Jimi's own `speeding` alarm), harsh-acceleration events (from Jimi alarms), harsh-braking events (from Jimi alarms), harsh-cornering events (from Jimi alarms, if device supports them), idle-percentage. Each component is normalised per 100 km driven so longer-distance drivers aren't penalised.
|
||||
- **U4.6 — Ops drills into harsh events.** As an ops manager, for any harsh event I can see the timestamp, location (geocoded), speed at event, and the surrounding 60 seconds of position trail. Useful for "is this a real concern or a road feature?" judgement.
|
||||
- **U4.7 — Ops compares drivers.** As an ops manager, I can compare any subset of drivers (e.g., a depot's roster) side-by-side on the composite score and on each component, for any window.
|
||||
|
||||
**Cost, anomalies, executive (committed in principle)**
|
||||
|
||||
- **U4.8 — Finance allocates by cost centre.** As a finance owner, I can see total distance, fuel consumption, idle time, after-hours usage per cost centre for any billing period.
|
||||
- **U4.9 — Ops detects fuel anomalies.** As an ops manager, I can see vehicles with unusual fuel consumption (sudden drop = theft suspicion, sudden rise = sensor failure) for a window.
|
||||
- **U4.10 — Ops monitors cold-chain.** As an ops manager (for refrigerated vehicles), I can see temperature compliance per trip, with breaches flagged.
|
||||
- **U4.11 — Executive sees the month.** As an executive, I can see a one-page monthly summary: fleet uptime SLO attainment, cost-per-km trend, incident count, top-5 vehicles by distance, top-5 cost centres by spend, **top-5 and bottom-5 drivers by behaviour score**, vehicles serviced on-schedule vs late.
|
||||
|
||||
**Stretch (post-rebuild backlog)**
|
||||
|
||||
- **U4.12 — HR exports timesheets.** *(Stretch)* HR can export a per-driver timesheet (sign-on, sign-off, total hours) for any pay period directly from the platform.
|
||||
|
||||
Deviation alerts and multi-stop optimisation — previously listed here as stretch — are now in the routing companion project's scope, not this platform's.
|
||||
|
||||
### 9.4 Functional requirements
|
||||
|
||||
**Driver shift derivation (committed)**
|
||||
|
||||
- **F4.1 — Shift projector.** A projector reads `events.parsed` of kinds `acc_on`, `acc_off`, and `position_fix` and writes `state.driver_shifts`: `(shift_id, driver_id, vehicle_id, shift_date, started_at, started_geom, started_address, ended_at, ended_geom, ended_address, drive_minutes, idle_minutes, distance_km, status)`. `status ∈ {open, closed, no_shift}`. Driver attribution uses `serve.fn_driver_at(vehicle_id, started_at)` from Phase 3.
|
||||
- **F4.2 — Shift definition.** Shift start = first ACC_ON of the day where the prior ACC_OFF was ≥ `min_break_hours` ago (default 6 hours, configurable per cost centre). Shift end = last ACC_OFF before the next shift-start trigger. Lunch breaks (short ACC_OFFs within a shift) do not end the shift; they are subtracted from drive time and reported as idle.
|
||||
- **F4.3 — Day boundary handling.** A shift starting before midnight and ending after midnight is one shift. The reporting "day" is the calendar date of the `started_at`. A shift starting after midnight is its own day even if the prior shift ended after midnight of the same calendar day.
|
||||
- **F4.4 — Geocoding of shift endpoints.** Shift start and shift end positions are enqueued to the geocoding worker (Phase 2) at projector time so addresses are present within the geocoder's SLO window.
|
||||
- **F4.5 — Shift API endpoint.** `GET /api/views/shifts?filters=…` returns shift records. Filters: driver, vehicle, cost centre, date range, status. Includes a `current_shifts` flag for "show me everyone currently signed on right now".
|
||||
|
||||
**Driver behaviour scoring (committed)**
|
||||
|
||||
- **F4.6 — Behaviour event extraction.** A projector identifies behaviour events from `events.parsed`:
|
||||
- `speeding`: sustained position-fix speed above a fleet-configurable threshold (default 80 km/h urban / 100 km/h highway distinguished by `road_class` tag in `domain.vehicles` if known, otherwise single fleet-wide threshold of 90 km/h) for > 30 s, **or** an explicit `speeding` alarm from Jimi.
|
||||
- `harsh_accel`: alarm of type `harshAcceleration` from Jimi.
|
||||
- `harsh_brake`: alarm of type `harshBraking` from Jimi.
|
||||
- `harsh_corner`: alarm of type `harshCornering` from Jimi (if device supports it).
|
||||
Each event is written to `state.behaviour_events` with `(driver_id, vehicle_id, occurred_at, kind, severity, geom, speed_kmh)`.
|
||||
|
||||
Note: a more sophisticated speeding rule that uses statutory `maxspeed` per road segment (requires OSM road topology) is intentionally deferred — it belongs in the routing companion project where OSM ingestion already lives. The fleet-wide threshold gets us 80% of the value without the cost.
|
||||
- **F4.7 — Behaviour score function.** `serve.fn_driver_behaviour_score(driver_id, period_start, period_end) → numeric` returns a 0–100 score. Component weights are configurable in `domain.behaviour_weights` (default: speeding 35, harsh_brake 25, harsh_accel 25, harsh_corner 15). Each component is normalised per 100 km driven in the window. 100 = no events; 0 = every km has a harsh event.
|
||||
- **F4.8 — Behaviour API endpoint.** `GET /api/views/drivers?filters=…` returns driver records with current score, score trend, top-3 event types, week-over-week change. `GET /api/views/drivers/{id}/events?…` returns the event list with location and speed for drill-down.
|
||||
|
||||
**Cost, anomalies, executive (committed in principle)**
|
||||
|
||||
- **F4.9 — Cost-allocation view.** `serve.fn_cost_allocation(period, cost_centre?)` returns the finance-ready breakdown: distance, fuel litres, fuel cost (from `state.fuel_log` driver-submitted entries — real cost per litre × litres, with station attribution), idle hours, after-hours hours, per cost centre, per assigned city. Where `state.fuel_log` data is missing, falls back to estimated consumption from `state.fuel_readings` (Jimi OBD tap) and surfaces the basis (`cost_basis ∈ {actual, estimated}`) in the response.
|
||||
- **F4.10 — Fuel anomaly detector.** Worker consumes both `state.fuel_readings` (Jimi OBD tap) and `state.fuel_log` (driver-submitted fillups). Applies a z-score rule (≥ 3σ against the vehicle's trailing 30-day mean) to each independently. Cross-checks the two sources: when a fillup is recorded in `state.fuel_log` but OBD doesn't show the expected tank-level rise (or vice versa), the anomaly is tagged `source_disagreement` and surfaces with higher severity — catches fuel-card-vs-tank discrepancies (a theft signature) and sensor failures (an OBD signature). Anomalies written to `ops.anomalies` for review.
|
||||
- **F4.11 — Temperature compliance projector.** For cold-chain vehicles, reads `state.temperature_readings`, computes time-out-of-band per trip, surfaces breaches.
|
||||
- **F4.12 — Executive monthly view.** `serve.fn_executive_summary(month)` returns the one-page payload: SLO attainment, cost-per-km trend, incident count, distance leaders, cost-centre spend, driver-score leaders + laggards, services on-schedule percentage.
|
||||
|
||||
**Stretch**
|
||||
|
||||
- **F4.13 — Timesheet export.** *(Stretch)* `GET /api/views/timesheets?driver=…&pay_period=…` returns CSV ready for HR import.
|
||||
|
||||
### 9.5 Success criteria
|
||||
|
||||
Driver-related criteria are committed. Cost/anomaly/executive criteria are refined at end of Phase 3.
|
||||
|
||||
**Driver shift reporting (committed)**
|
||||
|
||||
1. For 7 consecutive working days, every active vehicle whose driver is rostered has a `state.driver_shifts` row with non-null `started_at`, `started_address`, `ended_at`, `ended_address`. Vehicles without an assigned driver are flagged as `unattributed_shift` (not as missing data).
|
||||
2. Sign-on and sign-off addresses are populated (non-null) for ≥95% of closed shifts within 24 h of shift close.
|
||||
3. Ops manager has used the shift view to make at least 3 operational decisions (timesheet reconciliation, late-start investigation, route-coverage review) in the first 30 days.
|
||||
|
||||
**Driver behaviour scoring (committed)**
|
||||
|
||||
4. Behaviour events are extracted for every active driver continuously for 14 consecutive days.
|
||||
5. The composite score is published per driver per week and visible in the ops dashboard.
|
||||
6. A blinded review with the ops manager confirms that the top-5 and bottom-5 drivers by score match the ops manager's independent ranking of the same group with ≥70% agreement (sanity-check the scoring weights).
|
||||
|
||||
**Cost / anomalies / executive (refined at end of P3)**
|
||||
|
||||
7. Finance has signed off on cost-allocation reports.
|
||||
8. Executive consumes the monthly view without manual deck construction for 3 consecutive months.
|
||||
|
||||
### 9.6 Out of scope for Phase 4 (still and forever for this PRD cycle)
|
||||
|
||||
Customer-facing portal, driver mobile app, native iOS/Android, multi-region active-active deployment, video streaming from cameras, automated disciplinary action triggered by behaviour score (the score informs human decisions; it does not take them), real-time driver feedback in the cab (out of scope without a driver-facing app), payroll integration (timesheets export to CSV as the integration surface in stretch). Routing, ETA prediction, deviation alerts, and ticket-driven dispatch remain in the companion project's scope.
|
||||
|
||||
---
|
||||
|
||||
## 10. Cross-cutting requirements
|
||||
|
||||
These apply to every phase, not to any single one.
|
||||
|
||||
### 10.1 Data and privacy
|
||||
|
||||
- **D1 — Data retention.** `events.raw` retained for 365 days (then archived to rustfs as monthly parquet dumps). `state.*` retained indefinitely. PII (driver names, vehicle plates, **driver shift locations and home-area sign-on points**) treated per Kenya Data Protection Act 2019 and Uganda DPPA 2019 — access logged, exports auditable.
|
||||
- **D2 — Data residency.** All production data resides on rahamafresh.com infrastructure (VPS region: as currently configured). No SaaS data exit.
|
||||
- **D3 — Backup.** Daily logical dump to rustfs; weekly events.raw and monthly geo slices. Restore drill quarterly.
|
||||
- **D4 — Right-to-delete.** Driver records can be anonymised on request without breaking referential integrity (FK on `driver_id` set to a "redacted" sentinel). Anonymisation preserves `state.driver_shifts` and `state.behaviour_events` rows but replaces `driver_id` with the sentinel; shift locations and behaviour-event geometries are retained for fleet-level analysis but no longer attributable to a person.
|
||||
- **D5 — Driver-shift sensitivity.** Shift start/end locations reveal a driver's home or near-home depot pattern. Access to per-driver shift views requires `read:fleet` scope and is logged with `actor`, `driver_id_viewed`, `at`. Aggregated views (depot-level start-time distribution) require no special scope.
|
||||
|
||||
### 10.2 Security
|
||||
|
||||
- **S1 — Inbound webhook auth.** HMAC-signed (shared secret with Jimi). Signature header `X-Jimi-Signature`. Signature mismatch → 401, log, alert.
|
||||
- **S2 — Internal API auth.** All API endpoints require a valid JWT — read and write, dashboards and admin pages. No anonymous access path on the new platform. Three scopes: `read:fleet` (live, history, trips, parking, alarms, shifts, behaviour read), `write:ops` (driver assignment changes, alarm acks, service-log entries, odometer review actions), `admin:fleet` (device lifecycle transitions, audit access, system configuration). 15-min access tokens, 30-day refresh tokens. Refresh-token revocation list.
|
||||
- **S3 — Database access.** Application connects via `pgbouncer` with a per-role user (`app_writer`, `app_reader`, `migrations`). Grafana uses `reporting_reader`. No direct DB access from outside the VPS.
|
||||
- **S4 — Secrets.** `.env` in dev, Coolify env vars in prod. Never in git. `.env.example` documents every key.
|
||||
- **S5 — TLS.** All public endpoints behind Traefik with Let's Encrypt. HSTS enabled. No plain HTTP.
|
||||
- **S6 — Rate limiting.** slowapi: dashboards 60 req/min/IP, push 1000 req/min total.
|
||||
- **S7 — Audit.** Lifecycle transitions, dispatch decisions, alarm acks, and admin actions all written to audit tables with actor, time, payload.
|
||||
|
||||
### 10.3 Performance and scale
|
||||
|
||||
- **P1 — Target scale.** Current: ~180 devices, ~5,000 alarms/day, ~5,000 position fixes/day from push, ~120k position fixes/day from polling. Design target: 5x current (~900 devices, ~600k fixes/day) without architectural change. Beyond 5x: a discussion about read replicas and partition strategy.
|
||||
- **P2 — Database sizing.** TimescaleDB-HA on the existing VPS class (8 vCPU / 32 GB / NVMe). Hypertable chunk interval: 1 day for events, 7 days for state hypertables.
|
||||
- **P3 — Cache.** No external cache (Redis) in v1. Postgres query plan + CAGGs cover the read patterns. Re-evaluate at 3x scale.
|
||||
|
||||
### 10.4 Observability
|
||||
|
||||
- **O1 — Structured logs.** JSON to stdout, aggregated by Coolify, retained 30 days.
|
||||
- **O2 — Metrics.** Postgres views in `slo.*`. Grafana renders. No Prometheus in v1.
|
||||
- **O3 — Alerting.** Grafana alerts to Slack (channel TBD) on SLO breach. PagerDuty-style on-call: deferred (no on-call rotation today).
|
||||
- **O4 — Tracing.** OpenTelemetry-ready (FastAPI middleware) but no collector in v1. Trace IDs propagated in logs.
|
||||
|
||||
### 10.5 Internationalisation and locale
|
||||
|
||||
- **I1 — Time zone.** All UI in EAT (UTC+3). All storage in UTC. Conversion at the serve layer.
|
||||
- **I2 — Language.** English only.
|
||||
- **I3 — Units.** Distance in km, speed in km/h, temperature in °C, fuel in litres.
|
||||
|
||||
### 10.6 Accessibility
|
||||
|
||||
- **A1 — Contrast.** All map overlays, KPI tiles, and status badges meet WCAG AA contrast against their background.
|
||||
- **A2 — Keyboard navigation.** All filter forms and table actions keyboard-accessible.
|
||||
- **A3 — Screen reader.** KPI tiles have ARIA labels. Map markers have alt text via popup.
|
||||
|
||||
### 10.7 Compatibility
|
||||
|
||||
- **C1 — Browser support.** Latest Chrome, Edge, Safari, Firefox (n and n-1). No IE.
|
||||
- **C2 — Mobile.** Responsive layout works at ≥375px width. Native mobile is out of scope.
|
||||
- **C3 — Print.** Trip reports and parking reports print sensibly (CSS print stylesheet).
|
||||
|
||||
---
|
||||
|
||||
## 11. Risks and mitigations
|
||||
|
||||
| # | Risk | Likelihood | Impact | Mitigation |
|
||||
|---|---|---|---|---|
|
||||
| R1 | Jimicloud changes API contract during rebuild | Medium | High | Contract checker built in Phase 1; daily runs catch drift within 24h. |
|
||||
| R2 | Push mirror in legacy stack drops events during parallel run | Low | High | Compare `events.raw` counts every 6h during mirror window; reconcile via polled catch-up. |
|
||||
| R3 | Team capacity stretched across product phases and feature requests | High | Medium | Phase scope is firm; new feature requests go to Phase 4 stretch backlog or to the routing companion project. Stakeholder agreement up front. |
|
||||
| R4 | Cutover discovers a feature parity gap | Medium | Medium | Side-by-side comparison in week 7–8 with ops manager sign-off before DNS cut. |
|
||||
| R5 | Coolify deploy mechanism doesn't support clean image-tag rollback | Low | Medium | Validated in Phase 1 week 1 (CI/CD smoke test before any business logic). |
|
||||
| R6 | Gitea or registry outage blocks production deploys | Low | Low | Last-known-good image cached on the VPS; `docker run` can be invoked directly in emergency. |
|
||||
| R7 | Data migration of 90-day backfill takes longer than a weekend | Medium | Low | Backfill is non-blocking; old platform stays canonical until backfill complete. |
|
||||
| R8 | A dependency vendor sunset (TimescaleDB, MapLibre, etc.) during build | Very low | Medium | All chosen tech is OSS with multi-year support cycles; no SaaS lock-in. |
|
||||
| R9 | Driver roster drifts out of date — vehicles move with no assigned driver, or with a driver on leave who didn't get reassigned | Medium | Medium | P3 dashboard surfaces "unassigned-but-moving" vehicles as a daily worklist; behaviour events for unattributed shifts roll up to a `_unassigned` bucket so the data isn't lost, just not personally attributed. |
|
||||
| R10 | Behaviour score weights are wrong — top scorers and bottom scorers don't match ops manager's intuition | Medium | Low | Weights are configurable rows in `domain.behaviour_weights`, not constants; blinded review in P4 success criteria catches the mismatch and tunes weights without code change. |
|
||||
| R11 | Odometer readings drift from actual vehicle odometer (Jimi distance is GPS-derived, not OBD-read) so service-due math is wrong | Low | Medium | **Variance-gated reset at every fuel submission** (F3.21): driver-submitted odometer at fillup resets the service-due clock when within 5% of the GPS-derived figure (`confidence = high`); readings outside that envelope are quarantined to `ops.admin_alerts` and do **not** mutate `state.service_status` until a fleet admin reviews the photo and either confirms or rejects (F3.25). The reading is preserved in the audit trail either way. Manual admin correction remains available as a backstop. |
|
||||
| R12 | Privacy concerns around driver shift locations — staff side or union side | Low | Medium | D5 access logging + `read:fleet`-scoped per-driver view + aggregate views without an additional scope gate. Disclose to drivers what is captured (transparency builds trust). |
|
||||
| R13 | Companion routing project starts late and dispatchers continue without route-suggestion tooling | Medium | Low | This PRD does not own the timeline of the routing project. Dispatchers retain phone/WhatsApp coordination (status quo) until that project ships. Live position + historical playback in this platform remain available throughout. |
|
||||
| R14 | HR extract schema changes (field rename, type change, new status value) and our sync starts producing garbage | Medium | Medium | Contract checker pattern applies: a daily validation job runs the HR extract through the Pydantic model in dry-run mode and alerts on drift. Parser is version-pinned; new HR schema = bumped parser version, no silent data loss. |
|
||||
| R15 | Driver submits fraudulent fuel data — wrong odometer, inflated litres, fake station | Medium | Medium | Photo is retained in `state.fuel_log.photo_ref` for human spot-audit. Submitted odometer is cross-checked against GPS-derived figure (F3.21 confidence flag); divergence > 5% flags review. Submitter phone is cross-checked against assigned-driver phone at submission time; mismatch flags review. Monthly anomaly batch (F4.10) surfaces patterns. Detection is not bulletproof but raises the cost of fraud meaningfully. |
|
||||
| R16 | Companion WhatsApp comms project starts late and dispatcher-to-driver coordination remains on personal phones | Medium | Low | Same posture as R13 (routing). This PRD does not own that project's timeline. Existing voice/WhatsApp coordination continues unaffected. |
|
||||
| R17 | A heavy historical query, OOM in a single worker, or runaway scheduled job takes down live tracking — single-process fate sharing | Medium | High | Architecture deploys three container roles from one image: `platform-gateway` (push receivers + dashboard reads), `platform-worker` (parsers + projectors + geocoder), `platform-cron` (scheduled polls + contract checks). Failures are isolated to the role that hits them; the gateway keeps returning 200 OK to Jimi while a worker is restarting. Verified by F1.x cutover criteria (kill a worker container during normal traffic, confirm push ingest continues). |
|
||||
| R18 | Stage-to-stage lag (parser polling, projector polling) consumes the fix-freshness SLO budget before the data even gets to the dashboard | Low | Medium | Parser and projector wake on `LISTEN/NOTIFY` from the upstream stage's write, not on fixed polling intervals. Internal stage-to-stage lag is bounded by NOTIFY propagation (typically <100 ms), so the 90 s freshness SLO budget is spent on Jimi transport and dashboard polling, not on our scheduler. Timer-based sweeps are retained as a fallback for missed NOTIFY (e.g. connection blip) but are no-ops under normal operation. |
|
||||
| R19 | Public-read dashboards leak driver shift home-area locations, plate-to-customer mappings, and HR-derived driver identity to anyone with the URL | Medium | High | Public-read posture not preserved on the rebuilt platform. JWT required on every endpoint from Phase 1, including live map reads. Three scopes (`read:fleet`, `write:ops`, `admin:fleet`); driver shift views require explicit access logging (D5). Q1 closed in favor of authenticated access. |
|
||||
|
||||
---
|
||||
|
||||
## 12. Dependencies and engineering mapping
|
||||
|
||||
### 12.1 External dependencies
|
||||
|
||||
- **Jimicloud / Tracksolid Pro API access** — existing, multi-account credentials in `.env`.
|
||||
- **Nominatim** (self-hosted or public) for reverse geocoding.
|
||||
- **Mapbox** (fallback geocoder, basemap option) — token needed.
|
||||
- **rustfs** for static assets and backups.
|
||||
- **Let's Encrypt** for TLS.
|
||||
|
||||
OSM road topology (Geofabrik extracts) is **not a dependency of this platform**. It belongs to the routing companion project. If the companion project chooses pgRouting and wants to share the same database, that conversation happens at companion-project planning time and may involve adding a `geo` schema then.
|
||||
|
||||
### 12.2 Internal dependencies
|
||||
|
||||
- VPS capacity headroom for parallel run (legacy + new for ~6 weeks). Current VPS class should suffice; confirm at Phase 1 kickoff.
|
||||
- Stakeholder availability for Phase 2 sign-off (ops manager) and Phase 3 sign-off (ops manager + fleet admin).
|
||||
- Open question resolution (see §14) before relevant phases.
|
||||
|
||||
### 12.3 Mapping to engineering phases (architecture doc)
|
||||
|
||||
| PRD phase | Engineering phases (from architecture doc) | Notes |
|
||||
|---|---|---|
|
||||
| P1 — Foundation + live | Eng A (Foundation), Eng B (Event log + parser), Eng C (Projectors, subset: live_positions) | Live tracking is the smallest end-to-end slice. Eng A ships the three container roles (gateway / worker / cron) from one image; subsequent phases extend the worker and cron roles without changing the gateway contract. |
|
||||
| P2 — Historical + trips | Eng C (Projectors, full), Eng D (Serve layer) | Historical adds more projectors + more SQL functions |
|
||||
| P3 — Ops tooling + cutover | Eng E (Dashboards, full) + Eng G (Cutover) + driver-roster + service-due additions | Ops UI completes the dashboard set; driver assignment + service tracking added to the admin surface; cutover happens at end of P3 |
|
||||
| P4 — Intelligence + driver KPIs | Driver-shift projector + behaviour-event projector + behaviour-score function + cost/anomaly/executive views | Depends on P3 driver roster. Architecture doc's Eng F (Routing) is **dropped** from this rebuild's scope. |
|
||||
|
||||
The architecture document's Phase F (Routing — OSM loader, map-match projector, segment-speed CAGG, route endpoint, pgRouting) is removed from this PRD's commit. If the routing companion project chooses to build on the same database, the architecture doc remains a useful reference for that project — but it is not work this PRD funds or schedules.
|
||||
|
||||
---
|
||||
|
||||
## 13. Stakeholders and sign-off
|
||||
|
||||
| Role | Name | Sign-off required for |
|
||||
|---|---|---|
|
||||
| Product sponsor | TBD | Overall PRD, phase scopes, budget |
|
||||
| Engineering lead | TBD | Architecture, capacity, schedule |
|
||||
| Ops manager | TBD | P2 + P3 feature parity, cutover go/no-go |
|
||||
| Fleet administrator | TBD | P3 admin UI, lifecycle model |
|
||||
| Dispatcher representative | TBD | P1 live UX |
|
||||
| HR / People lead | TBD | P4 driver KPI scope (shift reporting + behaviour scoring), Q14 driver transparency communication |
|
||||
| Workshop / Maintenance lead | TBD | P3 service-due dashboard + service interval policy (Q10) |
|
||||
| Finance | TBD | P4 cost-allocation model |
|
||||
| Fuel microservice owner | TBD | P3 fuel-submission ingest (F3.19–F3.21), payload contract (Q17) |
|
||||
| HR systems owner | TBD | P3 HR sync (F3.22–F3.24), extract source decisions (Q19) |
|
||||
|
||||
A sign-off is "I have read the requirements for the phases that affect my work and I agree they describe what I need." It is not "I will not change my mind later." Change is expected and tracked.
|
||||
|
||||
---
|
||||
|
||||
## 14. Open questions
|
||||
|
||||
These are decisions needed before the affected phase starts.
|
||||
|
||||
| # | Question | Needed before | Owner | Default if undecided |
|
||||
|---|---|---|---|---|
|
||||
| Q1 | ~~Authentication posture for dashboards: public-read (today) or JWT-gated?~~ **Decided.** All endpoints require JWT from day one, including live dashboard reads. Public-read not preserved. | **Decided** | Sponsor + Engineering | **JWT-gated on all endpoints.** Driver-shift home-area data, HR-sourced driver identity, plate-to-customer mappings — none of these are public-read-safe. Reflected in §3.3 principle 9, F1.8, F1.12, NFR1.6, S2, R19. |
|
||||
| Q2 | Confirmed SLO numerical targets (fix freshness, trip lag) | P1 (freshness), P2 (trip lag) | Ops manager | 90 s / 600 s |
|
||||
| Q3 | Image registry: ghcr.io vs self-hosted registry.rahamafresh.com | P1 | Engineering | ghcr.io (lower ops burden) |
|
||||
| Q4 | Driver assignment model: primary driver per vehicle with substitute override for leave (recommended), or per-trip assignment? | P3 | Ops manager + Fleet admin | Primary + substitute with time-bounded ranges (per §F3.11). |
|
||||
| Q5 | Slack channel for SLO alerts | P3 | Sponsor | `#fleet-ops` (TBD existence) |
|
||||
| Q6 | What stays of n8n? Any genuine cross-system workflows worth keeping? | P3 | Engineering + Ops | Decommission entirely unless concrete workflow surfaces |
|
||||
| Q7 | Customer-facing tracking — confirmed out of scope or future phase? | P4 sign-off | Sponsor | Out of scope (PRD non-goal) |
|
||||
| Q8 | Data retention beyond 365 days for `events.raw` — archive or delete? | P3 (when first chunk ages out) | Sponsor + legal | Archive to rustfs as monthly parquet |
|
||||
| Q9 | Phase 4 scope details for cost-allocation and fuel/temperature priorities (driver KPIs are committed) | End of P3 | Sponsor + ops + finance | Confirmed at P3 wrap |
|
||||
| Q10 | Service interval policy: one default (5,000 km) or per vehicle class? Which vehicle classes exist and what is each class's interval? | P3 | Fleet admin + workshop | Default 5,000 km, with `service_interval_km_override` per vehicle to start, vehicle classes added when the classes are known. |
|
||||
| Q11 | Minimum break between shifts (the gap that distinguishes "shift end" from "lunch break"). Default 6 h — confirm against actual roster patterns. | P4 | Ops manager + HR | 6 h, configurable per cost centre if depots differ. |
|
||||
| Q12 | Treatment of unassigned-vehicle movement: bucket under `_unassigned` driver, surface as alert, both? Privacy implication: tracking unattributed driving still records location. | P4 | Ops manager + HR + sponsor | Bucket under `_unassigned`, surface daily worklist via P3 dashboard. |
|
||||
| Q13 | Behaviour score weights — do we accept the proposed defaults (speeding 35, harsh_brake 25, harsh_accel 25, harsh_corner 15) or does ops have a different priority? | P4 | Ops manager | Proposed defaults; tunable post-launch via blinded review. |
|
||||
| Q14 | Driver transparency communication — do we inform drivers what is captured (shift locations, behaviour events) before P4 launches? | P4 launch | Sponsor + HR | Yes; transparency reduces R12 and is the right thing to do regardless. |
|
||||
| Q15 | Companion routing project — kick off in parallel with this rebuild, or sequence after cutover? Affects whether dispatchers see suggest-route tooling in 2026 or 2027. | P3 sign-off | Sponsor + Engineering | Sequence after cutover; pre-empts capacity contention during P1–P3. |
|
||||
| Q16 | Should this platform retain a thin dispatch-audit log (`ops.tickets` or similar) for "who decided to send vehicle X where", or is even that the routing project's responsibility? | P3 | Sponsor + Ops + Companion-project lead | Drop entirely from this PRD; if companion project doesn't ship it, we revisit. |
|
||||
| Q17 | Fuel microservice payload contract — exact field names, types, photo storage convention. The microservice is in production and continues to evolve; we adapt to it and version-pin the parser. | P3 | Engineering + Fuel microservice owner | We adopt the microservice's current schema as `parser_version = 1`. Future microservice changes bump parser version; both versions remain replayable from `events.raw`. |
|
||||
| Q18 | HR ingest pattern — sync-via-event-log (recommended) or FDW + materialised view? | **Decided** | Sponsor + Engineering | **Sync-via-event-log with 3-hour cadence + force-refresh action.** Same `events.raw → parsed → projected → state` pipeline as Jimi pushes and fuel submissions. Documented in F3.22–F3.24. |
|
||||
| Q19 | HR extract source details — which database / API endpoint, which table or view, credentials, network reachability from the platform's VPS. | P3 | HR systems owner + Engineering | TBD — needs HR systems owner conversation. Default if HR cannot expose a stable extract: nightly CSV drop to rustfs, ingested by a file-watcher worker following the same `events.raw` pattern. |
|
||||
| Q20 | Companion WhatsApp comms project — scope, timeline, tech choice (Meta API vs Evolution + Chatwoot). | After P3 sign-off | Sponsor + comms-project lead | Sequence after cutover (same logic as Q15 for routing). Tech choice belongs to that project, not this one. |
|
||||
|
||||
---
|
||||
|
||||
## 15. Glossary
|
||||
|
||||
- **ACC_ON / ACC_OFF** — Ignition-on and ignition-off signals from the tracker, sent when the vehicle's accessory power is turned on or off. The day's first ACC_ON marks the driver's shift start; the last ACC_OFF marks shift end.
|
||||
- **Active device** — A device whose lifecycle state is `active` (provisioned + has reported within retention threshold + not suspended or decommissioned).
|
||||
- **Behaviour score** — A 0–100 per-driver composite of speeding, harsh-acceleration, harsh-braking, and harsh-cornering events, normalised per 100 km driven. 100 = clean driving in the window; 0 = a harsh event on every km.
|
||||
- **CAGG** — TimescaleDB Continuous Aggregate; a materialised view kept fresh by the database.
|
||||
- **Cross-feed** — The mechanism by which an alarm push's latent position is also written to `live_positions` (introduced as FIX-M21 in the current system; in the rebuild it is automatic via event sourcing).
|
||||
- **Dedup rule** — The logic for selecting which device's fix represents a vehicle when multiple devices (tracker + camera) are on one plate.
|
||||
- **Driver assignment** — A time-ranged record stating which driver was on which vehicle in which role (primary or substitute) for which date range. Substitute assignments override the primary for their range.
|
||||
- **Driver shift** — A derived period from first ACC_ON of a day to last ACC_OFF, attributed to whichever driver was assigned (primary or substitute) to the vehicle at shift start. Carries geocoded start and end locations.
|
||||
- **Fuel submission** — A driver-originated record from the existing WhatsApp fuelling microservice containing odometer reading, litres added, fuel station, time, and photo. Ingested via `POST /push/fuel`, written to `events.raw` (`source = 'whatsapp_fuel'`), projected to `state.fuel_log` and `state.odometer_readings`.
|
||||
- **HR extract sync** — A scheduled pull, run every 3 hours (with an admin force-refresh option), of the HR system's driver roster into `events.raw` (`source = 'hr_extract'`), projected to `domain.drivers`. It is the platform's source of truth for driver identity, phone, and employment status, and it uses the same event-sourcing pipeline as Jimi pushes and fuel submissions.
|
||||
- **Lifecycle state** — A device's administrative state machine (`provisioned`, `active`, `suspended`, `decommissioned`).
|
||||
- **Operational state** — A vehicle's derived current state (`moving`, `parked`, `offline`, `unknown`).
|
||||
- **Projector** — A worker that reads `events.parsed` and writes to a `state.*` table.
|
||||
- **Odometer source** — Where a km reading came from. `gps_derived` (summed from position fixes — always available but approximate), `driver_submission` (from a fuel submission — accurate but periodic), or `admin_correction` (manually entered). `serve.fn_odometer_status` exposes both `gps_derived` and `driver_submission` side-by-side with a divergence + confidence indicator.
|
||||
- **Service interval** — The km-between-services policy for a vehicle. Default 5,000 km, per-vehicle-class override allowed (e.g. heavy trucks at 10,000 km), per-vehicle override allowed.
|
||||
- **Service status** — A vehicle's position in its service cycle: `ok` (comfortably within interval), `due_soon` (within 500 km of next service), `overdue` (km_since_last_service ≥ interval_km).
|
||||
- **SLO** — Service Level Objective; an explicit numerical commitment (e.g. "95% of active devices have a fix within 90 s").
|
||||
- **Stale IMEI** — A device whose latest fix is older than the freshness threshold (default 30 min for rescue, 90 s for SLO).
|
||||
- **Substitute driver** — A driver temporarily assigned to a vehicle while the primary driver is on leave / off-roster. Substitute assignments are time-bounded; when the range ends, attribution returns to the primary automatically.
|
||||
- **Unattributed shift** — A shift on a vehicle that had no driver assigned (primary or substitute) at shift start. Recorded under the `_unassigned` driver sentinel for fleet-level analysis; surfaced as a daily worklist for fleet admin.
|
||||
- **Zero island** — A latitude/longitude of exactly (0,0), which is in the Gulf of Guinea and indicates a sensor error, never a real fix.
|
||||
|
||||
---
|
||||
|
||||
## 16. Companion documents
|
||||
|
||||
**Companion projects (not yet authored):**
|
||||
|
||||
- *Routing + ServiceNow ticket dispatch PRD* — to be written. Scope: ServiceNow inbound ticket integration, vehicle-to-ticket allocation policy, suggest-route engine (pgRouting / OSRM / Valhalla / third-party — choice deferred to that project), dispatcher route-suggestion UX, deviation alerts, ETA prediction, multi-stop optimisation. Depends on this platform for live vehicle position, driver assignment, and (optionally) segment-speed observations.
|
||||
- *WhatsApp comms PRD* — to be written. Scope: dispatcher-to-driver broadcasts, inbound conversation handling, service-reminder templates, alarm notifications, opt-in/opt-out workflow, choice between Meta WhatsApp Business API and Evolution + Chatwoot, conversation-state UX for dispatchers. Depends on this platform for `domain.drivers` (identity, phone) and emits its own conversation log. **Does not** include the fuel/refuelling microservice, which is in scope for this PRD as an ingest source.
|
||||
|
||||
**External services this PRD integrates with (existing, not built by this project):**
|
||||
|
||||
- *WhatsApp fuelling microservice* — already in production. Driver sends a WhatsApp message with odometer photo, litres, station, time; the microservice parses and publishes a structured payload. This PRD's `POST /push/fuel` endpoint consumes that payload (F3.19). The microservice's WhatsApp interaction model is owned by the microservice; this PRD owns only the downstream ingest and projection. Payload contract evolves per Q17.
|
||||
- *HR system* — existing system of record for driver identity and employment status. This PRD reads from it via the HR sync worker (F3.22) on a 3-hour cadence. This PRD does not write to it.
|
||||
|
||||
---
|
||||
51
Dockerfile
51
Dockerfile
|
|
@ -1,51 +0,0 @@
|
|||
# syntax=docker/dockerfile:1.7
|
||||
|
||||
# Two-stage build: "builder" creates a virtualenv with the project's
# dependencies, "runtime" copies that virtualenv plus the application code.
# PYTHON_VERSION is declared before the first FROM so both FROM lines can use it.
ARG PYTHON_VERSION=3.12-slim
|
||||
|
||||
FROM python:${PYTHON_VERSION} AS builder
|
||||
|
||||
# No .pyc files, unbuffered stdout/stderr, no pip cache or pip version check.
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# Compiler toolchain and libpq headers, installed in the builder stage only.
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends build-essential libpq-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Only pyproject.toml is copied before "pip install ."; the app/ sources are
# added in the runtime stage below.
COPY pyproject.toml ./
|
||||
RUN python -m venv /opt/venv \
|
||||
&& /opt/venv/bin/pip install --upgrade pip \
|
||||
&& /opt/venv/bin/pip install .
|
||||
|
||||
FROM python:${PYTHON_VERSION} AS runtime
|
||||
|
||||
# GIT_SHA is a build argument exposed to the process as APP_GIT_SHA.
ARG GIT_SHA=unknown
|
||||
# Puts the copied virtualenv first on PATH.
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PATH="/opt/venv/bin:${PATH}" \
|
||||
APP_GIT_SHA=${GIT_SHA}
|
||||
|
||||
# Runtime packages: libpq5 and curl (curl is what HEALTHCHECK below invokes).
# Also creates the unprivileged "app" user (uid 1000) the container runs as.
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends libpq5 curl \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& useradd --create-home --shell /bin/sh --uid 1000 app
|
||||
|
||||
COPY --from=builder /opt/venv /opt/venv
|
||||
|
||||
WORKDIR /srv/app
|
||||
COPY app/ ./app/
|
||||
COPY scripts/entrypoint.sh /usr/local/bin/entrypoint.sh
|
||||
# Make the entrypoint executable and hand /srv/app to the "app" user.
RUN chmod +x /usr/local/bin/entrypoint.sh \
|
||||
&& chown -R app:app /srv/app
|
||||
|
||||
USER app
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
# Probes /health/<APP_ROLE> on port 8000; the path falls back to "gateway"
# when APP_ROLE is unset. curl -f makes HTTP error statuses fail the check.
HEALTHCHECK --interval=15s --timeout=3s --start-period=5s --retries=3 \
|
||||
CMD curl -fsS http://127.0.0.1:8000/health/${APP_ROLE:-gateway} || exit 1
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
|
||||
59
README.md
59
README.md
|
|
@ -1,61 +1,2 @@
|
|||
# fleet-platform
|
||||
|
||||
Greenfield rebuild of the Rahamafresh fleet telematics platform.
|
||||
|
||||
One FastAPI codebase, one Docker image, three container roles (gateway / worker / cron) from the same image. Event-sourced ingest into TimescaleDB + PostGIS. JWT-mandatory reads. Image-tag deploys via Coolify from a self-hosted Forgejo registry.
|
||||
|
||||
See:
|
||||
- `260522_fleet_platform_prd_final.md` — product spec (P1–P4)
|
||||
- `260522_fleet_platform_architecture_final.md` — engineering design (phases A–G)
|
||||
- `~/.claude/plans/you-are-an-experienced-majestic-planet.md` — approved Phase 1 plan
|
||||
|
||||
## Quick start (dev)
|
||||
|
||||
```sh
|
||||
cp .env.example .env
|
||||
# edit .env: at minimum set POSTGRES_PASSWORD, JWT_SECRET, TRACKSOLID_PUSH_TOKEN
|
||||
docker compose -f docker-compose.dev.yml up
|
||||
```
|
||||
|
||||
Health checks:
|
||||
|
||||
```sh
|
||||
curl http://localhost:8001/health/gateway
|
||||
curl http://localhost:8001/health/worker # via worker container's exposed port
|
||||
curl http://localhost:8001/health/cron
|
||||
```
|
||||
|
||||
## Layout
|
||||
|
||||
```
|
||||
app/ one FastAPI codebase
|
||||
entrypoints/ three role entrypoints from the same image
|
||||
gateway.py /push/jimi/*, /api/views/*, /api/auth/token
|
||||
worker.py LISTEN parser + projectors
|
||||
cron.py polling + SLO + contract checker
|
||||
models/ Pydantic models (Jimi contracts, view shapes)
|
||||
parsers/ one function per msg_type / poll endpoint
|
||||
projectors/ single-writer projectors per state table
|
||||
db/migrations/ dbmate forward-only SQL
|
||||
scripts/ operational utilities (parity_check, entrypoint.sh)
|
||||
tests/ pytest
|
||||
web/ fleet-core.js + index-live.html + login.html
|
||||
```
|
||||
|
||||
## Container roles
|
||||
|
||||
The same image runs in three roles, selected by `APP_ROLE`:
|
||||
|
||||
| Role | Workload |
|
||||
|---|---|
|
||||
| `gateway` | HTTP: Tracksolid push receivers + dashboard read API + JWT issuance |
|
||||
| `worker` | LISTEN events_raw_new / events_parsed_new → parser → projectors |
|
||||
| `cron` | APScheduler: polled ingest (60s/10m), SLO measurement, contract checker |
|
||||
|
||||
Failure isolation is the point: a heavy report in `worker` does not stall `gateway`.
|
||||
|
||||
## Deploy
|
||||
|
||||
CI (Forgejo Actions) builds on push to `main`, tags `<registry>/fleet-platform:<sha>` and `:latest`. Coolify deploys by tag. Rollback is `coolify deploy :<prev-sha>`.
|
||||
|
||||
`dbmate up` runs on `platform-worker` startup before the worker serves traffic; `gateway` and `cron` wait on a startup probe that confirms migration completion.
|
||||
|
|
|
|||
134
app/auth.py
134
app/auth.py
|
|
@ -1,134 +0,0 @@
|
|||
import hashlib
|
||||
import secrets
|
||||
from collections.abc import Awaitable, Callable
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from typing import Annotated, Any
|
||||
|
||||
import bcrypt
|
||||
import jwt
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from fastapi.security import OAuth2PasswordBearer
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.config import get_settings
|
||||
from app.db import get_pool
|
||||
|
||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")
|
||||
|
||||
ACCESS = "access"
|
||||
REFRESH = "refresh"
|
||||
|
||||
|
||||
# Pydantic model pairing an access token with a refresh token.
class TokenPair(BaseModel):
|
||||
access_token: str
|
||||
refresh_token: str
|
||||
# Defaults to "bearer" when not supplied.
token_type: str = "bearer"
|
||||
# NOTE(review): presumably the access-token lifetime in seconds (the
# ttl_seconds returned by issue_access_token) — confirm against the auth router.
expires_in: int
|
||||
|
||||
|
||||
# Authenticated caller as seen by request handlers; built by current_account
# from the claims of a verified access token.
class AuthAccount(BaseModel):
|
||||
account_id: int
|
||||
# current_account always passes "" here: the token carries no username claim.
username: str
|
||||
# Scope strings copied from the token's "scopes" claim.
scopes: list[str]
|
||||
|
||||
|
||||
def hash_password(plain: str) -> str:
    """Hash a plaintext password with bcrypt and return the hash as text.

    The password is UTF-8 encoded and hashed with a salt generated for this
    call using 12 rounds; the resulting bytes are decoded back to ``str``.
    """
    encoded = plain.encode("utf-8")
    digest = bcrypt.hashpw(encoded, bcrypt.gensalt(rounds=12))
    return digest.decode("utf-8")
|
||||
|
||||
|
||||
def verify_password(plain: str, hashed: str) -> bool:
    """Check a plaintext password against a stored bcrypt hash.

    Returns the result of ``bcrypt.checkpw``. A ``ValueError`` or ``TypeError``
    raised while encoding or checking is reported as ``False`` instead of
    propagating.
    """
    try:
        candidate = plain.encode("utf-8")
        stored = hashed.encode("utf-8")
        matches = bcrypt.checkpw(candidate, stored)
    except (ValueError, TypeError):
        matches = False
    return matches
|
||||
|
||||
|
||||
def _now() -> datetime:
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
def issue_access_token(account_id: int, scopes: list[str]) -> tuple[str, int]:
    """Mint a signed HS256 access JWT for an account.

    Args:
        account_id: Account identifier; stored as a string in the ``sub`` claim.
        scopes: Scope strings stored in the ``scopes`` claim.

    Returns:
        ``(token, ttl_seconds)`` where ``ttl_seconds`` is the configured
        access-token lifetime (``jwt_access_ttl_min`` minutes) in seconds.
    """
    settings = get_settings()
    ttl_seconds = settings.jwt_access_ttl_min * 60
    # Read the clock once so that exp - iat equals the TTL exactly.
    issued_at = _now()
    payload: dict[str, Any] = {
        "sub": str(account_id),
        "scopes": scopes,
        "typ": ACCESS,
        "iat": issued_at,
        "exp": issued_at + timedelta(seconds=ttl_seconds),
    }
    token = jwt.encode(payload, settings.jwt_secret, algorithm="HS256")
    return token, ttl_seconds
|
||||
|
||||
|
||||
def issue_refresh_token(account_id: int) -> tuple[str, datetime, str]:
    """Returns (opaque_token, expires_at, token_hash). Persist only the hash.

    The token is a random URL-safe string (48 bytes of entropy), the expiry is
    now plus the configured ``jwt_refresh_ttl_days``, and the hash is the hex
    SHA-256 digest of the token. ``account_id`` is accepted but not used here.
    """
    _ = account_id
    refresh_days = get_settings().jwt_refresh_ttl_days
    opaque = secrets.token_urlsafe(48)
    expiry = _now() + timedelta(days=refresh_days)
    digest = hashlib.sha256(opaque.encode("utf-8")).hexdigest()
    return opaque, expiry, digest
|
||||
|
||||
|
||||
def decode_access_token(token: str) -> dict[str, Any]:
    """Verify an access JWT and return its claims.

    The signature is checked against the configured secret with HS256 only.
    The token must carry both ``exp`` and ``sub``: a token without ``exp``
    would never expire, and ``current_account`` reads ``sub``.

    Raises:
        HTTPException: 401 when PyJWT rejects the token (bad signature,
            expired, malformed, or missing a required claim) or when the
            ``typ`` claim is not ``"access"``.
    """
    settings = get_settings()
    try:
        claims: dict[str, Any] = jwt.decode(
            token,
            settings.jwt_secret,
            algorithms=["HS256"],
            options={"require": ["exp", "sub"]},
        )
    except jwt.PyJWTError as exc:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=f"invalid token: {exc.__class__.__name__}",
        ) from exc
    if claims.get("typ") != ACCESS:
        # Rejects any other token kind presented as a bearer access token.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="not an access token",
        )
    return claims
|
||||
|
||||
|
||||
async def fetch_account(username: str) -> tuple[int, str, list[str]] | None:
    """Look up an active account by username.

    Returns ``(account_id, password_hash, scopes)`` from ``auth.accounts`` for
    the row matching ``username`` with ``is_active = true``, or ``None`` when
    no such row exists.
    """
    query = (
        "SELECT account_id, password_hash, scopes "
        "FROM auth.accounts "
        "WHERE username = %s AND is_active = true"
    )
    pool = await get_pool()
    async with pool.connection() as conn:
        async with conn.cursor() as cur:
            await cur.execute(query, (username,))
            record = await cur.fetchone()
    if record is None:
        return None
    account_id, password_hash, scopes = record[0], record[1], record[2]
    return int(account_id), str(password_hash), list(scopes)
|
||||
|
||||
|
||||
async def store_refresh_token(account_id: int, token_hash: str, expires_at: datetime) -> None:
    """Insert a refresh-token row into ``auth.tokens``.

    The row is stored with ``token_type = 'refresh'``; the caller supplies the
    token hash and expiry.
    """
    statement = (
        "INSERT INTO auth.tokens (account_id, token_type, token_hash, expires_at) "
        "VALUES (%s, 'refresh', %s, %s)"
    )
    params = (account_id, token_hash, expires_at)
    pool = await get_pool()
    async with pool.connection() as conn:
        async with conn.cursor() as cur:
            await cur.execute(statement, params)
|
||||
|
||||
|
||||
async def current_account(
    token: Annotated[str, Depends(oauth2_scheme)],
) -> AuthAccount:
    """FastAPI dependency: resolve the bearer token into an ``AuthAccount``.

    The token is verified by ``decode_access_token``. ``username`` is always
    ``""`` because the token carries no username claim.

    Raises:
        HTTPException: 401 when the token is invalid, or when its ``sub``
            claim is missing or not an integer (previously this surfaced as
            an unhandled KeyError/ValueError, i.e. HTTP 500).
    """
    claims = decode_access_token(token)
    try:
        account_id = int(claims["sub"])
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="invalid token: bad subject",
        ) from exc
    return AuthAccount(
        account_id=account_id,
        username="",
        # A null/absent scopes claim is treated as "no scopes".
        scopes=list(claims.get("scopes") or []),
    )
|
||||
|
||||
|
||||
def require_scope(scope: str) -> Callable[[AuthAccount], Awaitable[AuthAccount]]:
    """Build a FastAPI dependency that enforces a scope on the caller.

    The returned coroutine resolves the caller through ``current_account`` and
    returns it when the account holds ``scope`` or ``"admin:fleet"``;
    otherwise it raises a 403 ``HTTPException``.
    """

    async def _checker(
        account: Annotated[AuthAccount, Depends(current_account)],
    ) -> AuthAccount:
        granted = account.scopes
        if scope in granted or "admin:fleet" in granted:
            return account
        raise HTTPException(status_code=403, detail=f"missing scope: {scope}")

    return _checker
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
from functools import lru_cache
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
# Application settings. Each field is read from the upper-case name given as
# its alias.
class Settings(BaseSettings):
|
||||
# Values are also read from a UTF-8 ".env" file; unknown keys are ignored.
model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
|
||||
|
||||
# Required: no default.
database_url: str = Field(alias="DATABASE_URL")
|
||||
|
||||
# Required: no default. Used to sign and verify HS256 JWTs in app.auth.
jwt_secret: str = Field(alias="JWT_SECRET")
|
||||
# Access-token lifetime in minutes.
jwt_access_ttl_min: int = Field(default=15, alias="JWT_ACCESS_TTL_MIN")
|
||||
# Refresh-token lifetime in days.
jwt_refresh_ttl_days: int = Field(default=30, alias="JWT_REFRESH_TTL_DAYS")
|
||||
|
||||
tracksolid_push_token: str = Field(default="", alias="TRACKSOLID_PUSH_TOKEN")
|
||||
tracksolid_api_base_url: str = Field(default="", alias="TRACKSOLID_API_BASE_URL")
|
||||
|
||||
ntfy_base_url: str = Field(default="", alias="NTFY_BASE_URL")
|
||||
ntfy_topic: str = Field(default="fleet-slo-breach", alias="NTFY_TOPIC")
|
||||
ntfy_token: str = Field(default="", alias="NTFY_TOKEN")
|
||||
|
||||
# Defaults to "prod" when APP_MODE is unset.
app_mode: Literal["dev", "prod"] = Field(default="prod", alias="APP_MODE")
|
||||
# Defaults to "gateway" when APP_ROLE is unset.
app_role: Literal["gateway", "worker", "cron"] = Field(default="gateway", alias="APP_ROLE")
|
||||
# Level name; configure_logging upper-cases it and falls back to INFO.
app_log_level: str = Field(default="INFO", alias="APP_LOG_LEVEL")
|
||||
# Reported as image_sha by the health endpoints and as the FastAPI version.
app_git_sha: str = Field(default="unknown", alias="APP_GIT_SHA")
|
||||
|
||||
|
||||
# Cached accessor: lru_cache(maxsize=1) means Settings() is constructed on the
# first call and the same instance is returned afterwards.
@lru_cache(maxsize=1)
|
||||
def get_settings() -> Settings:
|
||||
return Settings()
|
||||
36
app/db.py
36
app/db.py
|
|
@ -1,36 +0,0 @@
|
|||
from typing import cast
|
||||
|
||||
from psycopg_pool import AsyncConnectionPool
|
||||
|
||||
from app.config import get_settings
|
||||
|
||||
_pool: AsyncConnectionPool | None = None
|
||||
|
||||
|
||||
async def get_pool() -> AsyncConnectionPool:
    """Return the process-wide async connection pool, creating it on first use.

    The pool is built from ``database_url`` with 1 to 10 connections and is
    opened explicitly. Note that the module global is assigned before
    ``open()`` is awaited.
    """
    global _pool  # noqa: PLW0603 — lazy singleton, by design
    if _pool is not None:
        return _pool
    _pool = AsyncConnectionPool(
        conninfo=get_settings().database_url,
        min_size=1,
        max_size=10,
        open=False,
    )
    await _pool.open()
    return _pool
|
||||
|
||||
|
||||
async def close_pool() -> None:
    """Close the shared pool if one exists and reset the module global."""
    global _pool  # noqa: PLW0603 — lazy singleton, by design
    if _pool is None:
        return
    await _pool.close()
    _pool = None
|
||||
|
||||
|
||||
async def check_db() -> bool:
    """Run ``SELECT 1`` through the shared pool and report whether it returned 1.

    Exceptions from acquiring a connection or running the query are not caught
    here and propagate to the caller.
    """
    pool = await get_pool()
    async with pool.connection() as conn:
        async with conn.cursor() as cur:
            await cur.execute("SELECT 1")
            result = await cur.fetchone()
    value = cast(tuple[int], result)[0]
    return value == 1
|
||||
|
|
@ -1,46 +0,0 @@
|
|||
"""Cron entrypoint.
|
||||
|
||||
Runs as a FastAPI app (for /health/cron) with APScheduler spawning the
|
||||
time-triggered jobs. P1 jobs are added in week 2 task #8 (polling) and week 3
|
||||
tasks #12 (SLO measurement) and #13 (contract checker).
|
||||
"""
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import structlog
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app.config import get_settings
|
||||
from app.db import close_pool, get_pool
|
||||
from app.health import router as health_router
|
||||
from app.logging_setup import configure_logging
|
||||
|
||||
log = structlog.get_logger("cron")
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(_: FastAPI) -> AsyncIterator[None]:
    """Cron app lifespan: logging, DB pool and an APScheduler instance.

    On startup it configures logging, opens the pool and starts a UTC
    ``AsyncIOScheduler`` (no jobs are registered in this block). On shutdown
    it stops the scheduler without waiting and closes the pool.
    """
    configure_logging()
    cfg = get_settings()
    await get_pool()
    log.info("cron.starting", git_sha=cfg.app_git_sha, mode=cfg.app_mode)

    sched = AsyncIOScheduler(timezone="UTC")
    sched.start()
    log.info("cron.scheduler_started")

    try:
        yield
    finally:
        sched.shutdown(wait=False)
        await close_pool()
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="fleet-platform [cron]",
|
||||
version=get_settings().app_git_sha,
|
||||
lifespan=lifespan,
|
||||
)
|
||||
app.include_router(health_router)
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
from app.main import create_app
|
||||
|
||||
app = create_app(role="gateway")
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
"""Worker entrypoint.
|
||||
|
||||
Runs as a FastAPI app (so /health/worker is reachable) with two long-lived
|
||||
background tasks spawned in lifespan:
|
||||
|
||||
- parser: LISTEN events_raw_new → drain events.raw → events.parsed
|
||||
- projector: LISTEN events_parsed_new → drain events.parsed → state.live_positions
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
from collections.abc import AsyncIterator
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import structlog
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app.config import get_settings
|
||||
from app.db import close_pool, get_pool
|
||||
from app.health import router as health_router
|
||||
from app.logging_setup import configure_logging
|
||||
from app.projectors import live_positions
|
||||
from app.workers import parser
|
||||
from app.workers.listener import listen_forever
|
||||
|
||||
log = structlog.get_logger("worker")
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(_: FastAPI) -> AsyncIterator[None]:
    """Worker app lifespan: start the two LISTEN loops, stop them on shutdown.

    Startup configures logging, opens the pool and spawns one task per
    channel: ``events_raw_new`` drained by the parser and
    ``events_parsed_new`` drained by the live-positions projector. Shutdown
    cancels each task, awaits it while suppressing any exception, then closes
    the pool.
    """
    configure_logging()
    cfg = get_settings()
    await get_pool()
    log.info("worker.starting", git_sha=cfg.app_git_sha, mode=cfg.app_mode)

    listeners = [
        asyncio.create_task(
            listen_forever("events_raw_new", parser.drain),
            name="parser-listener",
        ),
        asyncio.create_task(
            listen_forever("events_parsed_new", live_positions.drain),
            name="live-positions-listener",
        ),
    ]

    try:
        yield
    finally:
        for task in listeners:
            task.cancel()
            with contextlib.suppress(asyncio.CancelledError, Exception):
                await task
        await close_pool()
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="fleet-platform [worker]",
|
||||
version=get_settings().app_git_sha,
|
||||
lifespan=lifespan,
|
||||
)
|
||||
app.include_router(health_router)
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.config import get_settings
|
||||
from app.db import check_db
|
||||
|
||||
|
||||
# Body returned by the /health/* endpoints.
class HealthResponse(BaseModel):
|
||||
# Role name passed in by the endpoint ("gateway", "worker" or "cron").
role: str
|
||||
# "ok" or "fail", as set by _build from the database check result.
db: str
|
||||
# Value of the app_git_sha setting.
image_sha: str
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _build(role: str, db_ok: bool) -> HealthResponse:
    """Assemble the health payload for a role.

    ``db`` is ``"ok"`` when ``db_ok`` is truthy and ``"fail"`` otherwise;
    ``image_sha`` comes from the ``app_git_sha`` setting.
    """
    db_state = "ok" if db_ok else "fail"
    sha = get_settings().app_git_sha
    return HealthResponse(role=role, db=db_state, image_sha=sha)
|
||||
|
||||
|
||||
@router.get("/health/gateway")
async def health_gateway() -> HealthResponse:
    """Health endpoint for the gateway role; includes a database check."""
    db_ok = await check_db()
    return _build("gateway", db_ok)
|
||||
|
||||
|
||||
@router.get("/health/worker")
async def health_worker() -> HealthResponse:
    """Health endpoint for the worker role; includes a database check."""
    db_ok = await check_db()
    return _build("worker", db_ok)
|
||||
|
||||
|
||||
@router.get("/health/cron")
async def health_cron() -> HealthResponse:
    """Health endpoint for the cron role; includes a database check."""
    db_ok = await check_db()
    return _build("cron", db_ok)
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
import logging
|
||||
import sys
|
||||
|
||||
import structlog
|
||||
|
||||
from app.config import get_settings
|
||||
|
||||
|
||||
def configure_logging() -> None:
    """Configure stdlib logging and structlog for JSON output on stdout.

    The level is looked up on the ``logging`` module from the upper-cased
    ``app_log_level`` setting, falling back to ``INFO`` when no attribute of
    that name exists. structlog renders each event as JSON with log level,
    an ISO UTC timestamp, stack info and formatted exception info.
    """
    level_name = get_settings().app_log_level.upper()
    level = getattr(logging, level_name, logging.INFO)

    logging.basicConfig(format="%(message)s", stream=sys.stdout, level=level)

    pipeline = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso", utc=True),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.JSONRenderer(),
    ]
    structlog.configure(
        processors=pipeline,
        wrapper_class=structlog.make_filtering_bound_logger(level),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=True,
    )
|
||||
52
app/main.py
52
app/main.py
|
|
@ -1,52 +0,0 @@
|
|||
from collections.abc import AsyncIterator
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from slowapi.errors import RateLimitExceeded
|
||||
from slowapi.middleware import SlowAPIMiddleware
|
||||
|
||||
from app.config import get_settings
|
||||
from app.db import close_pool, get_pool
|
||||
from app.health import router as health_router
|
||||
from app.logging_setup import configure_logging
|
||||
from app.rate_limit import limiter
|
||||
from app.routers.auth import router as auth_router
|
||||
from app.routers.push import router as push_router
|
||||
from app.routers.views import router as views_router
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(_: FastAPI) -> AsyncIterator[None]:
    """App lifespan: configure logging and open the DB pool, close it on exit."""
    configure_logging()
    await get_pool()
    try:
        yield
    finally:
        await close_pool()
|
||||
|
||||
|
||||
def _rate_limit_handler(request: Request, exc: Exception) -> JSONResponse:
    """Exception handler returning a fixed HTTP 429 JSON body.

    Neither the request nor the exception is inspected.
    """
    _ = (request, exc)
    body = {"detail": "rate limit exceeded"}
    return JSONResponse(status_code=429, content=body)
|
||||
|
||||
|
||||
def create_app(role: str) -> FastAPI:
    """Build the FastAPI application for a container role.

    Every role gets the shared lifespan, the slowapi limiter with its 429
    handler and middleware, and the health router. The auth, push and views
    routers are mounted only when ``role == "gateway"``.
    """
    app = FastAPI(
        title=f"fleet-platform [{role}]",
        version=get_settings().app_git_sha,
        lifespan=lifespan,
    )
    app.state.limiter = limiter
    app.add_exception_handler(RateLimitExceeded, _rate_limit_handler)
    app.add_middleware(SlowAPIMiddleware)

    app.include_router(health_router)

    if role == "gateway":
        for gateway_router in (auth_router, push_router, views_router):
            app.include_router(gateway_router)

    return app
|
||||
|
|
@ -1,138 +0,0 @@
|
|||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
|
||||
def _coerce_ts(value: Any) -> datetime | None: # noqa: PLR0911 — many wire formats
|
||||
"""Tracksolid uses both unix epoch (sec/ms) and ISO/BCD strings.
|
||||
|
||||
Returns a UTC datetime, or None when the value cannot be interpreted.
|
||||
"""
|
||||
if value is None or value == "":
|
||||
return None
|
||||
if isinstance(value, (int, float)):
|
||||
n = int(value)
|
||||
if n > 10**12:
|
||||
n = n // 1000
|
||||
try:
|
||||
return datetime.fromtimestamp(n, tz=UTC)
|
||||
except (OverflowError, OSError, ValueError):
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
s = value.strip()
|
||||
if not s:
|
||||
return None
|
||||
for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"):
|
||||
try:
|
||||
return datetime.strptime(s, fmt).replace(tzinfo=UTC)
|
||||
except ValueError:
|
||||
continue
|
||||
if s.isdigit() and len(s) in (12, 14):
|
||||
fmt = "%y%m%d%H%M%S" if len(s) == 12 else "%Y%m%d%H%M%S"
|
||||
try:
|
||||
return datetime.strptime(s, fmt).replace(tzinfo=UTC)
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
# Shared base for the Jimi/Tracksolid wire models below: unknown fields are
# kept (extra="allow") and fields may be populated by name as well as by alias.
class _JimiBase(BaseModel):
|
||||
model_config = ConfigDict(extra="allow", populate_by_name=True)
|
||||
|
||||
|
||||
# One GPS item from a Tracksolid push. Wire names (deviceImei, gpsTime,
# gpsSpeed, ...) are mapped onto snake_case fields via validation aliases.
class JimiPushGps(_JimiBase):
    imei: str = Field(validation_alias="deviceImei")
    gps_time: datetime = Field(validation_alias="gpsTime")
    lat: float
    lng: float
    speed_kmh: float | None = Field(default=None, validation_alias="gpsSpeed")
    direction_deg: float | None = Field(default=None, validation_alias="direction")
    acc: int | str | None = None
    satellites: int | None = Field(default=None, validation_alias="satelliteNum")
    altitude_m: float | None = Field(default=None, validation_alias="altitude")
    post_type: int | None = Field(default=None, validation_alias="postType")

    # Normalise gpsTime through _coerce_ts; reject values it cannot read.
    @field_validator("gps_time", mode="before")
    @classmethod
    def _parse_time(cls, v: Any) -> Any:
        if (ts := _coerce_ts(v)) is not None:
            return ts
        raise ValueError(f"unparseable gpsTime: {v!r}")
|
||||
|
||||
|
||||
# One alarm item from a Tracksolid push. `imei` has no alias on purpose: the
# parser copies `deviceImei` into `imei` before validation, so this model only
# reads the `imei` key. The former `_resolve_imei` validator returned its input
# unchanged and has been removed as dead code.
class JimiPushAlarm(_JimiBase):
    imei: str
    alarm_type: str = Field(validation_alias="alarmType")
    alarm_name: str | None = Field(default=None, validation_alias="alarmName")
    alarm_time: datetime = Field(validation_alias="alarmTime")
    # Position is optional on alarms.
    lat: float | None = None
    lng: float | None = None
    speed_kmh: float | None = Field(default=None, validation_alias="speed")
    device_name: str | None = Field(default=None, validation_alias="deviceName")

    # Normalise alarmTime through _coerce_ts; reject values it cannot read.
    @field_validator("alarm_time", mode="before")
    @classmethod
    def _parse_time(cls, v: Any) -> Any:
        parsed = _coerce_ts(v)
        if parsed is None:
            raise ValueError(f"unparseable alarmTime: {v!r}")
        return parsed
|
||||
|
||||
|
||||
# One heartbeat item from a Tracksolid push, timestamped by gateTime.
class JimiPushHeartbeat(_JimiBase):
    imei: str = Field(validation_alias="deviceImei")
    gate_time: datetime = Field(validation_alias="gateTime")
    power_level: int | None = Field(default=None, validation_alias="powerLevel")
    gsm_signal: int | None = Field(default=None, validation_alias="gsmSign")
    acc: int | None = None
    power_status: int | None = Field(default=None, validation_alias="powerStatus")

    # Normalise gateTime through _coerce_ts; reject values it cannot read.
    @field_validator("gate_time", mode="before")
    @classmethod
    def _parse_time(cls, v: Any) -> Any:
        if (ts := _coerce_ts(v)) is not None:
            return ts
        raise ValueError(f"unparseable gateTime: {v!r}")
|
||||
|
||||
|
||||
# One event item from a Tracksolid push; the wire field `type` becomes
# event_type and `gateTime` becomes event_time.
class JimiPushEvent(_JimiBase):
    imei: str = Field(validation_alias="deviceImei")
    event_type: str = Field(validation_alias="type")
    event_time: datetime = Field(validation_alias="gateTime")
    timezone_str: str | None = Field(default=None, validation_alias="timezone")

    # Normalise gateTime through _coerce_ts; reject values it cannot read.
    @field_validator("event_time", mode="before")
    @classmethod
    def _parse_time(cls, v: Any) -> Any:
        if (ts := _coerce_ts(v)) is not None:
            return ts
        raise ValueError(f"unparseable gateTime: {v!r}")
|
||||
|
||||
|
||||
# One position fix from a polled Tracksolid response. Same fields as the push
# GPS model, without post_type.
class JimiPollFix(_JimiBase):
    imei: str = Field(validation_alias="deviceImei")
    gps_time: datetime = Field(validation_alias="gpsTime")
    lat: float
    lng: float
    speed_kmh: float | None = Field(default=None, validation_alias="gpsSpeed")
    direction_deg: float | None = Field(default=None, validation_alias="direction")
    altitude_m: float | None = Field(default=None, validation_alias="altitude")
    satellites: int | None = Field(default=None, validation_alias="satelliteNum")
    acc: int | str | None = None

    # Normalise gpsTime through _coerce_ts; reject values it cannot read.
    @field_validator("gps_time", mode="before")
    @classmethod
    def _parse_time(cls, v: Any) -> Any:
        if (ts := _coerce_ts(v)) is not None:
            return ts
        raise ValueError(f"unparseable gpsTime: {v!r}")
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
|
||||
# Fleet-level counters for the live view. Extra keys are accepted and kept
# (extra="allow").
class LiveSummary(BaseModel):
|
||||
model_config = ConfigDict(extra="allow")
|
||||
|
||||
total_active: int
|
||||
moving: int
|
||||
parked: int
|
||||
offline: int
|
||||
# NOTE(review): presumably the number of devices missing the freshness SLO —
# confirm against the SQL that fills this view.
below_freshness_slo: int
|
||||
# Carried as a string, not a datetime.
as_of: str
|
||||
|
||||
|
||||
# One SLO's threshold, current value and status label.
class SloStatusEntry(BaseModel):
|
||||
threshold: float
|
||||
# Optional: defaults to None when no current value is supplied.
current: float | None = None
|
||||
status: str
|
||||
|
||||
|
||||
# Live-view response: summary counters, a GeoJSON document held as a plain
# dict, and SLO entries keyed by string.
class LiveViewResponse(BaseModel):
|
||||
summary: LiveSummary
|
||||
geojson: dict[str, Any]
|
||||
slo_status: dict[str, SloStatusEntry]
|
||||
|
|
@ -1,241 +0,0 @@
|
|||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from app.models.jimi import (
|
||||
JimiPollFix,
|
||||
JimiPushAlarm,
|
||||
JimiPushEvent,
|
||||
JimiPushGps,
|
||||
JimiPushHeartbeat,
|
||||
)
|
||||
|
||||
PARSER_VERSION = 1
|
||||
|
||||
|
||||
# Immutable, slotted record produced by the parsers in this module.
@dataclass(slots=True, frozen=True)
|
||||
class ParsedEvent:
|
||||
# Event kind; the parsers in this module emit "position_fix" and "alarm".
kind: str
|
||||
# Device-reported time of the event (gps_time or alarm_time of the source item).
occurred_at: datetime
|
||||
imei: str
|
||||
# Passed through unchanged from the parser's account_id argument; may be None.
account_id: str | None
|
||||
# Kind-specific fields as a plain dict.
payload: dict[str, Any]
|
||||
|
||||
|
||||
class UnsupportedMsgType(Exception):
    """Exception type for message types the parser does not handle.

    NOTE(review): the raise site is outside this view — confirm usage there.
    """
|
||||
|
||||
|
||||
def _is_valid_fix(lat: float | None, lng: float | None) -> bool:
|
||||
if lat is None or lng is None:
|
||||
return False
|
||||
if lat == 0.0 and lng == 0.0:
|
||||
return False # zero island — sensor error
|
||||
if not (-90.0 <= lat <= 90.0):
|
||||
return False
|
||||
return -180.0 <= lng <= 180.0
|
||||
|
||||
|
||||
def _items_from_payload(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
"""Coerce the gateway-stored payload into a list of dict items.
|
||||
|
||||
The gateway stores the verbatim Tracksolid form `data` field as either
|
||||
a dict (single object) or {"_list": [...]} (array). Some callers may pass
|
||||
{"_raw": "..."} if JSON parsing failed at the gateway.
|
||||
"""
|
||||
raw_list = payload.get("_list")
|
||||
if isinstance(raw_list, list):
|
||||
return [item for item in raw_list if isinstance(item, dict)]
|
||||
if "_raw" in payload:
|
||||
return []
|
||||
return [payload]
|
||||
|
||||
|
||||
def _fix_payload(
|
||||
lat: float,
|
||||
lng: float,
|
||||
*,
|
||||
speed_kmh: float | None,
|
||||
direction_deg: float | None,
|
||||
altitude_m: float | None = None,
|
||||
satellites: int | None = None,
|
||||
acc: Any = None,
|
||||
source: str,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"lat": lat,
|
||||
"lng": lng,
|
||||
"speed_kmh": speed_kmh,
|
||||
"direction_deg": direction_deg,
|
||||
"altitude_m": altitude_m,
|
||||
"satellites": satellites,
|
||||
"acc": acc,
|
||||
"source": source,
|
||||
}
|
||||
|
||||
|
||||
def _parse_push_gps(payload: dict[str, Any], account_id: str | None) -> list[ParsedEvent]:
    """Convert a pushgps payload into "position_fix" events.

    Each item is validated with JimiPushGps; items whose coordinates fail
    _is_valid_fix are dropped silently. A validation error propagates to
    the caller.
    """
    fixes: list[ParsedEvent] = []
    # Lazy generator: items are validated one at a time, in order.
    validated = (JimiPushGps.model_validate(raw_item) for raw_item in _items_from_payload(payload))
    for gps in validated:
        if _is_valid_fix(gps.lat, gps.lng):
            fix_data = _fix_payload(
                gps.lat,
                gps.lng,
                speed_kmh=gps.speed_kmh,
                direction_deg=gps.direction_deg,
                altitude_m=gps.altitude_m,
                satellites=gps.satellites,
                acc=gps.acc,
                source="tracksolid_push",
            )
            fixes.append(
                ParsedEvent(
                    kind="position_fix",
                    occurred_at=gps.gps_time,
                    imei=gps.imei,
                    account_id=account_id,
                    payload=fix_data,
                )
            )
    return fixes
|
||||
|
||||
|
||||
def _parse_push_alarm(payload: dict[str, Any], account_id: str | None) -> list[ParsedEvent]:
    """Convert a pushalarm payload into "alarm" events, plus cross-fed fixes.

    Every item yields one "alarm" event. When the alarm also carries a valid
    coordinate pair, a "position_fix" event with source
    "tracksolid_push_alarm_crossfeed" is emitted right after it, stamped with
    the alarm time. A validation error propagates to the caller.
    """
    events: list[ParsedEvent] = []
    for raw_item in _items_from_payload(payload):
        # Work on a copy so the stored payload is not mutated. "imei" is only
        # filled in when the key is absent, preferring "deviceImei".
        normalized = {**raw_item}
        fallback_imei = raw_item.get("deviceImei") or raw_item.get("imei")
        if "imei" not in normalized:
            normalized["imei"] = fallback_imei
        alarm = JimiPushAlarm.model_validate(normalized)

        alarm_data = {
            "alarm_type": alarm.alarm_type,
            "alarm_name": alarm.alarm_name,
            "lat": alarm.lat,
            "lng": alarm.lng,
            "speed_kmh": alarm.speed_kmh,
            "device_name": alarm.device_name,
        }
        events.append(
            ParsedEvent(
                kind="alarm",
                occurred_at=alarm.alarm_time,
                imei=alarm.imei,
                account_id=account_id,
                payload=alarm_data,
            )
        )

        if not _is_valid_fix(alarm.lat, alarm.lng):
            continue
        # _is_valid_fix already rejected None; the assert narrows the types.
        assert alarm.lat is not None and alarm.lng is not None
        crossfeed = _fix_payload(
            alarm.lat,
            alarm.lng,
            speed_kmh=alarm.speed_kmh,
            direction_deg=None,
            source="tracksolid_push_alarm_crossfeed",
        )
        events.append(
            ParsedEvent(
                kind="position_fix",
                occurred_at=alarm.alarm_time,
                imei=alarm.imei,
                account_id=account_id,
                payload=crossfeed,
            )
        )
    return events
|
||||
|
||||
|
||||
def _parse_push_heartbeat(payload: dict[str, Any], account_id: str | None) -> list[ParsedEvent]:
    """Convert a pushhb payload into "heartbeat" events, one per item.

    Each item is validated with JimiPushHeartbeat; a validation error
    propagates to the caller.
    """

    def _to_event(raw_item: dict[str, Any]) -> ParsedEvent:
        hb = JimiPushHeartbeat.model_validate(raw_item)
        status = {
            "power_level": hb.power_level,
            "gsm_signal": hb.gsm_signal,
            "acc": hb.acc,
            "power_status": hb.power_status,
        }
        return ParsedEvent(
            kind="heartbeat",
            occurred_at=hb.gate_time,
            imei=hb.imei,
            account_id=account_id,
            payload=status,
        )

    return [_to_event(raw_item) for raw_item in _items_from_payload(payload)]
|
||||
|
||||
|
||||
def _parse_push_event(payload: dict[str, Any], account_id: str | None) -> list[ParsedEvent]:
    """Convert a pushevent payload into login/logout events, one per item.

    An event type equal to "LOGIN" (case-insensitive) becomes "device_login";
    every other event type becomes "device_logout".
    """
    events: list[ParsedEvent] = []
    for raw_item in _items_from_payload(payload):
        ev = JimiPushEvent.model_validate(raw_item)
        if ev.event_type.upper() == "LOGIN":
            kind = "device_login"
        else:
            # NOTE(review): anything that is not LOGIN is treated as a logout —
            # confirm no other event types are delivered on this endpoint.
            kind = "device_logout"
        details = {"event_type": ev.event_type, "timezone": ev.timezone_str}
        events.append(
            ParsedEvent(
                kind=kind,
                occurred_at=ev.event_time,
                imei=ev.imei,
                account_id=account_id,
                payload=details,
            )
        )
    return events
|
||||
|
||||
|
||||
def _items_for_poll(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
"""Polled list/get responses come back wrapped under various keys."""
|
||||
for key in ("_list", "data", "records"):
|
||||
v = payload.get(key)
|
||||
if isinstance(v, list):
|
||||
return [item for item in v if isinstance(item, dict)]
|
||||
if "_raw" in payload:
|
||||
return []
|
||||
return [payload]
|
||||
|
||||
|
||||
def _parse_poll_list(payload: dict[str, Any], account_id: str | None) -> list[ParsedEvent]:
    """Convert a polled list/get response into "position_fix" events.

    Each item is validated with JimiPollFix; items whose coordinates fail
    _is_valid_fix are dropped silently. A validation error propagates to
    the caller.
    """
    fixes: list[ParsedEvent] = []
    for raw_item in _items_for_poll(payload):
        polled = JimiPollFix.model_validate(raw_item)
        if _is_valid_fix(polled.lat, polled.lng):
            fix_data = _fix_payload(
                polled.lat,
                polled.lng,
                speed_kmh=polled.speed_kmh,
                direction_deg=polled.direction_deg,
                altitude_m=polled.altitude_m,
                satellites=polled.satellites,
                acc=polled.acc,
                source="tracksolid_poll",
            )
            fixes.append(
                ParsedEvent(
                    kind="position_fix",
                    occurred_at=polled.gps_time,
                    imei=polled.imei,
                    account_id=account_id,
                    payload=fix_data,
                )
            )
    return fixes
|
||||
|
||||
|
||||
# Maps (source, msg_type) to the handler that parses that payload shape.
# A msg_type of None is the per-source fallback that parse_raw consults when
# there is no exact (source, msg_type) entry; both poll sources share the
# same list parser.
_DISPATCH = {
    ("tracksolid_push", "pushgps"): _parse_push_gps,
    ("tracksolid_push", "pushalarm"): _parse_push_alarm,
    ("tracksolid_push", "pushhb"): _parse_push_heartbeat,
    ("tracksolid_push", "pushevent"): _parse_push_event,
    ("tracksolid_poll_list", None): _parse_poll_list,
    ("tracksolid_poll_get", None): _parse_poll_list,
}
|
||||
|
||||
|
||||
def parse_raw(
    source: str,
    msg_type: str | None,
    payload: dict[str, Any],
    account_id: str | None,
) -> list[ParsedEvent]:
    """Parse one stored raw event into zero or more ParsedEvent objects.

    The handler is looked up by the exact (source, msg_type) pair first and
    by the source-wide (source, None) fallback second.

    Raises:
        UnsupportedMsgType: when neither lookup finds a handler.
    """
    handler = _DISPATCH.get((source, msg_type))
    if not handler:
        handler = _DISPATCH.get((source, None))
    if handler is not None:
        return handler(payload, account_id)
    raise UnsupportedMsgType(f"no parser for source={source!r} msg_type={msg_type!r}")
|
||||
|
|
@ -1,156 +0,0 @@
|
|||
"""Single-writer projector for state.live_positions + state.position_history.
|
||||
|
||||
Drains events.parsed of kind 'position_fix' that haven't been projected yet.
|
||||
Each device (imei) owns one row in state.live_positions; the tracker-vs-camera
|
||||
dedup happens at read time in serve.fn_live_view, not here.
|
||||
|
||||
Ordering invariant: state.live_positions never moves backwards in occurred_at
|
||||
*per imei* (older fixes from the same device do not overwrite newer ones).
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
from psycopg import AsyncCursor
|
||||
from psycopg.types.json import Jsonb
|
||||
|
||||
from app.db import get_pool
|
||||
from app.parsers.jimi import PARSER_VERSION
|
||||
|
||||
log = structlog.get_logger("projector.live_positions")
|
||||
|
||||
# Maximum number of position_fix events claimed per drain() call.
DRAIN_BATCH = 500
# Key merged into events.parsed.payload once an event has been handled;
# drain() selects only rows whose payload lacks this key.
PROJECTED_FLAG_KEY = "live_positions_projected_at"
|
||||
|
||||
|
||||
async def _resolve_device(cur: AsyncCursor[Any], imei: str) -> int | None:
    """Look up the vehicle a device is mapped to.

    Returns the vehicle_id from domain.devices, or None when the imei is
    unknown or the device has no vehicle assigned.
    """
    await cur.execute(
        "SELECT vehicle_id FROM domain.devices WHERE imei = %s",
        (imei,),
    )
    found = await cur.fetchone()
    vehicle_id = found[0] if found is not None else None
    return None if vehicle_id is None else int(vehicle_id)
|
||||
|
||||
|
||||
async def _project_one(
    cur: AsyncCursor[Any],
    *,
    occurred_at: datetime,
    imei: str,
    payload: dict[str, Any],
) -> bool:
    """Project one position_fix event into the live and history tables.

    The fix is upserted into state.live_positions (one row per imei; an
    existing row is only overwritten by a strictly newer occurred_at) and is
    always appended to state.position_history.

    Args:
        cur: Cursor inside the caller's transaction.
        occurred_at: Timestamp of the fix.
        imei: Device that reported the fix.
        payload: position_fix payload (lat, lng, speed_kmh, direction_deg,
            acc, altitude_m, satellites, source).

    Returns:
        True when the fix was written; False when the payload lacks lat/lng
        or the device is not mapped to a vehicle (nothing is written).
    """
    lat = payload.get("lat")
    lng = payload.get("lng")
    if lat is None or lng is None:
        return False

    vehicle_id = await _resolve_device(cur, imei)
    if vehicle_id is None:
        return False

    # acc_state is an integer column. bool is a subclass of int in Python, so
    # a JSON true/false would pass a plain isinstance(..., int) check and be
    # bound as a boolean parameter, which PostgreSQL refuses to assign to an
    # integer column — failing the whole batch. Normalise bool to 0/1 and
    # drop every other non-integer value.
    acc_raw = payload.get("acc")
    if isinstance(acc_raw, bool):
        acc_state: int | None = int(acc_raw)
    elif isinstance(acc_raw, int):
        acc_state = acc_raw
    else:
        acc_state = None

    source = payload.get("source") or "unknown"
    # WKT uses x y order, i.e. longitude first.
    geom_wkt = f"POINT({lng} {lat})"

    await cur.execute(
        """
        INSERT INTO state.live_positions (
            imei, vehicle_id, occurred_at, geom, speed_kmh, direction_deg,
            acc_state, source, parser_version, updated_at
        ) VALUES (
            %s, %s, %s, ST_SetSRID(ST_GeomFromText(%s), 4326),
            %s, %s, %s, %s, %s, now()
        )
        ON CONFLICT (imei) DO UPDATE
        SET vehicle_id = EXCLUDED.vehicle_id,
            occurred_at = EXCLUDED.occurred_at,
            geom = EXCLUDED.geom,
            speed_kmh = EXCLUDED.speed_kmh,
            direction_deg = EXCLUDED.direction_deg,
            acc_state = EXCLUDED.acc_state,
            source = EXCLUDED.source,
            parser_version = EXCLUDED.parser_version,
            updated_at = now()
        WHERE EXCLUDED.occurred_at > state.live_positions.occurred_at
        """,
        (
            imei, vehicle_id, occurred_at, geom_wkt,
            payload.get("speed_kmh"),
            payload.get("direction_deg"),
            acc_state,
            source,
            PARSER_VERSION,
        ),
    )

    await cur.execute(
        """
        INSERT INTO state.position_history (
            vehicle_id, imei, occurred_at, geom, speed_kmh, direction_deg,
            acc_state, altitude_m, satellites, source, parser_version
        ) VALUES (
            %s, %s, %s, ST_SetSRID(ST_GeomFromText(%s), 4326),
            %s, %s, %s, %s, %s, %s, %s
        )
        """,
        (
            vehicle_id, imei, occurred_at, geom_wkt,
            payload.get("speed_kmh"),
            payload.get("direction_deg"),
            acc_state,
            payload.get("altitude_m"),
            payload.get("satellites"),
            source,
            PARSER_VERSION,
        ),
    )
    return True
|
||||
|
||||
|
||||
async def drain() -> int:
    """Process pending position_fix events. Returns count drained.

    Claims up to DRAIN_BATCH unflagged position_fix rows from events.parsed
    (oldest occurred_at first, skipping rows locked by another transaction),
    projects each one, then flags every claimed row by merging
    PROJECTED_FLAG_KEY into its payload. Everything runs in one transaction:
    if any projection raises, the error is logged and re-raised, so nothing
    is committed and the batch is picked up again later.

    Rows that _project_one declines (missing lat/lng, unmapped device) are
    flagged as well, so they are not retried.
    """
    pool = await get_pool()
    processed = 0
    async with pool.connection() as conn, conn.transaction(), conn.cursor() as cur:
        await cur.execute(
            """
            SELECT parsed_id, occurred_at, imei, payload
            FROM events.parsed
            WHERE kind = 'position_fix'
              AND NOT (payload ? %s)
            ORDER BY occurred_at
            FOR UPDATE SKIP LOCKED
            LIMIT %s
            """,
            (PROJECTED_FLAG_KEY, DRAIN_BATCH),
        )
        rows = await cur.fetchall()
        for _, occurred_at, imei, payload in rows:
            try:
                await _project_one(
                    cur,
                    occurred_at=occurred_at,
                    imei=imei,
                    payload=payload,
                )
            except Exception:
                log.exception("projector.failed", imei=imei)
                raise
        if rows:
            # Store a real timestamp under the "..._projected_at" key (the
            # selection above only tests for the key's presence, so the value
            # is informational). Previously the literal string "now" was
            # written, which carried no information.
            projected_at = datetime.now().astimezone().isoformat()
            flag = Jsonb({PROJECTED_FLAG_KEY: projected_at})
            await cur.executemany(
                """
                UPDATE events.parsed
                SET payload = payload || %s::jsonb
                WHERE parsed_id = %s AND occurred_at = %s
                """,
                [(flag, int(pid), occ) for pid, occ, _, _ in rows],
            )
            processed = len(rows)
    if processed:
        log.info("projector.drained", count=processed)
    return processed
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
from slowapi import Limiter
|
||||
from slowapi.util import get_remote_address
|
||||
|
||||
# Shared slowapi limiter keyed by the client's remote address. No default
# limits are configured; routes declare their own via @limiter.limit(...).
limiter = Limiter(key_func=get_remote_address, default_limits=[])
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
from fastapi import APIRouter, Form, HTTPException
|
||||
|
||||
from app.auth import (
|
||||
TokenPair,
|
||||
fetch_account,
|
||||
issue_access_token,
|
||||
issue_refresh_token,
|
||||
store_refresh_token,
|
||||
verify_password,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/auth", tags=["auth"])
|
||||
|
||||
|
||||
@router.post("/token", response_model=TokenPair)
async def issue_token(
    username: str = Form(...),
    password: str = Form(...),
) -> TokenPair:
    # Password login: exchanges form-posted credentials for an access/refresh
    # token pair. Unknown usernames and wrong passwords both produce the same
    # 401 response. Only the refresh token's hash is persisted.
    record = await fetch_account(username)
    if record is not None:
        account_id, password_hash, scopes = record
        if verify_password(password, password_hash):
            access, ttl = issue_access_token(account_id, scopes)
            refresh, expires_at, refresh_hash = issue_refresh_token(account_id)
            await store_refresh_token(account_id, refresh_hash, expires_at)
            return TokenPair(
                access_token=access,
                refresh_token=refresh,
                expires_in=ttl,
            )
    raise HTTPException(status_code=401, detail="invalid credentials")
|
||||
|
|
@ -1,110 +0,0 @@
|
|||
import hmac
|
||||
import json
|
||||
from typing import Any, Literal
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
from psycopg.types.json import Jsonb
|
||||
|
||||
from app.config import get_settings
|
||||
from app.db import get_pool
|
||||
from app.rate_limit import limiter
|
||||
|
||||
router = APIRouter(prefix="/push/jimi", tags=["push"])
|
||||
|
||||
# Upper bound on array items kept from one POST; _coerce_payload truncates
# longer arrays to this length.
MAX_ITEMS_PER_POST = 5000
# Response body returned to the caller after a push has been stored.
SUCCESS = {"code": 0, "msg": "success"}
|
||||
|
||||
# Message types accepted by this router; each has a matching POST route
# below and is stored as events.raw.msg_type.
MsgType = Literal[
    "pushobd",
    "pushfaultinfo",
    "pushalarm",
    "pushgps",
    "pushhb",
    "pushtripreport",
    "pushevent",
]
|
||||
|
||||
|
||||
def _verify_token(token: str) -> None:
    """Check the shared push token supplied in the form body.

    Args:
        token: Token value sent by the caller (untrusted).

    Raises:
        HTTPException: 503 when no push token is configured, 401 when the
            supplied token does not match.
    """
    expected = get_settings().tracksolid_push_token
    if not expected:
        raise HTTPException(status_code=503, detail="push token not configured")
    # hmac.compare_digest only accepts str arguments that are pure ASCII and
    # raises TypeError otherwise, which would turn a non-ASCII token from an
    # untrusted caller into a 500. Compare the encoded bytes instead;
    # surrogatepass keeps the encoding lossless for any str.
    supplied_bytes = token.encode("utf-8", "surrogatepass")
    expected_bytes = expected.encode("utf-8", "surrogatepass")
    if not hmac.compare_digest(supplied_bytes, expected_bytes):
        raise HTTPException(status_code=401, detail="invalid token")
|
||||
|
||||
|
||||
def _coerce_payload(raw: str) -> dict[str, Any]:
|
||||
if not raw:
|
||||
return {"_raw": ""}
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return {"_raw": raw}
|
||||
if isinstance(parsed, list):
|
||||
if len(parsed) > MAX_ITEMS_PER_POST:
|
||||
parsed = parsed[:MAX_ITEMS_PER_POST]
|
||||
return {"_list": parsed}
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
return {"_raw": raw}
|
||||
|
||||
|
||||
async def _ingest(msg_type: str, raw_data: str) -> None:
    """Store one push as a row in events.raw with source "tracksolid_push".

    The posted data string is coerced with _coerce_payload and saved as the
    row's jsonb payload.
    """
    pool = await get_pool()
    row = ("tracksolid_push", msg_type, Jsonb(_coerce_payload(raw_data)))
    async with pool.connection() as conn, conn.cursor() as cur:
        await cur.execute(
            "INSERT INTO events.raw (source, msg_type, payload) VALUES (%s, %s, %s)",
            row,
        )
|
||||
|
||||
|
||||
async def _handle(request: Request, msg_type: MsgType) -> dict[str, Any]:
    """Shared body of every push endpoint.

    Reads the form, verifies the "token" field, stores the "data" field
    (falling back to "data_list", then to an empty string) and returns the
    fixed SUCCESS response. Token failures raise before anything is stored.
    """
    form = await request.form()
    _verify_token(str(form.get("token", "")))
    posted = form.get("data") or form.get("data_list") or ""
    await _ingest(msg_type, str(posted))
    return SUCCESS
|
||||
|
||||
|
||||
# "pushobd" webhook: limited to 1000 requests/minute; token verification and
# storage are delegated to _handle.
@router.post("/pushobd")
@limiter.limit("1000/minute")
async def push_obd(request: Request) -> dict[str, Any]:
    return await _handle(request, "pushobd")
|
||||
|
||||
|
||||
# "pushfaultinfo" webhook: limited to 1000 requests/minute; token
# verification and storage are delegated to _handle.
@router.post("/pushfaultinfo")
@limiter.limit("1000/minute")
async def push_fault(request: Request) -> dict[str, Any]:
    return await _handle(request, "pushfaultinfo")
|
||||
|
||||
|
||||
# "pushalarm" webhook: limited to 1000 requests/minute; token verification
# and storage are delegated to _handle.
@router.post("/pushalarm")
@limiter.limit("1000/minute")
async def push_alarm(request: Request) -> dict[str, Any]:
    return await _handle(request, "pushalarm")
|
||||
|
||||
|
||||
# "pushgps" webhook: limited to 1000 requests/minute; token verification and
# storage are delegated to _handle.
@router.post("/pushgps")
@limiter.limit("1000/minute")
async def push_gps(request: Request) -> dict[str, Any]:
    return await _handle(request, "pushgps")
|
||||
|
||||
|
||||
# "pushhb" (heartbeat) webhook: limited to 1000 requests/minute; token
# verification and storage are delegated to _handle.
@router.post("/pushhb")
@limiter.limit("1000/minute")
async def push_hb(request: Request) -> dict[str, Any]:
    return await _handle(request, "pushhb")
|
||||
|
||||
|
||||
# "pushtripreport" webhook: limited to 1000 requests/minute; token
# verification and storage are delegated to _handle.
@router.post("/pushtripreport")
@limiter.limit("1000/minute")
async def push_trip(request: Request) -> dict[str, Any]:
    return await _handle(request, "pushtripreport")
|
||||
|
||||
|
||||
# "pushevent" webhook: limited to 1000 requests/minute; token verification
# and storage are delegated to _handle.
@router.post("/pushevent")
@limiter.limit("1000/minute")
async def push_event(request: Request) -> dict[str, Any]:
    return await _handle(request, "pushevent")
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
import json
|
||||
from typing import Annotated, Any
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from psycopg.types.json import Jsonb
|
||||
|
||||
from app.auth import AuthAccount, require_scope
|
||||
from app.db import get_pool
|
||||
from app.models.views import LiveViewResponse
|
||||
from app.rate_limit import limiter
|
||||
|
||||
router = APIRouter(prefix="/api/views", tags=["views"])
|
||||
|
||||
_FILTERS_DESC = "JSON object: cost_centre, assigned_city, vehicle_numbers[]"
|
||||
|
||||
|
||||
def _parse_filters(filters_q: str | None) -> dict[str, Any]:
|
||||
if not filters_q:
|
||||
return {}
|
||||
try:
|
||||
parsed = json.loads(filters_q)
|
||||
except json.JSONDecodeError as exc:
|
||||
raise HTTPException(status_code=400, detail=f"invalid filters json: {exc}") from exc
|
||||
if not isinstance(parsed, dict):
|
||||
raise HTTPException(status_code=400, detail="filters must be a JSON object")
|
||||
return parsed
|
||||
|
||||
|
||||
@router.get("/live", response_model=LiveViewResponse)
@limiter.limit("60/minute")
async def live_view(
    request: Request,
    _account: Annotated[AuthAccount, Depends(require_scope("read:fleet"))],
    filters: Annotated[str | None, Query(description=_FILTERS_DESC)] = None,
) -> LiveViewResponse:
    # Live dashboard view: requires the "read:fleet" scope, is limited to
    # 60 requests/minute, and returns whatever serve.fn_live_view produces
    # for the decoded filters. A NULL result from the function is a 500.
    _ = request  # accepted for the limiter decorator; not otherwise used
    filters_dict = _parse_filters(filters)
    pool = await get_pool()
    async with pool.connection() as conn, conn.cursor() as cur:
        await cur.execute("SELECT serve.fn_live_view(%s)", (Jsonb(filters_dict),))
        row = await cur.fetchone()
        view = None if row is None else row[0]
        if view is None:
            raise HTTPException(status_code=500, detail="serve.fn_live_view returned NULL")
        return LiveViewResponse.model_validate(view)
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
"""Postgres LISTEN/NOTIFY driver with timer-based sweep fallback.
|
||||
|
||||
The handler is invoked on every notification AND every sweep_interval seconds,
|
||||
so a missed NOTIFY (e.g. connection blip) never wedges the worker. Under
|
||||
normal operation the timer fires on an empty queue and is a no-op.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any
|
||||
|
||||
import psycopg
|
||||
import structlog
|
||||
|
||||
from app.config import get_settings
|
||||
|
||||
log = structlog.get_logger("listener")
|
||||
|
||||
Handler = Callable[[], Awaitable[Any]]
|
||||
|
||||
|
||||
async def listen_forever(
    channel: str,
    handler: Handler,
    *,
    sweep_interval: float = 5.0,
) -> None:
    """Run the LISTEN loop for `channel` until the task is cancelled.

    Each pass delegates to _run_once. Any exception other than cancellation
    is logged and followed by a 2-second pause before reconnecting;
    cancellation is re-raised so the task can shut down.
    """
    settings = get_settings()
    restart_delay = 2.0
    while True:
        try:
            await _run_once(
                settings.database_url,
                channel,
                handler,
                sweep_interval,
            )
        except asyncio.CancelledError:
            raise
        except Exception:
            log.exception("listener.crashed_restarting", channel=channel)
            await asyncio.sleep(restart_delay)
|
||||
|
||||
|
||||
async def _run_once(
    conninfo: str,
    channel: str,
    handler: Handler,
    sweep_interval: float,
) -> None:
    """Hold one LISTEN connection and invoke the handler until it fails.

    Opens a dedicated autocommit connection, subscribes to `channel`, runs
    the handler once immediately, then runs it again after every
    notification or after `sweep_interval` seconds without one. The
    connection is always closed on exit. Handler errors are contained by
    _safe_call; connection errors propagate to the caller.

    Args:
        conninfo: Connection string for the dedicated connection.
        channel: Channel name; interpolated into the LISTEN statement as an
            identifier, so it must come from trusted code, never from input.
        handler: Coroutine function to invoke.
        sweep_interval: Maximum seconds to wait between handler runs.
    """
    conn = await psycopg.AsyncConnection.connect(conninfo, autocommit=True)
    try:
        async with conn.cursor() as cur:
            await cur.execute(f"LISTEN {channel}")
        log.info("listener.subscribed", channel=channel)

        await _safe_call(handler, channel)

        while True:
            # Use a fresh generator for every wait. When wait_for times out it
            # cancels the pending anext(); the cancellation is raised inside
            # the async generator and finalises it, so reusing the same
            # generator would make the next anext() raise StopAsyncIteration
            # and tear down the connection on every idle sweep.
            notifies = conn.notifies()
            try:
                with contextlib.suppress(TimeoutError):
                    await asyncio.wait_for(anext(notifies), timeout=sweep_interval)
            finally:
                await notifies.aclose()
            await _safe_call(handler, channel)
    finally:
        await conn.close()
|
||||
|
||||
|
||||
async def _safe_call(handler: Handler, channel: str) -> None:
    """Await the handler, logging (not propagating) any Exception it raises.

    This keeps a failing handler from tearing down the listener loop.
    """
    try:
        await handler()
    except Exception:
        log.exception("listener.handler_failed", channel=channel)
    return None
|
||||
|
|
@ -1,98 +0,0 @@
|
|||
"""Parser worker — drains events.raw to events.parsed."""
|
||||
|
||||
import structlog
|
||||
from psycopg.types.json import Jsonb
|
||||
|
||||
from app.db import get_pool
|
||||
from app.parsers.jimi import PARSER_VERSION, UnsupportedMsgType, parse_raw
|
||||
|
||||
log = structlog.get_logger("worker.parser")
|
||||
|
||||
DRAIN_BATCH = 200
|
||||
|
||||
|
||||
async def drain() -> int:
    """Parse one batch of unparsed raw events. Returns the number of raw rows handled.

    Inside a single transaction this claims up to DRAIN_BATCH rows from
    events.raw that have no parsed_at (oldest received_at first, skipping
    rows locked by another transaction), parses each with parse_raw, and
    then writes three batches: parsed events, parser errors, and the
    parsed_at/parser_version marks on the raw rows.

    Every claimed raw row is marked, whatever the outcome:
    - UnsupportedMsgType: marked only, nothing else recorded;
    - any other exception: recorded in events.parser_errors, then marked;
    - success: one events.parsed row per ParsedEvent, then marked.
    """
    pool = await get_pool()
    processed = 0
    async with pool.connection() as conn, conn.transaction(), conn.cursor() as cur:
        await cur.execute(
            """
            SELECT event_id, received_at, source, msg_type, account_id, payload
            FROM events.raw
            WHERE parsed_at IS NULL
            ORDER BY received_at
            FOR UPDATE SKIP LOCKED
            LIMIT %s
            """,
            (DRAIN_BATCH,),
        )
        rows = await cur.fetchall()
        if not rows:
            return 0

        # Parameter tuples accumulated for the three executemany calls below.
        parsed_rows: list[tuple[object, ...]] = []
        error_rows: list[tuple[object, ...]] = []
        mark_rows: list[tuple[object, ...]] = []

        for event_id, received_at, source, msg_type, account_id, payload in rows:
            try:
                events = parse_raw(source, msg_type, payload, account_id)
            except UnsupportedMsgType:
                # No parser for this message: mark it so it is not retried.
                mark_rows.append((PARSER_VERSION, event_id, received_at))
                continue
            except Exception as exc:
                # Keep the failure (message capped at 1000 chars) together
                # with the offending payload, then mark the row.
                error_rows.append((
                    event_id, received_at, PARSER_VERSION,
                    exc.__class__.__name__, str(exc)[:1000], Jsonb(payload),
                ))
                mark_rows.append((PARSER_VERSION, event_id, received_at))
                continue

            for ev in events:
                parsed_rows.append((
                    event_id, received_at, ev.occurred_at, ev.kind,
                    # A missing account id is stored as an empty string.
                    ev.account_id or "", ev.imei, Jsonb(ev.payload),
                    PARSER_VERSION,
                ))
            mark_rows.append((PARSER_VERSION, event_id, received_at))

        if parsed_rows:
            await cur.executemany(
                """
                INSERT INTO events.parsed (
                    raw_event_id, raw_received_at, occurred_at, kind,
                    account_id, imei, payload, parser_version
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
                """,
                parsed_rows,
            )

        if error_rows:
            await cur.executemany(
                """
                INSERT INTO events.parser_errors (
                    raw_event_id, raw_received_at, parser_version,
                    error_class, error_message, payload
                ) VALUES (%s, %s, %s, %s, %s, %s)
                """,
                error_rows,
            )

        # mark_rows holds one entry per claimed raw row.
        await cur.executemany(
            """
            UPDATE events.raw
            SET parsed_at = now(), parser_version = %s
            WHERE event_id = %s AND received_at = %s
            """,
            mark_rows,
        )
        processed = len(rows)

    if processed:
        log.info(
            "parser.drained",
            raw=processed,
            parsed=len(parsed_rows),
            errors=len(error_rows),
        )
    return processed
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
-- migrate:up

-- Extensions used by later migrations: hypertables (timescaledb) and
-- geometry columns (postgis).
CREATE EXTENSION IF NOT EXISTS timescaledb;
CREATE EXTENSION IF NOT EXISTS postgis;

-- One schema per concern.
CREATE SCHEMA IF NOT EXISTS events;
CREATE SCHEMA IF NOT EXISTS state;
CREATE SCHEMA IF NOT EXISTS domain;
CREATE SCHEMA IF NOT EXISTS geo;
CREATE SCHEMA IF NOT EXISTS serve;
CREATE SCHEMA IF NOT EXISTS slo;
CREATE SCHEMA IF NOT EXISTS ops;
CREATE SCHEMA IF NOT EXISTS auth;

-- migrate:down

-- Schemas are dropped in reverse creation order; CASCADE removes everything
-- inside them. The extensions are left installed.
DROP SCHEMA IF EXISTS auth CASCADE;
DROP SCHEMA IF EXISTS ops CASCADE;
DROP SCHEMA IF EXISTS slo CASCADE;
DROP SCHEMA IF EXISTS serve CASCADE;
DROP SCHEMA IF EXISTS geo CASCADE;
DROP SCHEMA IF EXISTS domain CASCADE;
DROP SCHEMA IF EXISTS state CASCADE;
DROP SCHEMA IF EXISTS events CASCADE;
|
||||
|
|
@ -1,93 +0,0 @@
|
|||
-- migrate:up

-- Landing table for inbound payloads, stored verbatim as jsonb.
-- parsed_at / parser_version stay NULL until a parser has handled the row.
CREATE TABLE events.raw (
    event_id bigserial,
    received_at timestamptz NOT NULL DEFAULT now(),
    source text NOT NULL CHECK (source IN (
        'tracksolid_push',
        'tracksolid_poll_list',
        'tracksolid_poll_get',
        'whatsapp_fuel',
        'hr_extract',
        'ops_action',
        'legacy_import'
    )),
    account_id text,
    imei text,
    msg_type text,
    payload jsonb NOT NULL,
    parser_version int,
    parsed_at timestamptz,
    -- The time column is part of the key because the table is a hypertable
    -- partitioned on received_at.
    PRIMARY KEY (event_id, received_at)
);

SELECT create_hypertable('events.raw', 'received_at', chunk_time_interval => INTERVAL '1 day');

-- Partial index over the rows still waiting to be parsed.
CREATE INDEX events_raw_unparsed_idx
    ON events.raw (received_at)
    WHERE parsed_at IS NULL;

CREATE INDEX events_raw_account_imei_idx
    ON events.raw (account_id, imei, received_at DESC);

-- Normalized events derived from events.raw. raw_event_id / raw_received_at
-- point back at the originating raw row (no foreign key is declared).
CREATE TABLE events.parsed (
    parsed_id bigserial,
    raw_event_id bigint NOT NULL,
    raw_received_at timestamptz NOT NULL,
    occurred_at timestamptz NOT NULL,
    kind text NOT NULL,
    account_id text NOT NULL,
    imei text NOT NULL,
    payload jsonb NOT NULL,
    parser_version int NOT NULL,
    inserted_at timestamptz NOT NULL DEFAULT now(),
    PRIMARY KEY (parsed_id, occurred_at)
);

SELECT create_hypertable('events.parsed', 'occurred_at', chunk_time_interval => INTERVAL '1 day');

CREATE INDEX events_parsed_kind_imei_idx
    ON events.parsed (kind, imei, occurred_at DESC);

-- Raw events whose parsing failed, with the error class/message and a copy
-- of the payload.
CREATE TABLE events.parser_errors (
    error_id bigserial PRIMARY KEY,
    raw_event_id bigint NOT NULL,
    raw_received_at timestamptz NOT NULL,
    parser_version int NOT NULL,
    error_class text NOT NULL,
    error_message text NOT NULL,
    payload jsonb NOT NULL,
    inserted_at timestamptz NOT NULL DEFAULT now()
);

-- Announces each new raw row on channel 'events_raw_new' (payload: event_id).
CREATE OR REPLACE FUNCTION events.notify_raw_new() RETURNS trigger AS $$
BEGIN
    PERFORM pg_notify('events_raw_new', NEW.event_id::text);
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER events_raw_new_trigger
    AFTER INSERT ON events.raw
    FOR EACH ROW EXECUTE FUNCTION events.notify_raw_new();

-- Announces each new parsed row on channel 'events_parsed_new' (payload: parsed_id).
CREATE OR REPLACE FUNCTION events.notify_parsed_new() RETURNS trigger AS $$
BEGIN
    PERFORM pg_notify('events_parsed_new', NEW.parsed_id::text);
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER events_parsed_new_trigger
    AFTER INSERT ON events.parsed
    FOR EACH ROW EXECUTE FUNCTION events.notify_parsed_new();

-- migrate:down

-- Triggers first, then their functions, then the tables.
DROP TRIGGER IF EXISTS events_parsed_new_trigger ON events.parsed;
DROP TRIGGER IF EXISTS events_raw_new_trigger ON events.raw;
DROP FUNCTION IF EXISTS events.notify_parsed_new();
DROP FUNCTION IF EXISTS events.notify_raw_new();
DROP TABLE IF EXISTS events.parser_errors;
DROP TABLE IF EXISTS events.parsed;
DROP TABLE IF EXISTS events.raw;
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
-- migrate:up

-- Upstream accounts that devices belong to.
CREATE TABLE domain.accounts (
    account_id text PRIMARY KEY,
    name text NOT NULL,
    app_key text NOT NULL,
    is_active boolean NOT NULL DEFAULT true,
    created_at timestamptz NOT NULL DEFAULT now()
);

-- Fleet vehicles, unique by plate.
CREATE TABLE domain.vehicles (
    vehicle_id bigserial PRIMARY KEY,
    plate text NOT NULL UNIQUE,
    cost_centre text,
    assigned_city text,
    vehicle_class text,
    service_interval_km_override int,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX vehicles_cost_centre_idx ON domain.vehicles (cost_centre);
CREATE INDEX vehicles_assigned_city_idx ON domain.vehicles (assigned_city);

-- Devices keyed by imei. vehicle_id is nullable: a device may exist without
-- being mapped to a vehicle.
CREATE TABLE domain.devices (
    imei text PRIMARY KEY,
    account_id text NOT NULL REFERENCES domain.accounts(account_id),
    vehicle_id bigint REFERENCES domain.vehicles(vehicle_id),
    device_type text NOT NULL CHECK (device_type IN ('tracker', 'camera')),
    model text,
    lifecycle text NOT NULL CHECK (lifecycle IN (
        'provisioned',
        'active',
        'suspended',
        'decommissioned'
    )),
    activation_at timestamptz,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX devices_account_lifecycle_idx
    ON domain.devices (account_id, lifecycle);

-- Only mapped devices are indexed by vehicle.
CREATE INDEX devices_vehicle_idx
    ON domain.devices (vehicle_id)
    WHERE vehicle_id IS NOT NULL;

-- History of lifecycle transitions per device (no foreign key to devices).
CREATE TABLE domain.devices_audit (
    audit_id bigserial PRIMARY KEY,
    imei text NOT NULL,
    from_lifecycle text,
    to_lifecycle text NOT NULL,
    actor text NOT NULL,
    reason text,
    occurred_at timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX devices_audit_imei_idx
    ON domain.devices_audit (imei, occurred_at DESC);

-- migrate:down

-- Dependents before the tables they reference.
DROP TABLE IF EXISTS domain.devices_audit;
DROP TABLE IF EXISTS domain.devices;
DROP TABLE IF EXISTS domain.vehicles;
DROP TABLE IF EXISTS domain.accounts;
|
||||
|
|
@ -1,50 +0,0 @@
|
|||
-- migrate:up

-- Latest known position per device: imei is the primary key, so there is at
-- most one row per device.
CREATE TABLE state.live_positions (
    imei text PRIMARY KEY REFERENCES domain.devices(imei),
    vehicle_id bigint NOT NULL REFERENCES domain.vehicles(vehicle_id),
    occurred_at timestamptz NOT NULL,
    geom geometry(Point, 4326) NOT NULL,
    speed_kmh numeric,
    direction_deg numeric,
    acc_state int,
    source text NOT NULL,
    parser_version int NOT NULL,
    updated_at timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX live_positions_vehicle_idx ON state.live_positions (vehicle_id);
CREATE INDEX live_positions_occurred_idx ON state.live_positions (occurred_at DESC);

-- Append-only trail of position fixes; carries altitude_m and satellites in
-- addition to the live_positions columns.
CREATE TABLE state.position_history (
    history_id bigserial,
    vehicle_id bigint NOT NULL REFERENCES domain.vehicles(vehicle_id),
    imei text NOT NULL,
    occurred_at timestamptz NOT NULL,
    geom geometry(Point, 4326) NOT NULL,
    speed_kmh numeric,
    direction_deg numeric,
    acc_state int,
    altitude_m numeric,
    satellites int,
    source text NOT NULL,
    parser_version int NOT NULL,
    inserted_at timestamptz NOT NULL DEFAULT now(),
    -- The time column is part of the key because the table is a hypertable
    -- partitioned on occurred_at.
    PRIMARY KEY (history_id, occurred_at)
);

SELECT create_hypertable('state.position_history', 'occurred_at', chunk_time_interval => INTERVAL '7 days');

CREATE INDEX position_history_vehicle_time_idx
    ON state.position_history (vehicle_id, occurred_at DESC);

CREATE INDEX position_history_imei_time_idx
    ON state.position_history (imei, occurred_at DESC);

-- Spatial index on the fix geometry.
CREATE INDEX position_history_geom_idx
    ON state.position_history USING GIST (geom);

-- migrate:down

DROP TABLE IF EXISTS state.position_history;
DROP TABLE IF EXISTS state.live_positions;
|
||||
|
|
@ -1,58 +0,0 @@
|
|||
-- migrate:up

-- One row per SLO metric: the threshold to compare against and the
-- measurement window in seconds.
CREATE TABLE slo.targets (
    metric text PRIMARY KEY,
    threshold numeric NOT NULL,
    window_seconds int NOT NULL,
    description text,
    updated_at timestamptz NOT NULL DEFAULT now()
);

-- Seed targets.
-- NOTE(review): the metric name 'fix_freshness_pct_60s' says 60s while its
-- description says "within 90s" — confirm which value is intended.
INSERT INTO slo.targets (metric, threshold, window_seconds, description) VALUES
    ('fix_freshness_pct_60s', 95, 300, 'Pct of active devices with a fix within 90s, window 5 min'),
    ('parser_lag_p95_sec', 30, 300, 'P95 of received_at -> events.parsed insertion lag, window 5 min'),
    ('contract_drift_days', 1, 86400, 'Days since last successful sandbox contract validation');

-- Time series of measured values per metric.
CREATE TABLE slo.measurements (
    measurement_id bigserial,
    metric text NOT NULL REFERENCES slo.targets(metric),
    value numeric NOT NULL,
    measured_at timestamptz NOT NULL DEFAULT now(),
    PRIMARY KEY (measurement_id, measured_at)
);

SELECT create_hypertable('slo.measurements', 'measured_at', chunk_time_interval => INTERVAL '7 days');

CREATE INDEX slo_measurements_metric_time_idx
    ON slo.measurements (metric, measured_at DESC);

-- Latest measurement per target with a traffic-light status:
--   no measurement yet            -> 'unknown'
--   metric name contains 'pct'    -> higher is better (green when value >= threshold)
--   any other metric              -> lower is better  (green when value <= threshold)
CREATE OR REPLACE VIEW slo.v_current_status AS
SELECT
    t.metric,
    t.threshold,
    t.window_seconds,
    latest.value AS current_value,
    latest.measured_at AS measured_at,
    CASE
        WHEN latest.value IS NULL THEN 'unknown'
        WHEN t.metric LIKE '%pct%'
             AND latest.value >= t.threshold THEN 'green'
        WHEN t.metric LIKE '%pct%'
             AND latest.value < t.threshold THEN 'red'
        WHEN latest.value <= t.threshold THEN 'green'
        ELSE 'red'
    END AS status
FROM slo.targets t
-- LEFT JOIN keeps targets that have no measurements (their status is 'unknown').
LEFT JOIN LATERAL (
    SELECT value, measured_at
    FROM slo.measurements m
    WHERE m.metric = t.metric
    ORDER BY measured_at DESC
    LIMIT 1
) latest ON true;

-- migrate:down

DROP VIEW IF EXISTS slo.v_current_status;
DROP TABLE IF EXISTS slo.measurements;
DROP TABLE IF EXISTS slo.targets;
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
-- migrate:up
|
||||
|
||||
-- Login accounts. scopes defaults to an empty array and is_active to true;
-- last_login_at stays NULL until something sets it.
CREATE TABLE auth.accounts (
    account_id    bigserial   PRIMARY KEY,
    username      text        NOT NULL UNIQUE,
    password_hash text        NOT NULL,
    scopes        text[]      NOT NULL DEFAULT ARRAY[]::text[],
    is_active     boolean     NOT NULL DEFAULT true,
    created_at    timestamptz NOT NULL DEFAULT now(),
    last_login_at timestamptz
);
|
||||
|
||||
-- Issued tokens per account. The CHECK constraint currently admits only
-- token_type = 'refresh'. revoked_at IS NULL means the token has not been revoked.
-- NOTE(review): token_hash presumably holds a digest of the raw token rather
-- than the token itself — confirm against the issuing code.
CREATE TABLE auth.tokens (
    token_id   bigserial   PRIMARY KEY,
    account_id bigint      NOT NULL REFERENCES auth.accounts(account_id),
    token_type text        NOT NULL CHECK (token_type IN ('refresh')),
    token_hash text        NOT NULL UNIQUE,
    issued_at  timestamptz NOT NULL DEFAULT now(),
    expires_at timestamptz NOT NULL,
    revoked_at timestamptz
);

-- Partial index: only non-revoked tokens are indexed by account.
CREATE INDEX auth_tokens_account_idx
    ON auth.tokens (account_id)
    WHERE revoked_at IS NULL;
|
||||
|
||||
-- migrate:down
|
||||
|
||||
-- auth.tokens references auth.accounts, so it must be dropped first.
DROP TABLE IF EXISTS auth.tokens;
DROP TABLE IF EXISTS auth.accounts;
|
||||
|
|
@ -1,138 +0,0 @@
|
|||
-- migrate:up
|
||||
|
||||
-- serve.fn_live_view — single source of truth for the live dashboard.
-- Implements PRD F1.6 dedup (tracker-first → 24h freshness → camera fallback
-- → most-recent fix → activation_time tiebreak) AND PRD F1.11 lifecycle
-- filter (lifecycle='active' only).
--
-- Input: filters jsonb, every key optional (a NULL argument means "no filters"):
--   fresh_window     interval text, default '24 hours' (freshness gate for dedup)
--   offline_after    interval text, default '5 minutes' (older fixes => 'offline')
--   move_speed_kmh   numeric, default 5 (above this speed => 'moving')
--   cost_centre      exact match on domain.vehicles.cost_centre
--   assigned_city    exact match on domain.vehicles.assigned_city
--   vehicle_numbers  JSON array of plates (a single JSON string is also accepted)
--
-- Output: jsonb { summary, geojson (FeatureCollection), slo_status }.

CREATE OR REPLACE FUNCTION serve.fn_live_view(filters jsonb)
RETURNS jsonb
LANGUAGE plpgsql STABLE
AS $$
DECLARE
    fresh_window      interval := COALESCE((filters->>'fresh_window')::interval, interval '24 hours');
    offline_after     interval := COALESCE((filters->>'offline_after')::interval, interval '5 minutes');
    move_speed_kmh    numeric  := COALESCE((filters->>'move_speed_kmh')::numeric, 5);
    p_cost_centre     text     := filters->>'cost_centre';
    p_assigned_city   text     := filters->>'assigned_city';
    -- jsonb_array_elements_text raises on anything that is not an array, so a
    -- client sending {"vehicle_numbers": null} used to fail the whole view.
    -- Only expand real arrays; accept a lone string as a one-element list;
    -- treat everything else as "no plate filter".
    p_vehicle_numbers text[]   := CASE jsonb_typeof(filters->'vehicle_numbers')
        WHEN 'array'  THEN ARRAY(SELECT jsonb_array_elements_text(filters->'vehicle_numbers'))
        WHEN 'string' THEN ARRAY[filters->>'vehicle_numbers']
        ELSE NULL
    END;
    result jsonb;
BEGIN
    WITH candidates AS (
        -- every live position of an active device, joined to its vehicle and
        -- narrowed by the optional filters
        SELECT
            lp.imei,
            lp.occurred_at,
            lp.geom,
            lp.speed_kmh,
            lp.direction_deg,
            d.device_type,
            d.activation_at,
            v.vehicle_id,
            v.plate,
            v.cost_centre,
            v.assigned_city
        FROM state.live_positions lp
        JOIN domain.devices d  ON d.imei = lp.imei
        JOIN domain.vehicles v ON v.vehicle_id = d.vehicle_id
        WHERE d.lifecycle = 'active'
          AND (p_cost_centre     IS NULL OR v.cost_centre   = p_cost_centre)
          AND (p_assigned_city   IS NULL OR v.assigned_city = p_assigned_city)
          AND (p_vehicle_numbers IS NULL OR v.plate = ANY (p_vehicle_numbers))
    ),
    ranked AS (
        SELECT
            c.*,
            ROW_NUMBER() OVER (
                PARTITION BY c.vehicle_id
                ORDER BY
                    -- 24h freshness gate comes FIRST. Sorted after device
                    -- type (as it previously was) it could never change the
                    -- order, because occurred_at DESC already puts fresh
                    -- fixes ahead of stale ones within a type — so a stale
                    -- tracker always beat a fresh camera and the camera
                    -- fallback never happened.
                    CASE WHEN c.occurred_at > now() - fresh_window THEN 0 ELSE 1 END,
                    -- tracker-first within the same freshness class; a fresh
                    -- camera therefore wins only when no tracker is fresh
                    CASE c.device_type WHEN 'tracker' THEN 0 ELSE 1 END,
                    -- intra-type tiebreak by most-recent fix
                    c.occurred_at DESC,
                    -- final tiebreak by activation_time
                    c.activation_at DESC NULLS LAST
            ) AS rn
        FROM candidates c
    ),
    deduped AS (
        -- exactly one device per vehicle
        SELECT * FROM ranked WHERE rn = 1
    ),
    enriched AS (
        SELECT
            d.*,
            CASE
                WHEN d.occurred_at <= now() - offline_after THEN 'offline'
                WHEN d.speed_kmh IS NOT NULL AND d.speed_kmh > move_speed_kmh THEN 'moving'
                ELSE 'parked'
            END AS operational_state
        FROM deduped d
    ),
    summary AS (
        SELECT jsonb_build_object(
            'total_active', count(*),
            'moving',  count(*) FILTER (WHERE operational_state = 'moving'),
            'parked',  count(*) FILTER (WHERE operational_state = 'parked'),
            'offline', count(*) FILTER (WHERE operational_state = 'offline'),
            'below_freshness_slo', count(*) FILTER (
                WHERE occurred_at <= now() - interval '90 seconds'
            ),
            'as_of', to_char(now() AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS"Z"')
        ) AS s
        FROM enriched
    ),
    features AS (
        -- COALESCE: jsonb_agg over zero rows yields NULL, the client expects []
        SELECT COALESCE(jsonb_agg(
            jsonb_build_object(
                'type', 'Feature',
                'geometry', ST_AsGeoJSON(e.geom)::jsonb,
                'properties', jsonb_build_object(
                    'vehicle_id', e.vehicle_id,
                    'plate', e.plate,
                    'imei', e.imei,
                    'device_type', e.device_type,
                    'cost_centre', e.cost_centre,
                    'assigned_city', e.assigned_city,
                    'occurred_at', to_char(e.occurred_at AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS"Z"'),
                    'speed_kmh', e.speed_kmh,
                    'direction_deg', e.direction_deg,
                    'operational_state', e.operational_state,
                    'style_class', 'vehicle-' || e.operational_state
                )
            )
        ), '[]'::jsonb) AS feats
        FROM enriched e
    ),
    slo_block AS (
        -- metric -> {threshold, current, status}; {} when no targets exist
        SELECT COALESCE(jsonb_object_agg(
            metric,
            jsonb_build_object(
                'threshold', threshold,
                'current', current_value,
                'status', status
            )
        ), '{}'::jsonb) AS ss
        FROM slo.v_current_status
    )
    SELECT jsonb_build_object(
        'summary', (SELECT s FROM summary),
        'geojson', jsonb_build_object(
            'type', 'FeatureCollection',
            'features', (SELECT feats FROM features)
        ),
        'slo_status', (SELECT ss FROM slo_block)
    )
    INTO result;

    RETURN result;
END;
$$;
|
||||
|
||||
-- migrate:down
|
||||
|
||||
DROP FUNCTION IF EXISTS serve.fn_live_view(jsonb);
|
||||
|
|
@ -1,87 +0,0 @@
|
|||
# Development stack: TimescaleDB, pgbouncer in front of it, and the three
# application roles (gateway / worker / cron) running the same image.
name: fleet-platform-dev

services:
  db:
    image: timescale/timescaledb-ha:pg16
    environment:
      POSTGRES_DB: ${POSTGRES_DB:-fleet}
      POSTGRES_USER: ${POSTGRES_USER:-fleet}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
    ports:
      - "5432:5432"
    volumes:
      - db_data:/var/lib/postgresql/data
    healthcheck:
      # "$$" escapes compose interpolation so the variable is expanded by the
      # shell inside the container.
      test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER}"]
      interval: 5s
      timeout: 3s
      retries: 10

  pgbouncer:
    image: edoburu/pgbouncer:latest
    environment:
      DB_HOST: db
      DB_PORT: 5432
      DB_USER: ${POSTGRES_USER:-fleet}
      DB_PASSWORD: ${POSTGRES_PASSWORD}
      DB_NAME: ${POSTGRES_DB:-fleet}
      POOL_MODE: transaction
      AUTH_TYPE: scram-sha-256
    ports:
      - "6432:6432"
    depends_on:
      db:
        condition: service_healthy

  platform-gateway:
    # The only service with a build section; worker and cron reuse the
    # resulting fleet-platform:dev image.
    build:
      context: .
      args:
        GIT_SHA: dev
    image: fleet-platform:dev
    environment:
      APP_ROLE: gateway
      APP_MODE: dev
      DATABASE_URL: postgresql://${POSTGRES_USER:-fleet}:${POSTGRES_PASSWORD}@pgbouncer:6432/${POSTGRES_DB:-fleet}
      JWT_SECRET: ${JWT_SECRET}
      TRACKSOLID_PUSH_TOKEN: ${TRACKSOLID_PUSH_TOKEN}
    ports:
      - "8001:8000"
    volumes:
      - ./app:/srv/app/app
    depends_on:
      pgbouncer:
        condition: service_started
    # Overrides the image command to get auto-reload on the bind-mounted source.
    command: ["uvicorn", "app.entrypoints.gateway:app", "--host", "0.0.0.0", "--port", "8000", "--reload", "--reload-dir", "/srv/app/app"]

  platform-worker:
    image: fleet-platform:dev
    environment:
      APP_ROLE: worker
      APP_MODE: dev
      DATABASE_URL: postgresql://${POSTGRES_USER:-fleet}:${POSTGRES_PASSWORD}@pgbouncer:6432/${POSTGRES_DB:-fleet}
    ports:
      - "8002:8000"
    volumes:
      - ./app:/srv/app/app
    depends_on:
      pgbouncer:
        condition: service_started

  platform-cron:
    image: fleet-platform:dev
    environment:
      APP_ROLE: cron
      APP_MODE: dev
      DATABASE_URL: postgresql://${POSTGRES_USER:-fleet}:${POSTGRES_PASSWORD}@pgbouncer:6432/${POSTGRES_DB:-fleet}
      TRACKSOLID_API_BASE_URL: ${TRACKSOLID_API_BASE_URL}
    ports:
      - "8003:8000"
    volumes:
      - ./app:/srv/app/app
    depends_on:
      pgbouncer:
        condition: service_started

volumes:
  db_data:
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
# Package metadata, runtime/dev dependencies and tool configuration
# (hatchling build, ruff, mypy, pytest).
[project]
name = "fleet-platform"
version = "0.1.0"
description = "Fleet telematics platform — greenfield rebuild (Rahamafresh)"
requires-python = ">=3.12"
readme = "README.md"

dependencies = [
    "fastapi>=0.115",
    "uvicorn[standard]>=0.32",
    "psycopg[binary,pool]>=3.2",
    "pydantic>=2.9",
    "pydantic-settings>=2.6",
    "structlog>=24.4",
    "apscheduler>=3.10,<4",
    "bcrypt>=4.2",
    "pyjwt[crypto]>=2.10",
    "slowapi>=0.1.9",
    "httpx>=0.28",
    "python-multipart>=0.0.20",
]

[project.optional-dependencies]
dev = [
    "ruff>=0.8",
    "mypy>=1.13",
    "pytest>=8.3",
    "pytest-asyncio>=0.24",
    "pytest-httpx>=0.32",
    "anyio>=4.6",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["app"]

[tool.ruff]
line-length = 100
target-version = "py312"

[tool.ruff.lint]
select = ["E", "F", "I", "B", "UP", "SIM", "PL", "RUF"]
ignore = ["PLR0913", "PLR2004"]

[tool.ruff.lint.per-file-ignores]
# Same codes as the global ignore list above; kept so tests stay exempt even
# if the global list is tightened later.
"tests/*" = ["PLR0913", "PLR2004"]

[tool.mypy]
python_version = "3.12"
strict = true
warn_unused_ignores = true
disallow_untyped_defs = true
plugins = ["pydantic.mypy"]

[[tool.mypy.overrides]]
module = ["apscheduler.*", "slowapi.*"]
ignore_missing_imports = true

[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
addopts = "-ra --strict-markers"
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
#!/bin/sh
# Container entrypoint: start uvicorn for the role named by APP_ROLE
# (gateway when unset). Any other value exits with status 64.
set -eu

ROLE="${APP_ROLE:-gateway}"

case "$ROLE" in
    gateway|worker|cron)
        # exec replaces the shell so uvicorn receives signals directly
        exec uvicorn "app.entrypoints.${ROLE}:app" --host 0.0.0.0 --port 8000
        ;;
    *)
        echo "entrypoint: unknown APP_ROLE='$ROLE' (expected gateway|worker|cron)" >&2
        exit 64
        ;;
esac
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
import os

# Provide fallback settings for the test run. setdefault() leaves any value
# already present in the environment untouched.
os.environ.setdefault("DATABASE_URL", "postgresql://test:test@localhost:5432/test")
os.environ.setdefault("JWT_SECRET", "test-secret-not-for-prod")
os.environ.setdefault("APP_MODE", "dev")
os.environ.setdefault("APP_ROLE", "gateway")
|
||||
0
tests/fixtures/__init__.py
vendored
0
tests/fixtures/__init__.py
vendored
76
tests/fixtures/jimi_payloads.py
vendored
76
tests/fixtures/jimi_payloads.py
vendored
|
|
@ -1,76 +0,0 @@
|
|||
"""Synthetic Tracksolid wire payloads.

These match the field names Tracksolid sends on the wire. Real sandbox-account
fixtures will replace these in week 1 day 1 (see Phase-1 plan, week 2 step 1).
"""

# Single GPS push. Note "acc" is a string here but an int in the heartbeat below.
PUSH_GPS_SINGLE = {
    "deviceImei": "860112050000001",
    "gpsTime": "2026-05-22 12:34:56",
    "lat": -1.2864,
    "lng": 36.8172,
    "gpsSpeed": 42.5,
    "direction": 90,
    "acc": "1",
    "satelliteNum": 12,
    "altitude": 1795.0,
    "postType": 0,
}

# Single alarm push; uses "imei" rather than "deviceImei" and carries a position.
PUSH_ALARM_SINGLE = {
    "imei": "860112050000001",
    "alarmType": "panic",
    "alarmName": "Panic button",
    "alarmTime": "2026-05-22 12:35:10",
    "lat": -1.2864,
    "lng": 36.8172,
    "speed": 0.0,
    "deviceName": "FRSD-001",
}

# Single heartbeat push (no coordinates).
PUSH_HEARTBEAT_SINGLE = {
    "deviceImei": "860112050000001",
    "gateTime": "2026-05-22 12:30:00",
    "powerLevel": 88,
    "gsmSign": 27,
    "acc": 1,
    "powerStatus": 1,
}

# Device event push of type LOGIN.
PUSH_EVENT_LOGIN = {
    "deviceImei": "860112050000001",
    "type": "LOGIN",
    "gateTime": "2026-05-22 07:00:00",
    "timezone": "+03:00",
}

# Polled location-list response with two devices under the "_list" key.
POLL_LOCATION_LIST_RESPONSE = {
    "_list": [
        {
            "deviceImei": "860112050000001",
            "gpsTime": "2026-05-22 12:34:56",
            "lat": -1.2864,
            "lng": 36.8172,
            "gpsSpeed": 42.5,
            "direction": 90,
            "satelliteNum": 12,
        },
        {
            "deviceImei": "860112050000002",
            "gpsTime": "2026-05-22 12:34:50",
            "lat": -0.3476,
            "lng": 32.5825,
            "gpsSpeed": 0.0,
            "direction": 0,
            "satelliteNum": 9,
        },
    ]
}

# GPS fix at exactly (0, 0) — the "zero island" coordinate.
ZERO_ISLAND_FIX = {
    "deviceImei": "860112050000001",
    "gpsTime": "2026-05-22 12:00:00",
    "lat": 0.0,
    "lng": 0.0,
    "gpsSpeed": 0.0,
}
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
import pytest
|
||||
from fastapi import HTTPException
|
||||
|
||||
from app.auth import (
|
||||
decode_access_token,
|
||||
hash_password,
|
||||
issue_access_token,
|
||||
issue_refresh_token,
|
||||
verify_password,
|
||||
)
|
||||
|
||||
|
||||
def test_bcrypt_roundtrip() -> None:
    """A hashed password verifies against itself and rejects a different one."""
    h = hash_password("hunter2-correct-horse-battery")
    assert verify_password("hunter2-correct-horse-battery", h)
    assert not verify_password("wrong", h)
|
||||
|
||||
|
||||
def test_access_token_carries_scopes_and_decodes() -> None:
    """An issued access token decodes to the expected sub/scopes/typ claims."""
    token, ttl = issue_access_token(42, ["read:fleet"])
    assert ttl > 0
    claims = decode_access_token(token)
    # the account id is expected back as a string subject
    assert claims["sub"] == "42"
    assert claims["scopes"] == ["read:fleet"]
    assert claims["typ"] == "access"
|
||||
|
||||
|
||||
def test_decode_rejects_tampered_token() -> None:
    """A token whose signature was altered must be rejected with HTTP 401."""
    token, _ = issue_access_token(1, ["read:fleet"])
    # assumes the token is a compact JWS ("header.payload.signature")
    header, payload, signature = token.split(".")
    # Flip the FIRST signature character. Flipping the LAST one is unreliable:
    # when the signature length is not a multiple of 3 bytes (e.g. a 32-byte
    # HMAC-SHA256), the final base64url character carries unused padding bits,
    # so swapping "A" for "B" there decodes to the very same bytes and the
    # "tampered" token still verifies — the test then fails ~1 run in 16.
    flipped = "A" if signature[0] != "A" else "B"
    tampered = ".".join([header, payload, flipped + signature[1:]])
    with pytest.raises(HTTPException) as exc_info:
        decode_access_token(tampered)
    assert exc_info.value.status_code == 401
|
||||
|
||||
|
||||
def test_refresh_token_returns_distinct_raw_and_hash() -> None:
    """issue_refresh_token returns (raw, expires_at, token_hash) with a tz-aware expiry."""
    raw, expires_at, token_hash = issue_refresh_token(account_id=1)
    assert raw != token_hash
    # 64 characters — presumably a hex-encoded SHA-256 digest; confirm in app.auth
    assert len(token_hash) == 64
    assert expires_at.tzinfo is not None
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
import pytest
|
||||
|
||||
from app.parsers.jimi import UnsupportedMsgType, parse_raw
|
||||
from tests.fixtures.jimi_payloads import (
|
||||
POLL_LOCATION_LIST_RESPONSE,
|
||||
PUSH_ALARM_SINGLE,
|
||||
PUSH_EVENT_LOGIN,
|
||||
PUSH_GPS_SINGLE,
|
||||
PUSH_HEARTBEAT_SINGLE,
|
||||
ZERO_ISLAND_FIX,
|
||||
)
|
||||
|
||||
|
||||
def test_push_gps_single_produces_one_position_fix() -> None:
    """A single pushgps payload yields exactly one position_fix event."""
    events = parse_raw("tracksolid_push", "pushgps", PUSH_GPS_SINGLE, account_id="acct-1")
    assert len(events) == 1
    ev = events[0]
    assert ev.kind == "position_fix"
    assert ev.imei == "860112050000001"
    assert ev.account_id == "acct-1"
    assert ev.payload["lat"] == -1.2864
    # wire field gpsSpeed is expected under the normalized key speed_kmh
    assert ev.payload["speed_kmh"] == 42.5
|
||||
|
||||
|
||||
def test_zero_island_is_dropped() -> None:
    """A fix at latitude 0 / longitude 0 produces no events."""
    events = parse_raw("tracksolid_push", "pushgps", ZERO_ISLAND_FIX, account_id="acct-1")
    assert events == []
|
||||
|
||||
|
||||
def test_push_alarm_produces_alarm_and_crossfeed_position() -> None:
    """An alarm push yields both an alarm event and a position_fix event."""
    events = parse_raw("tracksolid_push", "pushalarm", PUSH_ALARM_SINGLE, account_id="acct-1")
    # sorted so the assertion does not depend on emission order
    kinds = sorted(e.kind for e in events)
    assert kinds == ["alarm", "position_fix"]
|
||||
|
||||
|
||||
def test_push_heartbeat_kind() -> None:
    """A heartbeat push yields one heartbeat event carrying power_level."""
    events = parse_raw("tracksolid_push", "pushhb", PUSH_HEARTBEAT_SINGLE, account_id="acct-1")
    assert len(events) == 1
    assert events[0].kind == "heartbeat"
    assert events[0].payload["power_level"] == 88
|
||||
|
||||
|
||||
def test_push_event_login() -> None:
    """A pushevent payload of type LOGIN yields one device_login event."""
    events = parse_raw("tracksolid_push", "pushevent", PUSH_EVENT_LOGIN, account_id="acct-1")
    assert len(events) == 1
    assert events[0].kind == "device_login"
|
||||
|
||||
|
||||
def test_poll_location_list_yields_one_event_per_device() -> None:
    """A polled location list yields one position_fix per listed device."""
    events = parse_raw(
        "tracksolid_poll_list", None, POLL_LOCATION_LIST_RESPONSE, account_id="acct-1"
    )
    assert len(events) == 2
    assert {e.imei for e in events} == {"860112050000001", "860112050000002"}
    assert all(e.kind == "position_fix" for e in events)
|
||||
|
||||
|
||||
def test_unsupported_msg_type_raises() -> None:
    """The "pushobd" message type is expected to raise UnsupportedMsgType."""
    with pytest.raises(UnsupportedMsgType):
        parse_raw("tracksolid_push", "pushobd", {}, account_id="acct-1")
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
from app.config import get_settings
|
||||
from app.entrypoints.gateway import app
|
||||
|
||||
|
||||
def test_settings_load() -> None:
    """Settings load, expose a known app_role and a non-empty jwt_secret."""
    settings = get_settings()
    assert settings.app_role in {"gateway", "worker", "cron"}
    assert settings.jwt_secret
|
||||
|
||||
|
||||
def test_create_app_importable() -> None:
    """The gateway app imports and its title starts with "fleet-platform"."""
    assert app.title.startswith("fleet-platform")
|
||||
|
|
@ -1,257 +0,0 @@
|
|||
// fleet-core.js
|
||||
// Shared client primitives for the fleet-platform dashboards.
|
||||
// MapLibre is the only external dependency; everything else is vanilla ES modules.
|
||||
//
|
||||
// All business logic lives server-side (PRD §8, arch §7). This module:
|
||||
// - authClient : token cache + login + apiFetch wrapper
|
||||
// - initMap : MapLibre instance + vehicle layer
|
||||
// - renderView : ingest {summary, geojson, slo_status} -> DOM + map
|
||||
// - initFilters : form-driven filter UI
|
||||
// - clockEAT : ticks the EAT clock element
|
||||
|
||||
// Prefix for API paths; the empty string means "same origin as the page".
const API_BASE = '';
// localStorage keys under which the session is cached.
const STORAGE_ACCESS = 'fleet.accessToken';
const STORAGE_REFRESH = 'fleet.refreshToken';
// Expiry is stored as unix seconds (see authClient.login / isAuthenticated).
const STORAGE_EXPIRES = 'fleet.expiresAt';
|
||||
|
||||
/* ---------- authClient ---------- */
|
||||
|
||||
/**
 * Session helper backed by localStorage: caches the access token, the refresh
 * token and the access-token expiry (unix seconds).
 */
export const authClient = {
  /**
   * @returns {boolean} true when an access token is cached and the stored
   *   expiry lies in the future.
   */
  isAuthenticated() {
    const expiresAt = Number(localStorage.getItem(STORAGE_EXPIRES) || 0);
    return localStorage.getItem(STORAGE_ACCESS) !== null && Date.now() < expiresAt * 1000;
  },

  /**
   * POSTs form-encoded credentials to /api/auth/token and caches the result.
   *
   * @param {string} username
   * @param {string} password
   * @returns {Promise<void>}
   * @throws {Error} with the server's `detail` text (or 'login failed') when
   *   the response is not OK or carries no usable access token.
   */
  async login(username, password) {
    const body = new URLSearchParams({ username, password });
    const res = await fetch(`${API_BASE}/api/auth/token`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      body,
    });
    if (!res.ok) {
      const detail = await res.json().catch(() => ({ detail: 'login failed' }));
      // `detail` may be missing or structured (not a string); fall back then.
      const message = typeof detail?.detail === 'string' ? detail.detail : '';
      throw new Error(message || 'login failed');
    }
    const payload = await res.json();
    // Without this check a malformed body would cache the literal string
    // "undefined" as the token and the session would look authenticated.
    if (typeof payload?.access_token !== 'string' || payload.access_token === '') {
      throw new Error('login failed');
    }
    // Missing, zero or non-numeric expires_in falls back to 900 seconds.
    const ttl = Number(payload.expires_in);
    const ttlSeconds = Number.isFinite(ttl) && ttl > 0 ? ttl : 900;
    const expiresAt = Math.floor(Date.now() / 1000) + ttlSeconds;

    localStorage.setItem(STORAGE_ACCESS, payload.access_token);
    if (typeof payload.refresh_token === 'string' && payload.refresh_token !== '') {
      localStorage.setItem(STORAGE_REFRESH, payload.refresh_token);
    } else {
      // never leave a stale (or "undefined") refresh token behind
      localStorage.removeItem(STORAGE_REFRESH);
    }
    localStorage.setItem(STORAGE_EXPIRES, String(expiresAt));
  },

  /** Removes every cached session value. */
  logout() {
    localStorage.removeItem(STORAGE_ACCESS);
    localStorage.removeItem(STORAGE_REFRESH);
    localStorage.removeItem(STORAGE_EXPIRES);
  },

  /**
   * Navigates to the login page when there is no valid session.
   *
   * @param {{loginPath?: string}} [options]
   * @returns {boolean} true when a session exists, false after redirecting.
   */
  requireSession({ loginPath = '/login.html' } = {}) {
    if (!this.isAuthenticated()) {
      window.location.href = loginPath;
      return false;
    }
    return true;
  },
};
|
||||
|
||||
/**
 * Authenticated JSON fetch against the platform API.
 *
 * @param {string} path - API path, resolved against API_BASE and the page origin.
 * @param {object} [options] - fetch options plus:
 * @param {Object<string, *>} [options.params] - query parameters; undefined,
 *   null and '' are skipped, strings are sent as-is, everything else is
 *   JSON-encoded.
 * @returns {Promise<*>} the parsed JSON body, or null for 204 No Content.
 * @throws {Error} 'unauthorized' on 401 (after clearing the session and
 *   navigating to /login.html), or '<status> <statusText>' on other failures.
 */
export async function apiFetch(path, { params, ...opts } = {}) {
  // Honour API_BASE the same way authClient.login does.
  const url = new URL(`${API_BASE}${path}`, window.location.origin);
  if (params) {
    for (const [key, value] of Object.entries(params)) {
      if (value !== undefined && value !== null && value !== '') {
        url.searchParams.set(key, typeof value === 'string' ? value : JSON.stringify(value));
      }
    }
  }

  const token = localStorage.getItem(STORAGE_ACCESS);
  const res = await fetch(url.toString(), {
    ...opts,
    headers: {
      // default first, so a caller-supplied Accept is no longer clobbered
      Accept: 'application/json',
      ...(opts.headers || {}),
      ...(token ? { Authorization: `Bearer ${token}` } : {}),
    },
  });

  if (res.status === 401) {
    authClient.logout();
    window.location.href = '/login.html';
    throw new Error('unauthorized');
  }
  if (!res.ok) {
    throw new Error(`${res.status} ${res.statusText}`);
  }
  // 204 has no body; res.json() would reject with a parse error.
  if (res.status === 204) {
    return null;
  }
  return res.json();
}
|
||||
|
||||
/* ---------- map ---------- */
|
||||
|
||||
// Circle colour per `style_class` feature property.
const STYLE_CLASS_COLORS = {
  'vehicle-moving': '#10b981',
  'vehicle-parked': '#3b82f6',
  'vehicle-offline': '#9ca3af',
};

// Ids of the GeoJSON source and the circle layer that renders it.
const VEHICLE_SOURCE = 'vehicles';
const VEHICLE_LAYER = 'vehicles-circle';
|
||||
|
||||
export function initMap(elementId, opts = {}) {
|
||||
const center = opts.center || [36.8172, -1.2864]; // Nairobi
|
||||
const zoom = opts.zoom ?? 7;
|
||||
const styleUrl =
|
||||
opts.styleUrl ||
|
||||
'https://basemaps.cartocdn.com/gl/voyager-gl-style/style.json';
|
||||
|
||||
// eslint-disable-next-line no-undef
|
||||
const map = new maplibregl.Map({
|
||||
container: elementId,
|
||||
style: styleUrl,
|
||||
center,
|
||||
zoom,
|
||||
attributionControl: true,
|
||||
});
|
||||
|
||||
map.on('load', () => {
|
||||
map.addSource(VEHICLE_SOURCE, {
|
||||
type: 'geojson',
|
||||
data: { type: 'FeatureCollection', features: [] },
|
||||
});
|
||||
map.addLayer({
|
||||
id: VEHICLE_LAYER,
|
||||
type: 'circle',
|
||||
source: VEHICLE_SOURCE,
|
||||
paint: {
|
||||
'circle-radius': 7,
|
||||
'circle-color': [
|
||||
'match',
|
||||
['get', 'style_class'],
|
||||
'vehicle-moving', STYLE_CLASS_COLORS['vehicle-moving'],
|
||||
'vehicle-parked', STYLE_CLASS_COLORS['vehicle-parked'],
|
||||
'vehicle-offline', STYLE_CLASS_COLORS['vehicle-offline'],
|
||||
'#6b7280',
|
||||
],
|
||||
'circle-stroke-color': '#ffffff',
|
||||
'circle-stroke-width': 1.5,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
map.on('click', VEHICLE_LAYER, (e) => {
|
||||
if (!e.features || !e.features[0]) return;
|
||||
const p = e.features[0].properties;
|
||||
// eslint-disable-next-line no-undef
|
||||
new maplibregl.Popup()
|
||||
.setLngLat(e.features[0].geometry.coordinates)
|
||||
.setHTML(_popupHtml(p))
|
||||
.addTo(map);
|
||||
});
|
||||
|
||||
map.on('mouseenter', VEHICLE_LAYER, () => (map.getCanvas().style.cursor = 'pointer'));
|
||||
map.on('mouseleave', VEHICLE_LAYER, () => (map.getCanvas().style.cursor = ''));
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
 * Builds the popup markup for one vehicle feature.
 *
 * @param {object} props - feature properties (plate, operational_state,
 *   cost_centre, assigned_city, speed_kmh, occurred_at).
 * @returns {string} HTML in which every interpolated value is entity-escaped.
 */
function _popupHtml(props) {
  // Replace the five HTML-significant characters with numeric entities.
  const safe = (s) => String(s ?? '').replace(/[&<>"']/g, (c) => `&#${c.charCodeAt(0)};`);
  return `
    <div style="font-family:system-ui,sans-serif;font-size:13px;line-height:1.4">
      <div><strong>${safe(props.plate)}</strong> · ${safe(props.operational_state)}</div>
      <div>Cost centre: ${safe(props.cost_centre || '—')}</div>
      <div>City: ${safe(props.assigned_city || '—')}</div>
      <div>Speed: ${safe(props.speed_kmh ?? '—')} km/h</div>
      <div>Last fix: ${safe(props.occurred_at)}</div>
    </div>
  `;
}
|
||||
|
||||
/* ---------- render ---------- */
|
||||
|
||||
/**
 * Pushes one live-view payload into the map and the side panels.
 *
 * @param {object} map - map returned by initMap.
 * @param {{summary?: object, geojson?: object, slo_status?: object}} payload
 *   Ignored entirely when it is missing or has no `geojson`.
 * @param {{summaryRoot?: object, sloRoot?: object}} [roots] - optional
 *   elements for the summary tiles and the SLO list.
 * @returns {void}
 */
export function renderView(map, payload, { summaryRoot, sloRoot } = {}) {
  if (!payload || !payload.geojson) return;

  const src = map.getSource(VEHICLE_SOURCE);
  if (src) {
    src.setData(payload.geojson);
  } else {
    // The vehicle source is only added once the map has loaded. A payload
    // that arrives earlier used to be dropped, leaving the map empty until
    // the next refresh; apply it as soon as the map reports 'load' instead.
    map.once('load', () => {
      map.getSource(VEHICLE_SOURCE)?.setData(payload.geojson);
    });
  }

  if (summaryRoot) _renderSummary(summaryRoot, payload.summary || {});
  if (sloRoot) _renderSlos(sloRoot, payload.slo_status || {});
}
|
||||
|
||||
/**
 * Renders the five summary tiles into `root`.
 *
 * @param {{innerHTML: string}} root - container element.
 * @param {object} summary - counts from the live view; a missing count is
 *   shown as an em dash.
 * @returns {void}
 */
function _renderSummary(root, summary) {
  // Values come from the server and are written via innerHTML: escape them.
  const esc = (s) => String(s).replace(/[&<>"']/g, (c) => `&#${c.charCodeAt(0)};`);
  const tiles = [
    { label: 'Active', value: summary.total_active ?? '—' },
    { label: 'Moving', value: summary.moving ?? '—' },
    { label: 'Parked', value: summary.parked ?? '—' },
    { label: 'Offline', value: summary.offline ?? '—' },
    { label: 'Below freshness SLO', value: summary.below_freshness_slo ?? '—' },
  ];
  root.innerHTML = tiles
    .map(
      (t) => `
        <div class="tile">
          <div class="tile-label">${esc(t.label)}</div>
          <div class="tile-value">${esc(t.value)}</div>
        </div>`,
    )
    .join('');
}
|
||||
|
||||
/**
 * Renders one row per SLO metric into `root`, or a placeholder when `slos`
 * is empty.
 *
 * @param {{innerHTML: string}} root - container element.
 * @param {Object<string, {status?: string, current?: *, threshold?: *}>} slos
 * @returns {void}
 */
function _renderSlos(root, slos) {
  const entries = Object.entries(slos);
  if (entries.length === 0) {
    root.innerHTML = '<div class="slo-empty">SLO data not yet available</div>';
    return;
  }
  // Metric names, values and status come from the server and end up in
  // innerHTML (status even inside an attribute): escape all of them.
  const esc = (s) => String(s).replace(/[&<>"']/g, (c) => `&#${c.charCodeAt(0)};`);
  root.innerHTML = entries
    .map(([metric, info]) => {
      const status = esc(info.status || 'unknown');
      const current = esc(info.current ?? '—');
      const threshold = esc(info.threshold ?? '—');
      return `
        <div class="slo slo-${status}">
          <span class="slo-name">${esc(metric)}</span>
          <span class="slo-value">${current} / ${threshold}</span>
          <span class="slo-status">${status}</span>
        </div>`;
    })
    .join('');
}
|
||||
|
||||
/* ---------- filters ---------- */
|
||||
|
||||
/**
 * Wires a filter form: on every `change` and `submit` the non-empty fields
 * are collected into a plain object and handed to `onChange`. Submission is
 * intercepted so the page never navigates.
 *
 * @param {object} formEl - the form element.
 * @param {(filters: Object<string, *>) => void} onChange
 * @returns {void}
 */
export function initFilters(formEl, onChange) {
  const collect = () => {
    const filters = {};
    for (const [name, raw] of new FormData(formEl).entries()) {
      // Trim text input: a whitespace-only field is "no filter", and stray
      // padding around a value would never match anything server-side.
      const value = typeof raw === 'string' ? raw.trim() : raw;
      if (value) filters[name] = value;
    }
    onChange(filters);
  };
  formEl.addEventListener('change', collect);
  formEl.addEventListener('submit', (e) => {
    e.preventDefault();
    collect();
  });
}
|
||||
|
||||
/* ---------- clockEAT ---------- */
|
||||
|
||||
/**
 * Shows the current Africa/Nairobi time as "HH:MM:SS EAT" in the given
 * element and refreshes it every second.
 *
 * @param {string} elementId - id of the element to update.
 * @returns {*} the setInterval handle (pass it to clearInterval to stop the
 *   clock), or undefined when the element does not exist.
 */
export function clockEAT(elementId) {
  const el = document.getElementById(elementId);
  if (!el) return undefined;

  // Built once: constructing an Intl formatter on every tick is wasted work.
  const fmt = new Intl.DateTimeFormat('en-GB', {
    timeZone: 'Africa/Nairobi',
    hour: '2-digit',
    minute: '2-digit',
    second: '2-digit',
    hour12: false,
  });
  const tick = () => {
    el.textContent = `${fmt.format(new Date())} EAT`;
  };

  tick(); // paint immediately instead of waiting for the first interval
  return setInterval(tick, 1000);
}
|
||||
|
|
@ -1,111 +0,0 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<title>Live Fleet · rahamafresh</title>
|
||||
<link rel="stylesheet" href="https://unpkg.com/maplibre-gl@4.7.1/dist/maplibre-gl.css" />
|
||||
<style>
|
||||
:root {
|
||||
--bg: #0f172a;
|
||||
--panel: #1e293b;
|
||||
--text: #f1f5f9;
|
||||
--muted: #94a3b8;
|
||||
--accent: #10b981;
|
||||
--warn: #f59e0b;
|
||||
--bad: #ef4444;
|
||||
}
|
||||
* { box-sizing: border-box; }
|
||||
html, body { margin: 0; height: 100%; background: var(--bg); color: var(--text);
|
||||
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, system-ui, sans-serif; }
|
||||
body { display: grid; grid-template-rows: auto 1fr; }
|
||||
header {
|
||||
display: flex; align-items: center; justify-content: space-between;
|
||||
padding: 10px 16px; background: var(--panel); border-bottom: 1px solid #0b1220;
|
||||
}
|
||||
header h1 { margin: 0; font-size: 14px; font-weight: 600; letter-spacing: 0.06em; text-transform: uppercase; color: var(--muted); }
|
||||
header .right { display: flex; gap: 16px; align-items: center; font-size: 13px; color: var(--muted); }
|
||||
main { display: grid; grid-template-columns: 320px 1fr; min-height: 0; }
|
||||
aside { padding: 12px; overflow: auto; border-right: 1px solid #0b1220; background: var(--panel); }
|
||||
#map { width: 100%; height: 100%; }
|
||||
.tile-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 8px; margin-bottom: 16px; }
|
||||
.tile { background: #0b1220; padding: 10px; border-radius: 6px; }
|
||||
.tile-label { font-size: 11px; text-transform: uppercase; color: var(--muted); letter-spacing: 0.04em; }
|
||||
.tile-value { font-size: 22px; font-weight: 600; margin-top: 2px; }
|
||||
h3 { font-size: 11px; text-transform: uppercase; color: var(--muted); margin: 16px 0 8px; letter-spacing: 0.06em; }
|
||||
.slo { display: grid; grid-template-columns: 1fr auto auto; gap: 6px; padding: 6px 8px; border-radius: 4px; font-size: 12px; margin-bottom: 4px; background: #0b1220; }
|
||||
.slo-name { color: var(--muted); }
|
||||
.slo-status { text-transform: uppercase; font-size: 10px; letter-spacing: 0.06em; }
|
||||
.slo-green .slo-status { color: var(--accent); }
|
||||
.slo-red .slo-status { color: var(--bad); }
|
||||
.slo-unknown .slo-status { color: var(--muted); }
|
||||
.slo-empty { color: var(--muted); font-size: 12px; }
|
||||
form.filters { display: grid; gap: 8px; }
|
||||
form.filters input, form.filters select { width: 100%; background: #0b1220; color: var(--text); border: 1px solid #0b1220; border-radius: 4px; padding: 6px 8px; font-size: 12px; }
|
||||
form.filters label { font-size: 11px; text-transform: uppercase; color: var(--muted); letter-spacing: 0.06em; }
|
||||
button.logout { background: transparent; color: var(--muted); border: 1px solid var(--muted); padding: 4px 10px; border-radius: 4px; cursor: pointer; font-size: 12px; }
|
||||
button.logout:hover { color: var(--text); border-color: var(--text); }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>fleet-platform · live</h1>
|
||||
<div class="right">
|
||||
<span id="clock"></span>
|
||||
<button class="logout" onclick="window.fleetLogout()">Sign out</button>
|
||||
</div>
|
||||
</header>
|
||||
<main>
|
||||
<aside>
|
||||
<h3>Fleet now</h3>
|
||||
<div id="summary" class="tile-grid"></div>
|
||||
|
||||
<h3>SLOs</h3>
|
||||
<div id="slos"></div>
|
||||
|
||||
<h3>Filters</h3>
|
||||
<form class="filters" id="filters">
|
||||
<label for="f-cost">Cost centre</label>
|
||||
<input id="f-cost" name="cost_centre" placeholder="e.g. Nairobi-North" />
|
||||
<label for="f-city">Assigned city</label>
|
||||
<input id="f-city" name="assigned_city" placeholder="e.g. Nairobi" />
|
||||
</form>
|
||||
</aside>
|
||||
<div id="map"></div>
|
||||
</main>
|
||||
|
||||
<script src="https://unpkg.com/maplibre-gl@4.7.1/dist/maplibre-gl.js"></script>
|
||||
<script type="module">
  // Live dashboard bootstrap: guard the session, start the clock, create the
  // map and poll /api/views/live every 15 s (and on every filter change).
  import { authClient, apiFetch, initMap, renderView, initFilters, clockEAT } from '/fleet-core.js';

  // Used by the header's inline onclick handler.
  window.fleetLogout = () => { authClient.logout(); window.location.href = '/login.html'; };

  function startDashboard() {
    clockEAT('clock');

    const map = initMap('map');
    const summaryEl = document.getElementById('summary');
    const slosEl = document.getElementById('slos');
    let currentFilters = {};
    // Sequence number of the newest request. A slow response that arrives
    // after a newer request was issued (timer tick vs. filter change) is
    // dropped so it cannot overwrite fresher data.
    let latestRequest = 0;

    const refresh = async () => {
      const requestId = ++latestRequest;
      try {
        const params = Object.keys(currentFilters).length ? { filters: currentFilters } : {};
        const payload = await apiFetch('/api/views/live', { params });
        if (requestId !== latestRequest) return;
        renderView(map, payload, { summaryRoot: summaryEl, sloRoot: slosEl });
      } catch (err) {
        console.error('refresh.failed', err);
      }
    };

    initFilters(document.getElementById('filters'), (filters) => {
      currentFilters = filters;
      refresh();
    });

    refresh();
    setInterval(refresh, 15000);
  }

  // requireSession() redirects to the login page when there is no valid
  // session. Previously the dashboard was still initialised in that case,
  // creating the map and firing an API call that could only fail.
  if (authClient.requireSession()) {
    startDashboard();
  }
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,73 +0,0 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<title>Sign in · fleet-platform</title>
|
||||
<style>
|
||||
:root {
|
||||
--bg: #0f172a;
|
||||
--panel: #1e293b;
|
||||
--text: #f1f5f9;
|
||||
--muted: #94a3b8;
|
||||
--accent: #10b981;
|
||||
--bad: #ef4444;
|
||||
}
|
||||
* { box-sizing: border-box; }
|
||||
html, body { margin: 0; height: 100%; background: var(--bg); color: var(--text);
|
||||
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, system-ui, sans-serif; }
|
||||
body { display: grid; place-items: center; }
|
||||
.card { background: var(--panel); padding: 28px 32px; border-radius: 10px; width: 320px; }
|
||||
h1 { margin: 0 0 4px; font-size: 14px; text-transform: uppercase; letter-spacing: 0.08em; color: var(--muted); }
|
||||
h2 { margin: 0 0 24px; font-size: 22px; font-weight: 600; }
|
||||
label { display: block; font-size: 11px; text-transform: uppercase; color: var(--muted); letter-spacing: 0.06em; margin: 12px 0 4px; }
|
||||
input { width: 100%; background: #0b1220; color: var(--text); border: 1px solid #0b1220; border-radius: 5px; padding: 9px 10px; font-size: 14px; }
|
||||
button { width: 100%; margin-top: 18px; background: var(--accent); color: #062a1f; border: 0; border-radius: 5px; padding: 10px; font-size: 14px; font-weight: 600; cursor: pointer; }
|
||||
button:disabled { opacity: 0.6; cursor: progress; }
|
||||
.error { margin-top: 12px; color: var(--bad); font-size: 12px; min-height: 1em; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<form class="card" id="login">
|
||||
<h1>fleet-platform</h1>
|
||||
<h2>Sign in</h2>
|
||||
|
||||
<label for="username">Username</label>
|
||||
<input id="username" name="username" autocomplete="username" required />
|
||||
|
||||
<label for="password">Password</label>
|
||||
<input id="password" name="password" type="password" autocomplete="current-password" required />
|
||||
|
||||
<button type="submit" id="submit">Sign in</button>
|
||||
<div class="error" id="error"></div>
|
||||
</form>
|
||||
|
||||
<script type="module">
|
||||
import { authClient } from '/fleet-core.js';
|
||||
|
||||
// Already signed in: skip the form and go straight to the live view.
if (authClient.isAuthenticated()) {
  window.location.href = '/index-live.html';
}

// Form, submit button and error line, looked up once by element id.
const [form, btn, errEl] = ['login', 'submit', 'error'].map((id) => document.getElementById(id));
|
||||
|
||||
/**
 * Submit handler for the sign-in form.
 *
 * Suppresses the native form submission, clears any previous error, and
 * disables the button while the login call is pending. On success the browser
 * is sent to the live view; on failure the error message is shown under the
 * form. The button is re-enabled in every case.
 *
 * @param {Event} event - the form's submit event
 * @returns {Promise<void>}
 */
async function handleSignIn(event) {
  event.preventDefault();
  errEl.textContent = '';
  btn.disabled = true;

  try {
    const usernameField = document.getElementById('username');
    const passwordField = document.getElementById('password');
    // Username is trimmed; the password is passed through untouched.
    await authClient.login(usernameField.value.trim(), passwordField.value);
    window.location.href = '/index-live.html';
  } catch (failure) {
    errEl.textContent = failure.message || 'Sign in failed';
  } finally {
    btn.disabled = false;
  }
}

form.addEventListener('submit', handleSignIn);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Reference in a new issue