From 6ef4e3fd53d42986cce18128911f538cd6cb9f9a Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Tue, 20 Feb 2024 10:10:51 +0100 Subject: [PATCH 1/4] Optimize shapely operations --- api/v1/tile.py | 52 +++++++++++++++++++++++------------- models/aed_group.py | 18 ++++++++----- models/bbox.py | 65 +++++++++++++++++++++++++-------------------- states/aed_state.py | 23 +++++++++------- 4 files changed, 94 insertions(+), 64 deletions(-) diff --git a/api/v1/tile.py b/api/v1/tile.py index 031deac..bc6fd8c 100644 --- a/api/v1/tile.py +++ b/api/v1/tile.py @@ -1,5 +1,4 @@ from collections.abc import Sequence -from itertools import chain from math import atan, degrees, pi, sinh from typing import Annotated @@ -8,8 +7,7 @@ from anyio import create_task_group from fastapi import APIRouter, Path, Response from sentry_sdk import start_span, trace -from shapely import Point -from shapely.ops import transform +from shapely import get_coordinates, points, set_coordinates from config import ( DEFAULT_CACHE_MAX_AGE, @@ -33,17 +31,18 @@ router = APIRouter() -def _tile_to_point(z: int, x: int, y: int) -> Point: +def _tile_to_point(z: int, x: int, y: int) -> tuple[float, float]: n = 2**z lon_deg = x / n * 360.0 - 180.0 lat_rad = atan(sinh(pi * (1 - 2 * y / n))) lat_deg = degrees(lat_rad) - return Point(lon_deg, lat_deg) + return lon_deg, lat_deg def _tile_to_bbox(z: int, x: int, y: int) -> BBox: - p1 = _tile_to_point(z, x, y + 1) - p2 = _tile_to_point(z, x + 1, y) + p1_coords = _tile_to_point(z, x, y + 1) + p2_coords = _tile_to_point(z, x + 1, y) + p1, p2 = points((p1_coords, p2_coords)) return BBox(p1, p2) @@ -74,22 +73,37 @@ def _mvt_rescale(x: float, y: float, x_min: float, y_min: float, x_span: float, return x_scaled, y_scaled -def _mvt_encode(bbox: BBox, data: Sequence[dict]) -> bytes: - x_min, y_min = MVT_TRANSFORMER.transform(bbox.p1.x, bbox.p1.y) - x_max, y_max = MVT_TRANSFORMER.transform(bbox.p2.x, bbox.p2.y) - x_span = x_max - x_min - y_span = y_max - y_min - +def _mvt_encode(bbox: BBox, layers: Sequence[dict]) -> bytes: with start_span(description='Transforming MVT geometry'): - for feature in chain.from_iterable(d['features'] for d in data): - feature['geometry'] = transform( - func=lambda x, y: _mvt_rescale(x, y, x_min, y_min, x_span, y_span), - geom=feature['geometry'], - ) + bbox_coords = np.asarray((get_coordinates(bbox.p1)[0], get_coordinates(bbox.p2)[0])) + bbox_coords = np.asarray(MVT_TRANSFORMER.transform(bbox_coords[:, 0], bbox_coords[:, 1])).T + span = bbox_coords[1] - bbox_coords[0] + + coords_range = [] + coords = [] + + for layer in layers: + for feature in layer['features']: + feature_coords = get_coordinates(feature['geometry']) + coords_len = len(coords) + coords_range.append((coords_len, coords_len + len(feature_coords))) + coords.extend(feature_coords) + + coords = np.asarray(coords) + coords = np.asarray(MVT_TRANSFORMER.transform(coords[:, 0], coords[:, 1])).T + coords = np.rint((coords - bbox_coords[0]) / span * MVT_EXTENT).astype(int) + + i = 0 + for layer in layers: + for feature in layer['features']: + feature_coords_range = coords_range[i] + feature_coords = coords[feature_coords_range[0] : feature_coords_range[1]] + feature['geometry'] = set_coordinates(feature['geometry'], feature_coords) + i += 1 with start_span(description='Encoding MVT'): return mvt.encode( - data, + layers, default_options={ 'extents': MVT_EXTENT, 'check_winding_order': False, diff --git a/models/aed_group.py b/models/aed_group.py index 0108c1d..327cbad 100644 --- a/models/aed_group.py +++ b/models/aed_group.py @@ -21,14 +21,18 @@ def decide_access(accesses: Iterable[str]) -> str: 'no': 5, } - min_access = '', float('inf') + min_access = '' + min_tier = float('inf') for access in accesses: - if access == 'yes': - return 'yes' # early stopping + tier = tiered.get(access) - tier = tiered.get(access, float('inf')) - if tier < min_access[1]: - min_access = access, tier + if (tier is not None) and (tier < min_tier): + min_access = access + min_tier = tier - return min_access[0] + # early stopping + if min_tier == 0: + break + + return min_access diff --git a/models/bbox.py b/models/bbox.py index 6699274..0cefe4f 100644 --- a/models/bbox.py +++ b/models/bbox.py @@ -1,7 +1,7 @@ from typing import NamedTuple, Self import numpy as np -from shapely import Point +from shapely import Point, get_coordinates, points from shapely.geometry import Polygon @@ -10,47 +10,51 @@ class BBox(NamedTuple): p2: Point def extend(self, percentage: float) -> Self: - lon_span = self.p2.x - self.p1.x - lat_span = self.p2.y - self.p1.y - lon_delta = lon_span * percentage - lat_delta = lat_span * percentage + p1_coords = get_coordinates(self.p1)[0] + p2_coords = get_coordinates(self.p2)[0] - new_p1_lon = max(-180, min(180, self.p1.x - lon_delta)) - new_p1_lat = max(-90, min(90, self.p1.y - lat_delta)) - new_p2_lon = max(-180, min(180, self.p2.x + lon_delta)) - new_p2_lat = max(-90, min(90, self.p2.y + lat_delta)) + spans = p2_coords - p1_coords + deltas = spans * percentage - return BBox( - p1=Point(new_p1_lon, new_p1_lat), - p2=Point(new_p2_lon, new_p2_lat), - ) + p1_coords = np.clip(p1_coords - deltas, [-180, -90], [180, 90]) + p2_coords = np.clip(p2_coords + deltas, [-180, -90], [180, 90]) + + p1, p2 = points((p1_coords, p2_coords)) + return BBox(p1, p2) @classmethod def from_tuple(cls, bbox: tuple[float, float, float, float]) -> Self: - return cls(Point(bbox[0], bbox[1]), Point(bbox[2], bbox[3])) + p1, p2 = points(((bbox[0], bbox[1]), (bbox[2], bbox[3]))) + return cls(p1, p2) def to_tuple(self) -> tuple[float, float, float, float]: - return (self.p1.x, self.p1.y, self.p2.x, self.p2.y) + p1_x, p1_y = get_coordinates(self.p1)[0] + p2_x, p2_y = get_coordinates(self.p2)[0] + + return (p1_x, p1_y, p2_x, p2_y) def to_polygon(self, *, nodes_per_edge: int = 2) -> Polygon: + p1_x, p1_y = get_coordinates(self.p1)[0] + p2_x, p2_y = get_coordinates(self.p2)[0] + if nodes_per_edge <= 2: return Polygon( - [ - (self.p1.x, self.p1.y), - (self.p2.x, self.p1.y), - (self.p2.x, self.p2.y), - (self.p1.x, self.p2.y), - (self.p1.x, self.p1.y), - ] + ( + (p1_x, p1_y), + (p2_x, p1_y), + (p2_x, p2_y), + (p1_x, p2_y), + (p1_x, p1_y), + ) ) - x_vals = np.linspace(self.p1.x, self.p2.x, nodes_per_edge) - y_vals = np.linspace(self.p1.y, self.p2.y, nodes_per_edge) + x_vals = np.linspace(p1_x, p2_x, nodes_per_edge) + y_vals = np.linspace(p1_y, p2_y, nodes_per_edge) - bottom_edge = np.column_stack((x_vals, np.full(nodes_per_edge, self.p1.y))) - top_edge = np.column_stack((x_vals, np.full(nodes_per_edge, self.p2.y))) - left_edge = np.column_stack((np.full(nodes_per_edge - 2, self.p1.x), y_vals[1:-1])) - right_edge = np.column_stack((np.full(nodes_per_edge - 2, self.p2.x), y_vals[1:-1])) + bottom_edge = np.column_stack((x_vals, np.full(nodes_per_edge, p1_y))) + top_edge = np.column_stack((x_vals, np.full(nodes_per_edge, p2_y))) + left_edge = np.column_stack((np.full(nodes_per_edge - 2, p1_x), y_vals[1:-1])) + right_edge = np.column_stack((np.full(nodes_per_edge - 2, p2_x), y_vals[1:-1])) all_coords = np.concatenate((bottom_edge, right_edge, top_edge[::-1], left_edge[::-1])) @@ -58,6 +62,9 @@ def to_polygon(self, *, nodes_per_edge: int = 2) -> Polygon: def correct_for_dateline(self) -> tuple[Self, ...]: if self.p1.x > self.p2.x: - return (BBox(self.p1, Point(180, self.p2.y)), BBox(Point(-180, self.p1.y), self.p2)) + b1_p1 = self.p1 + b2_p2 = self.p2 + b1_p2, b2_p1 = points(((180, self.p2.y), (-180, self.p1.y))) + return (BBox(b1_p1, b1_p2), BBox(b2_p1, b2_p2)) else: return (self,) diff --git a/states/aed_state.py b/states/aed_state.py index 8879b6a..7c5c780 100644 --- a/states/aed_state.py +++ b/states/aed_state.py @@ -8,7 +8,7 @@ from cachetools import TTLCache from pymongo import DeleteOne, ReplaceOne, UpdateOne from sentry_sdk import start_span, start_transaction, trace -from shapely import Point +from shapely import Point, points from shapely.geometry import mapping from shapely.geometry.base import BaseGeometry from sklearn.cluster import Birch @@ -238,12 +238,16 @@ async def get_aed_by_id(aed_id: int) -> AED | None: @trace async def get_all_aeds(filter: dict | None = None) -> Sequence[AED]: cursor = AED_COLLECTION.find(filter, projection={'_id': False}) - result = [] + docs = await cursor.to_list(None) + coords = tuple(doc['position']['coordinates'] for doc in docs) + positions = points(coords) - async for doc in cursor: - doc['position'] = geometry_validator(doc['position']) + result = [None] * len(docs) + + for i, doc, position in zip(range(len(docs)), docs, positions, strict=True): + doc['position'] = position aed = AED.model_construct(**doc) - result.append(aed) + result[i] = aed return result @@ -264,16 +268,17 @@ async def get_aeds_within_geom(cls, geometry: BaseGeometry, group_eps: float | N max_fit_samples = 7000 if len(positions) > max_fit_samples: indices = np.linspace(0, len(positions), max_fit_samples, endpoint=False, dtype=int) - fit_positions = np.array(positions)[indices] + fit_positions = np.asarray(positions)[indices] else: fit_positions = positions with start_span(description=f'Fitting model with {len(fit_positions)} samples'): model = Birch(threshold=group_eps, n_clusters=None, compute_labels=False, copy=False) model.fit(fit_positions) + center_points = points(model.subcluster_centers_) with start_span(description=f'Processing {len(aeds)} samples'): - cluster_groups: tuple[list[AED]] = tuple([] for _ in range(len(model.subcluster_centers_))) + cluster_groups: tuple[list[AED]] = tuple([] for _ in range(len(center_points))) result: list[AED | AEDGroup] = [] with start_span(description='Clustering'): @@ -282,7 +287,7 @@ async def get_aeds_within_geom(cls, geometry: BaseGeometry, group_eps: float | N for aed, cluster in zip(aeds, clusters, strict=True): cluster_groups[cluster].append(aed) - for group, center in zip(cluster_groups, model.subcluster_centers_, strict=True): + for group, center_point in zip(cluster_groups, center_points, strict=True): if len(group) == 0: continue if len(group) == 1: @@ -291,7 +296,7 @@ async def get_aeds_within_geom(cls, geometry: BaseGeometry, group_eps: float | N result.append( AEDGroup( - position=Point(center[0], center[1]), + position=center_point, count=len(group), access=AEDGroup.decide_access(aed.access for aed in group), ) From 949e4d23f16521d5678559ca6248a8dcfaa989bb Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Tue, 20 Feb 2024 10:16:51 +0100 Subject: [PATCH 2/4] Remove dead code --- api/v1/tile.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/api/v1/tile.py b/api/v1/tile.py index bc6fd8c..3ed8cfc 100644 --- a/api/v1/tile.py +++ b/api/v1/tile.py @@ -64,15 +64,6 @@ async def get_tile( return Response(content, headers=headers, media_type='application/vnd.mapbox-vector-tile') -def _mvt_rescale(x: float, y: float, x_min: float, y_min: float, x_span: float, y_span: float) -> tuple[int, int]: - x_mvt, y_mvt = MVT_TRANSFORMER.transform(np.array(x), np.array(y)) - - # subtract minimum boundary and scale to MVT extent - x_scaled = np.rint((x_mvt - x_min) / x_span * MVT_EXTENT).astype(int) - y_scaled = np.rint((y_mvt - y_min) / y_span * MVT_EXTENT).astype(int) - return x_scaled, y_scaled - - def _mvt_encode(bbox: BBox, layers: Sequence[dict]) -> bytes: with start_span(description='Transforming MVT geometry'): bbox_coords = np.asarray((get_coordinates(bbox.p1)[0], get_coordinates(bbox.p2)[0])) From 7c22b437d76cfcd8a6effb056eb85fe010bcfe2e Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Tue, 20 Feb 2024 10:17:44 +0100 Subject: [PATCH 3/4] Version bump --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index 7d2e7dd..9439c04 100644 --- a/config.py +++ b/config.py @@ -12,7 +12,7 @@ from sentry_sdk.integrations.pymongo import PyMongoIntegration NAME = 'openaedmap-backend' -VERSION = '2.7.4' +VERSION = '2.7.5' CREATED_BY = f'{NAME} {VERSION}' WEBSITE = 'https://openaedmap.org' From 1b95988e9cde6bb0a2cc12cf6facd1e0da04db33 Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Tue, 20 Feb 2024 10:27:56 +0100 Subject: [PATCH 4/4] More direct shapely operations, Bug fixes --- api/v1/node.py | 14 ++++++++------ api/v1/tile.py | 31 ++++++++++++++++--------------- states/aed_state.py | 8 +++++--- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/api/v1/node.py b/api/v1/node.py index 2738658..a28feaf 100644 --- a/api/v1/node.py +++ b/api/v1/node.py @@ -4,7 +4,7 @@ from fastapi import APIRouter, HTTPException from pytz import timezone -from shapely import Point +from shapely import get_coordinates from tzfpy import get_tz from middlewares.cache_middleware import configure_cache @@ -17,8 +17,8 @@ photo_id_re = re.compile(r'view/(?P\S+)\.') -def _get_timezone(position: Point) -> tuple[str | None, str | None]: - timezone_name: str | None = get_tz(position.x, position.y) +def _get_timezone(x: float, y: float) -> tuple[str | None, str | None]: + timezone_name: str | None = get_tz(x, y) timezone_offset = None if timezone_name: @@ -78,9 +78,11 @@ async def get_node(node_id: int): if aed is None: raise HTTPException(404, f'Node {node_id} not found') + x, y = get_coordinates(aed.position)[0] + photo_dict = await _get_image_data(aed.tags) - timezone_name, timezone_offset = _get_timezone(aed.position) + timezone_name, timezone_offset = _get_timezone(x, y) timezone_dict = { '@timezone_name': timezone_name, '@timezone_offset': timezone_offset, @@ -97,8 +99,8 @@ async def get_node(node_id: int): **timezone_dict, 'type': 'node', 'id': aed.id, - 'lat': aed.position.y, - 'lon': aed.position.x, + 'lat': y, + 'lon': x, 'tags': aed.tags, 'version': aed.version, } diff --git a/api/v1/tile.py b/api/v1/tile.py index 3ed8cfc..66d7c87 100644 --- a/api/v1/tile.py +++ b/api/v1/tile.py @@ -66,10 +66,6 @@ async def get_tile( def _mvt_encode(bbox: BBox, layers: Sequence[dict]) -> bytes: with start_span(description='Transforming MVT geometry'): - bbox_coords = np.asarray((get_coordinates(bbox.p1)[0], get_coordinates(bbox.p2)[0])) - bbox_coords = np.asarray(MVT_TRANSFORMER.transform(bbox_coords[:, 0], bbox_coords[:, 1])).T - span = bbox_coords[1] - bbox_coords[0] - coords_range = [] coords = [] @@ -80,17 +76,22 @@ def _mvt_encode(bbox: BBox, layers: Sequence[dict]) -> bytes: coords_range.append((coords_len, coords_len + len(feature_coords))) coords.extend(feature_coords) - coords = np.asarray(coords) - coords = np.asarray(MVT_TRANSFORMER.transform(coords[:, 0], coords[:, 1])).T - coords = np.rint((coords - bbox_coords[0]) / span * MVT_EXTENT).astype(int) - - i = 0 - for layer in layers: - for feature in layer['features']: - feature_coords_range = coords_range[i] - feature_coords = coords[feature_coords_range[0] : feature_coords_range[1]] - feature['geometry'] = set_coordinates(feature['geometry'], feature_coords) - i += 1 + if coords: + bbox_coords = np.asarray((get_coordinates(bbox.p1)[0], get_coordinates(bbox.p2)[0])) + bbox_coords = np.asarray(MVT_TRANSFORMER.transform(bbox_coords[:, 0], bbox_coords[:, 1])).T + span = bbox_coords[1] - bbox_coords[0] + + coords = np.asarray(coords) + coords = np.asarray(MVT_TRANSFORMER.transform(coords[:, 0], coords[:, 1])).T + coords = np.rint((coords - bbox_coords[0]) / span * MVT_EXTENT).astype(int) + + i = 0 + for layer in layers: + for feature in layer['features']: + feature_coords_range = coords_range[i] + feature_coords = coords[feature_coords_range[0] : feature_coords_range[1]] + feature['geometry'] = set_coordinates(feature['geometry'], feature_coords) + i += 1 with start_span(description='Encoding MVT'): return mvt.encode( diff --git a/states/aed_state.py b/states/aed_state.py index 7c5c780..6313061 100644 --- a/states/aed_state.py +++ b/states/aed_state.py @@ -8,7 +8,7 @@ from cachetools import TTLCache from pymongo import DeleteOne, ReplaceOne, UpdateOne from sentry_sdk import start_span, start_transaction, trace -from shapely import Point, points +from shapely import Point, get_coordinates, points from shapely.geometry import mapping from shapely.geometry.base import BaseGeometry from sklearn.cluster import Birch @@ -239,9 +239,11 @@ async def get_aed_by_id(aed_id: int) -> AED | None: async def get_all_aeds(filter: dict | None = None) -> Sequence[AED]: cursor = AED_COLLECTION.find(filter, projection={'_id': False}) docs = await cursor.to_list(None) + if not docs: + return () + coords = tuple(doc['position']['coordinates'] for doc in docs) positions = points(coords) - result = [None] * len(docs) for i, doc, position in zip(range(len(docs)), docs, positions, strict=True): @@ -262,7 +264,7 @@ async def get_aeds_within_geom(cls, geometry: BaseGeometry, group_eps: float | N if len(aeds) <= 1 or group_eps is None: return aeds - positions = tuple((aed.position.x, aed.position.y) for aed in aeds) + positions = tuple(get_coordinates(aed.position)[0] for aed in aeds) # deterministic sampling max_fit_samples = 7000