Skip to content

openavmkit.utilities.openstreetmap

OpenStreetMapService

OpenStreetMapService(settings=None)

Service for retrieving and processing data from OpenStreetMap.

Attributes:

Name Type Description
settings dict

Settings dictionary

features dict

Dictionary containing internal features that have been loaded

Initialize the OpenStreetMap service.

Parameters:

Name Type Description Default
settings dict

Configuration settings for the service

None
Source code in openavmkit/utilities/openstreetmap.py
24
25
26
27
28
29
30
31
32
33
def __init__(self, settings: dict = None):
    """Initialize the OpenStreetMap service.

    Parameters
    ----------
    settings : dict
        Configuration settings for the service
    """
    self.settings = settings or {}
    self.features = {}

calculate_distances

calculate_distances(gdf, features, feature_type)

Calculate distances to features, both aggregate and specific top N features.

Parameters:

Name Type Description Default
gdf GeoDataFrame

Parcel GeoDataFrame

required
features GeoDataFrame

Features GeoDataFrame

required
feature_type str

Type of feature (e.g., 'water', 'park')

required

Returns:

Type Description
DataFrame

DataFrame with distances

Source code in openavmkit/utilities/openstreetmap.py
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
def calculate_distances(
    self, gdf: gpd.GeoDataFrame, features: gpd.GeoDataFrame, feature_type: str
) -> pd.DataFrame:
    """Calculate distances to features, both aggregate and specific top N features.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Parcel GeoDataFrame
    features : gpd.GeoDataFrame
        Features GeoDataFrame
    feature_type : str
        Type of feature (e.g., 'water', 'park')

    Returns
    -------
    pd.DataFrame
        DataFrame with distances
    """

    # check if we have already cached this data, AND the settings are the same
    # construct a unique signature:
    signature = {"feature_type": feature_type, "features": hash(features.to_json())}
    if check_cache(
        f"osm/{feature_type}_distances", signature=signature, filetype="df"
    ):
        print("----> using cached distances")
        # if so return the cached version
        return read_cache(f"osm/{feature_type}_distances", "df")

    # Project to UTM for accurate distance calculation
    utm_crs = self._get_utm_crs(gdf.total_bounds)
    gdf_proj = gdf.to_crs(utm_crs)
    features_proj = features.to_crs(utm_crs)

    # Initialize dictionary to store all distance calculations
    distance_data = {}

    # Calculate aggregate distance (distance to nearest feature of any type)
    distance_data[f"dist_to_{feature_type}_any"] = gdf_proj.geometry.apply(
        lambda g: features_proj.geometry.distance(g).min()
    )

    # Calculate distances to top N features if available
    if f"{feature_type}_top" in self.features:
        top_features = self.features[f"{feature_type}_top"]
        for _, feature in top_features.iterrows():
            feature_name = feature["name"]
            feature_geom = feature.geometry
            feature_proj = gpd.GeoSeries([feature_geom]).to_crs(utm_crs)[0]

            distance_data[f"dist_to_{feature_type}_{feature_name}"] = (
                gdf_proj.geometry.apply(lambda g: feature_proj.distance(g))
            )

    # write to cache so we can skip on next run
    write_cache(f"osm/{feature_type}_distances", signature, distance_data, "df")

    # Create DataFrame from all collected distances at once
    return pd.DataFrame(distance_data, index=gdf.index)

calculate_elevation_stats

calculate_elevation_stats(gdf, elevation_data, lon_lat_ranges)

Calculate elevation statistics for each parcel.

Parameters:

Name Type Description Default
gdf GeoDataFrame

Parcel GeoDataFrame

required
elevation_data ndarray

Elevation data as a 2D array

required
lon_lat_ranges Tuple[np.ndarray, np.ndarray])

Longitude and latitude ranges

required

Returns:

Type Description
DataFrame

DataFrame containing elevation statistics

Source code in openavmkit/utilities/openstreetmap.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
def calculate_elevation_stats(
    self,
    gdf: gpd.GeoDataFrame,
    elevation_data: np.ndarray,
    lon_lat_ranges: Tuple[np.ndarray, np.ndarray],
) -> pd.DataFrame:
    """Calculate elevation statistics for each parcel.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Parcel GeoDataFrame
    elevation_data : np.ndarray
        Elevation data as a 2D array
    lon_lat_ranges : Tuple[np.ndarray, np.ndarray])
        Longitude and latitude ranges

    Returns
    -------
    pd.DataFrame
        DataFrame containing elevation statistics
    """

    lon_range, lat_range = lon_lat_ranges

    # Initialize arrays for elevation statistics
    avg_elevation = np.full(len(gdf), np.nan)
    avg_slope = np.full(len(gdf), np.nan)

    # For each parcel, calculate elevation statistics
    for i, geom in enumerate(gdf.geometry):
        # Get the bounds of the parcel
        minx, miny, maxx, maxy = geom.bounds

        # Find the indices in the elevation grid that correspond to the parcel bounds
        lon_indices = np.where((lon_range >= minx) & (lon_range <= maxx))[0]
        lat_indices = np.where((lat_range >= miny) & (lat_range <= maxy))[0]

        if len(lon_indices) == 0 or len(lat_indices) == 0:
            continue

        # Extract the elevation data for the parcel
        parcel_elevation = elevation_data[
            lat_indices[0] : lat_indices[-1] + 1,
            lon_indices[0] : lon_indices[-1] + 1,
        ]

        # Calculate average elevation
        avg_elevation[i] = np.mean(parcel_elevation)

        # Calculate slope (simplified)
        # In a real implementation, you would use a more sophisticated method
        if parcel_elevation.shape[0] > 1 and parcel_elevation.shape[1] > 1:
            # Calculate slope in x and y directions
            slope_x = np.gradient(parcel_elevation, axis=1)
            slope_y = np.gradient(parcel_elevation, axis=0)

            # Calculate average slope
            avg_slope[i] = np.mean(np.sqrt(slope_x**2 + slope_y**2))

    # Create a DataFrame with the elevation statistics
    elevation_stats = pd.DataFrame(
        {"avg_elevation": avg_elevation, "avg_slope": avg_slope}, index=gdf.index
    )

    return elevation_stats

enrich_parcels

enrich_parcels(gdf, settings)

Get OpenStreetMap features and prepare them for spatial joins. Returns a dictionary of feature dataframes for use by data.py's spatial join logic.

Parameters:

Name Type Description Default
gdf GeoDataFrame

Parcel GeoDataFrame (used for bbox)

required
settings dict

Settings for enrichment

required

Returns:

Type Description
dict[str, GeoDataFrame]

Dictionary of feature dataframes

Source code in openavmkit/utilities/openstreetmap.py
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
def enrich_parcels(
    self,
    gdf: gpd.GeoDataFrame,
    settings: Dict
) -> Dict[str, gpd.GeoDataFrame]:
    """Get OpenStreetMap features and prepare them for spatial joins. Returns a
    dictionary of feature dataframes for use by data.py's spatial join logic.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Parcel GeoDataFrame (used for bbox)
    settings : dict
        Settings for enrichment

    Returns
    -------
    dict[str, gpd.GeoDataFrame]
        Dictionary of feature dataframes
    """
    # Get the bounding box of the GeoDataFrame
    bbox = gdf.total_bounds

    # Dictionary to store all dataframes
    dataframes = {}

    # Process each feature type based on settings
    if settings.get("water_bodies", {}).get("enabled", False):
        water_bodies = self.get_features(bbox, "water_bodies", settings["water_bodies"])
        if not water_bodies.empty:
            # Store both the main and top features in dataframes
            dataframes["water_bodies"] = self.features["water_bodies"]
            dataframes["water_bodies_top"] = self.features["water_bodies_top"]

    if settings.get("transportation", {}).get("enabled", False):
        transportation = self.get_features(bbox, "transportation", settings["transportation"])
        if not transportation.empty:
            dataframes["transportation"] = self.features["transportation"]
            dataframes["transportation_top"] = self.features["transportation_top"]

    if settings.get("educational", {}).get("enabled", False):
        institutions = self.get_features(bbox, "educational", settings["educational"])
        if not institutions.empty:
            dataframes["educational"] = self.features["educational"]
            dataframes["educational_top"] = self.features["educational_top"]

    if settings.get("parks", {}).get("enabled", False):
        parks = self.get_features(bbox, "parks", settings["parks"])
        if not parks.empty:
            dataframes["parks"] = self.features["parks"]
            dataframes["parks_top"] = self.features["parks_top"]

    if settings.get("golf_courses", {}).get("enabled", False):
        golf_courses = self.get_features(bbox, "golf_courses", settings["golf_courses"])
        if not golf_courses.empty:
            dataframes["golf_courses"] = self.features["golf_courses"]
            dataframes["golf_courses_top"] = self.features["golf_courses_top"]

    return dataframes

get_elevation_data

get_elevation_data(bbox, resolution=30)

Get digital elevation model (DEM) data from USGS.

Parameters:

Name Type Description Default
bbox Tuple[float, float, float, float]):

Bounding box (min_lon, min_lat, max_lon, max_lat)

required
resolution int

Resolution in meters (default: 30m)

30

Returns:

Type Description
ndarray

Elevation data as a 2D array

Source code in openavmkit/utilities/openstreetmap.py
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def get_elevation_data(
    self, bbox: Tuple[float, float, float, float], resolution: int = 30
) -> np.ndarray:
    """Get digital elevation model (DEM) data from USGS.

    Parameters
    ----------
    bbox : Tuple[float, float, float, float]):
        Bounding box (min_lon, min_lat, max_lon, max_lat)
    resolution : int
        Resolution in meters (default: 30m)

    Returns
    -------
    np.ndarray
        Elevation data as a 2D array
    """
    # This is a placeholder. In a real implementation, you would use the USGS API
    # or a library like elevation to download DEM data
    # For now, we'll return a dummy array
    print("DEM data retrieval not implemented yet. Using dummy data.")

    # Create a dummy elevation array
    # In a real implementation, this would be replaced with actual DEM data
    lat_range = np.linspace(bbox[1], bbox[3], 100)
    lon_range = np.linspace(bbox[0], bbox[2], 100)
    lon_grid, lat_grid = np.meshgrid(lon_range, lat_range)

    # Create a simple elevation model (for demonstration)
    elevation = 100 + 50 * np.sin(lon_grid * 10) + 50 * np.cos(lat_grid * 10)

    return elevation, (lon_range, lat_range)

init_service_openstreetmap

init_service_openstreetmap(settings=None)

Initialize an OpenStreetMap service with the provided settings.

Parameters:

Name Type Description Default
settings dict

Configuration settings for the service

None

Returns:

Type Description
OpenStreetMapService

Initialized OpenStreetMap service

Source code in openavmkit/utilities/openstreetmap.py
416
417
418
419
420
421
422
423
424
425
426
427
428
429
def init_service_openstreetmap(settings: Dict = None) -> OpenStreetMapService:
    """Initialize an OpenStreetMap service with the provided settings.

    Parameters
    ----------
    settings : dict
        Configuration settings for the service

    Returns
    -------
    OpenStreetMapService
        Initialized OpenStreetMap service
    """
    return OpenStreetMapService(settings)