`openavmkit.utilities.openstreetmap`

OpenStreetMapService

OpenStreetMapService(settings=None)

Service for retrieving and processing data from OpenStreetMap.

Attributes:

Name	Type	Description
`settings`	`dict`	Settings dictionary
`features`	`dict`	Dictionary containing internal features that have been loaded

Initialize the OpenStreetMap service.

Parameters:

Name	Type	Description	Default
`settings`	`dict`	Configuration settings for the service	`None`

Source code in openavmkit/utilities/openstreetmap.py

def __init__(self, settings: dict = None):
    """Initialize the OpenStreetMap service.

    Parameters
    ----------
    settings : dict
        Configuration settings for the service
    """
    self.settings = settings or {}
    self.features = {}

calculate_distances

calculate_distances(gdf, features, feature_type)

Calculate distances to features, both aggregate and specific top N features.

Parameters:

Name	Type	Description	Default
`gdf`	`GeoDataFrame`	Parcel GeoDataFrame	required
`features`	`GeoDataFrame`	Features GeoDataFrame	required
`feature_type`	`str`	Type of feature (e.g., 'water', 'park')	required

Returns:

Type	Description
`DataFrame`	DataFrame with distances

Source code in openavmkit/utilities/openstreetmap.py

def calculate_distances(
    self, gdf: gpd.GeoDataFrame, features: gpd.GeoDataFrame, feature_type: str
) -> pd.DataFrame:
    """Calculate distances to features, both aggregate and specific top N features.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Parcel GeoDataFrame
    features : gpd.GeoDataFrame
        Features GeoDataFrame
    feature_type : str
        Type of feature (e.g., 'water', 'park')

    Returns
    -------
    pd.DataFrame
        DataFrame with distances
    """

    # check if we have already cached this data, AND the settings are the same
    # construct a unique signature:
    signature = {"feature_type": feature_type, "features": hash(features.to_json())}
    if check_cache(
        f"osm/{feature_type}_distances", signature=signature, filetype="df"
    ):
        print("----> using cached distances")
        # if so return the cached version
        return read_cache(f"osm/{feature_type}_distances", "df")

    # Project to UTM for accurate distance calculation
    utm_crs = self._get_utm_crs(gdf.total_bounds)
    gdf_proj = gdf.to_crs(utm_crs)
    features_proj = features.to_crs(utm_crs)

    # Initialize dictionary to store all distance calculations
    distance_data = {}

    # Calculate aggregate distance (distance to nearest feature of any type)
    distance_data[f"dist_to_{feature_type}_any"] = gdf_proj.geometry.apply(
        lambda g: features_proj.geometry.distance(g).min()
    )

    # Calculate distances to top N features if available
    if f"{feature_type}_top" in self.features:
        top_features = self.features[f"{feature_type}_top"]
        for _, feature in top_features.iterrows():
            feature_name = feature["name"]
            feature_geom = feature.geometry
            feature_proj = gpd.GeoSeries([feature_geom]).to_crs(utm_crs)[0]

            distance_data[f"dist_to_{feature_type}_{feature_name}"] = (
                gdf_proj.geometry.apply(lambda g: feature_proj.distance(g))
            )

    # write to cache so we can skip on next run
    write_cache(f"osm/{feature_type}_distances", signature, distance_data, "df")

    # Create DataFrame from all collected distances at once
    return pd.DataFrame(distance_data, index=gdf.index)

calculate_elevation_stats

calculate_elevation_stats(gdf, elevation_data, lon_lat_ranges)

Calculate elevation statistics for each parcel.

Parameters:

Name	Type	Description	Default
`gdf`	`GeoDataFrame`	Parcel GeoDataFrame	required
`elevation_data`	`ndarray`	Elevation data as a 2D array	required
`lon_lat_ranges`	`Tuple[np.ndarray, np.ndarray])`	Longitude and latitude ranges	required

Returns:

Type	Description
`DataFrame`	DataFrame containing elevation statistics

Source code in openavmkit/utilities/openstreetmap.py

def calculate_elevation_stats(
    self,
    gdf: gpd.GeoDataFrame,
    elevation_data: np.ndarray,
    lon_lat_ranges: Tuple[np.ndarray, np.ndarray],
) -> pd.DataFrame:
    """Calculate elevation statistics for each parcel.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Parcel GeoDataFrame
    elevation_data : np.ndarray
        Elevation data as a 2D array
    lon_lat_ranges : Tuple[np.ndarray, np.ndarray])
        Longitude and latitude ranges

    Returns
    -------
    pd.DataFrame
        DataFrame containing elevation statistics
    """

    lon_range, lat_range = lon_lat_ranges

    # Initialize arrays for elevation statistics
    avg_elevation = np.full(len(gdf), np.nan)
    avg_slope = np.full(len(gdf), np.nan)

    # For each parcel, calculate elevation statistics
    for i, geom in enumerate(gdf.geometry):
        # Get the bounds of the parcel
        minx, miny, maxx, maxy = geom.bounds

        # Find the indices in the elevation grid that correspond to the parcel bounds
        lon_indices = np.where((lon_range >= minx) & (lon_range <= maxx))[0]
        lat_indices = np.where((lat_range >= miny) & (lat_range <= maxy))[0]

        if len(lon_indices) == 0 or len(lat_indices) == 0:
            continue

        # Extract the elevation data for the parcel
        parcel_elevation = elevation_data[
            lat_indices[0] : lat_indices[-1] + 1,
            lon_indices[0] : lon_indices[-1] + 1,
        ]

        # Calculate average elevation
        avg_elevation[i] = np.mean(parcel_elevation)

        # Calculate slope (simplified)
        # In a real implementation, you would use a more sophisticated method
        if parcel_elevation.shape[0] > 1 and parcel_elevation.shape[1] > 1:
            # Calculate slope in x and y directions
            slope_x = np.gradient(parcel_elevation, axis=1)
            slope_y = np.gradient(parcel_elevation, axis=0)

            # Calculate average slope
            avg_slope[i] = np.mean(np.sqrt(slope_x**2 + slope_y**2))

    # Create a DataFrame with the elevation statistics
    elevation_stats = pd.DataFrame(
        {"avg_elevation": avg_elevation, "avg_slope": avg_slope}, index=gdf.index
    )

    return elevation_stats

enrich_parcels

enrich_parcels(gdf, settings)

Get OpenStreetMap features and prepare them for spatial joins. Returns a dictionary of feature dataframes for use by data.py's spatial join logic.

Parameters:

Name	Type	Description	Default
`gdf`	`GeoDataFrame`	Parcel GeoDataFrame (used for bbox)	required
`settings`	`dict`	Settings for enrichment	required

Returns:

Type	Description
`dict[str, GeoDataFrame]`	Dictionary of feature dataframes

Source code in openavmkit/utilities/openstreetmap.py

def enrich_parcels(
    self,
    gdf: gpd.GeoDataFrame,
    settings: Dict
) -> Dict[str, gpd.GeoDataFrame]:
    """Get OpenStreetMap features and prepare them for spatial joins. Returns a
    dictionary of feature dataframes for use by data.py's spatial join logic.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Parcel GeoDataFrame (used for bbox)
    settings : dict
        Settings for enrichment

    Returns
    -------
    dict[str, gpd.GeoDataFrame]
        Dictionary of feature dataframes
    """
    # Get the bounding box of the GeoDataFrame
    bbox = gdf.total_bounds

    # Dictionary to store all dataframes
    dataframes = {}

    # Process each feature type based on settings
    if settings.get("water_bodies", {}).get("enabled", False):
        water_bodies = self.get_features(bbox, "water_bodies", settings["water_bodies"])
        if not water_bodies.empty:
            # Store both the main and top features in dataframes
            dataframes["water_bodies"] = self.features["water_bodies"]
            dataframes["water_bodies_top"] = self.features["water_bodies_top"]

    if settings.get("transportation", {}).get("enabled", False):
        transportation = self.get_features(bbox, "transportation", settings["transportation"])
        if not transportation.empty:
            dataframes["transportation"] = self.features["transportation"]
            dataframes["transportation_top"] = self.features["transportation_top"]

    if settings.get("educational", {}).get("enabled", False):
        institutions = self.get_features(bbox, "educational", settings["educational"])
        if not institutions.empty:
            dataframes["educational"] = self.features["educational"]
            dataframes["educational_top"] = self.features["educational_top"]

    if settings.get("parks", {}).get("enabled", False):
        parks = self.get_features(bbox, "parks", settings["parks"])
        if not parks.empty:
            dataframes["parks"] = self.features["parks"]
            dataframes["parks_top"] = self.features["parks_top"]

    if settings.get("golf_courses", {}).get("enabled", False):
        golf_courses = self.get_features(bbox, "golf_courses", settings["golf_courses"])
        if not golf_courses.empty:
            dataframes["golf_courses"] = self.features["golf_courses"]
            dataframes["golf_courses_top"] = self.features["golf_courses_top"]

    return dataframes

get_elevation_data

get_elevation_data(bbox, resolution=30)

Get digital elevation model (DEM) data from USGS.

Parameters:

Name	Type	Description	Default
`bbox`	`Tuple[float, float, float, float]):`	Bounding box (min_lon, min_lat, max_lon, max_lat)	required
`resolution`	`int`	Resolution in meters (default: 30m)	`30`

Returns:

Type	Description
`ndarray`	Elevation data as a 2D array

Source code in openavmkit/utilities/openstreetmap.py

def get_elevation_data(
    self, bbox: Tuple[float, float, float, float], resolution: int = 30
) -> np.ndarray:
    """Get digital elevation model (DEM) data from USGS.

    Parameters
    ----------
    bbox : Tuple[float, float, float, float]):
        Bounding box (min_lon, min_lat, max_lon, max_lat)
    resolution : int
        Resolution in meters (default: 30m)

    Returns
    -------
    np.ndarray
        Elevation data as a 2D array
    """
    # This is a placeholder. In a real implementation, you would use the USGS API
    # or a library like elevation to download DEM data
    # For now, we'll return a dummy array
    print("DEM data retrieval not implemented yet. Using dummy data.")

    # Create a dummy elevation array
    # In a real implementation, this would be replaced with actual DEM data
    lat_range = np.linspace(bbox[1], bbox[3], 100)
    lon_range = np.linspace(bbox[0], bbox[2], 100)
    lon_grid, lat_grid = np.meshgrid(lon_range, lat_range)

    # Create a simple elevation model (for demonstration)
    elevation = 100 + 50 * np.sin(lon_grid * 10) + 50 * np.cos(lat_grid * 10)

    return elevation, (lon_range, lat_range)

init_service_openstreetmap

init_service_openstreetmap(settings=None)

Initialize an OpenStreetMap service with the provided settings.

Parameters:

Name	Type	Description	Default
`settings`	`dict`	Configuration settings for the service	`None`

Returns:

Type	Description
`OpenStreetMapService`	Initialized OpenStreetMap service

Source code in openavmkit/utilities/openstreetmap.py

def init_service_openstreetmap(settings: Dict = None) -> OpenStreetMapService:
    """Initialize an OpenStreetMap service with the provided settings.

    Parameters
    ----------
    settings : dict
        Configuration settings for the service

    Returns
    -------
    OpenStreetMapService
        Initialized OpenStreetMap service
    """
    return OpenStreetMapService(settings)