Skip to content

openavmkit.utilities.geometry

clean_geometry

clean_geometry(gdf, ensure_polygon=True, target_crs=None)

Preprocess a GeoDataFrame by diagnosing and fixing common geometry issues.

Parameters:

Name Type Description Default
gdf GeoDataFrame

The input GeoDataFrame with geometries.

required
ensure_polygon bool

If True, removes non-polygon geometries.

True
target_crs str | int

If specified, ensures the GeoDataFrame is in this CRS.

None

Returns:

Type Description
GeoDataFrame

A cleaned and fixed GeoDataFrame.

Source code in openavmkit/utilities/geometry.py
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
def clean_geometry(gdf, ensure_polygon=True, target_crs=None):
    """Preprocess a GeoDataFrame by diagnosing and fixing common geometry issues.

    Parameters
    ----------
    gdf : GeoDataFrame
        The input GeoDataFrame with geometries.
    ensure_polygon : bool
        If True, removes non-polygon geometries.
    target_crs : str | int, optional
        If specified, ensures the GeoDataFrame is in this CRS.

    Returns
    -------
    gpd.GeoDataFrame
        A cleaned and fixed GeoDataFrame.
    """

    # Drop null geometries
    warnings.filterwarnings("ignore", "GeoSeries.notna", UserWarning)
    gdf = gdf[gdf.geometry.notna()]
    warnings.filterwarnings("default", "GeoSeries.notna", UserWarning)

    # Fix invalid geometries using buffer(0)
    gdf["geometry"] = gdf["geometry"].apply(
        lambda geom: geom.buffer(0) if not geom.is_valid else geom
    )

    # Remove empty geometries
    gdf = gdf[~gdf.is_empty]

    # Ensure all polygons are properly closed (for Polygons and MultiPolygons)
    def close_polygon(geom):
        if isinstance(geom, Polygon):
            if not geom.exterior.is_closed:
                return Polygon(list(geom.exterior.coords) + [geom.exterior.coords[0]])
        return geom

    gdf["geometry"] = gdf["geometry"].apply(close_polygon)

    # Remove geometries with fewer than 4 points (invalid for polygons)
    def valid_polygon(geom):
        if isinstance(geom, Polygon):
            return len(geom.exterior.coords) >= 4
        elif isinstance(geom, MultiPolygon):
            for poly in list(geom.geoms):
                if len(poly.exterior.coords) < 4:
                    return False
            return True
        return False

    gdf = gdf[gdf.geometry.apply(valid_polygon)]

    # Remove non-polygon geometries if ensure_polygon is True
    if ensure_polygon:
        gdf = gdf[gdf.geometry.type.isin(["Polygon", "MultiPolygon"])]

    # Ensure the CRS is consistent
    if target_crs:
        gdf = gdf.to_crs(target_crs)

    return gdf

create_geo_circle

create_geo_circle(lat, lon, crs, radius_km, num_points=100)

Creates a GeoDataFrame containing a circle centered at the specified latitude and longitude.

Parameters:

Name Type Description Default
lat float

Latitude

required
lon float

Longitude

required
crs CRS

Coordinate Reference System

required
radius_km float

Radius of the circle, in kilometers

required
num_points int

Number of points used to approximate the circle

100

Returns:

Type Description
GeoDataFrame

A GeoDataFrame containing the circle

Source code in openavmkit/utilities/geometry.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
def create_geo_circle(lat, lon, crs, radius_km, num_points=100):
    """Creates a GeoDataFrame containing a circle centered at the specified latitude and
    longitude.

    Parameters
    ----------
    lat : float
        Latitude
    lon : float
        Longitude
    crs : CRS
        Coordinate Reference System
    radius_km : float
        Radius of the circle, in kilometers
    num_points : int
        Number of points used to approximate the circle

    Returns
    -------
    gpd.GeoDataFrame
        A GeoDataFrame containing the circle
    """
    # Create a list of points around the circle
    points = []
    for i in range(num_points):
        angle = 2 * 3.14159 * i / num_points
        x = radius_km * math.cos(angle)
        y = radius_km * math.sin(angle)
        pt_lat, pt_lon = offset_coordinate_km(lat, lon, x, y)
        points.append(shapely.Point(pt_lon, pt_lat))

    points.append(points[0])
    polygon = shapely.Polygon(points)

    # Create a GeoDataFrame from the points
    gdf = gpd.GeoDataFrame(geometry=[polygon], crs=crs)
    return gdf

create_geo_rect

create_geo_rect(lat, lon, crs, width_km, height_km, anchor_point='center')

Creates a GeoDataFrame containing a rectangle centered at the specified latitude and longitude.

Parameters:

Name Type Description Default
lat float

Latitude

required
lon float

Longitude

required
crs CRS

Coordinate Reference System

required
width_km float

Width in kilometers

required
height_km float

Height in kilometers

required
anchor_point str

Anchor point of the rectangle ("center", "nw")

'center'

Returns:

Type Description
GeoDataFrame

A GeoDataFrame containing the rectangle.

Source code in openavmkit/utilities/geometry.py
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
def create_geo_rect(lat, lon, crs, width_km, height_km, anchor_point="center"):
    """Creates a GeoDataFrame containing a rectangle centered at the specified latitude
    and longitude.

    Parameters
    ----------
    lat : float
        Latitude
    lon : float
        Longitude
    crs : CRS
        Coordinate Reference System
    width_km : float
        Width in kilometers
    height_km : float
        Height in kilometers
    anchor_point : str, optional
        Anchor point of the rectangle ("center", "nw")

    Returns
    -------
    gpd.GeoDataFrame
        A GeoDataFrame containing the rectangle.
    """

    polygon = create_geo_rect_shape_km(
        lat, lon, width_km, height_km, anchor_point=anchor_point
    )

    # Create a GeoDataFrame
    gdf = gpd.GeoDataFrame(geometry=[polygon], crs=crs)

    return gdf

create_geo_rect_shape_deg

create_geo_rect_shape_deg(lat, lon, width_deg, height_deg, anchor_point='center')

Creates a GeoDataFrame containing a rectangle centered at the specified latitude and longitude.

Parameters:

Name Type Description Default
lat float

Latitude

required
lon float

Longitude

required
height_deg float

The height of the rectangle in degrees

required
width_deg float

The height of the rectangle in degrees

required
anchor_point str

The anchor point of the rectangle ("center" or "nw")

'center'

Returns:

Type Description
GeoDataFrame

A GeoDataFrame containing the rectangle.

Source code in openavmkit/utilities/geometry.py
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
def create_geo_rect_shape_deg(lat, lon, width_deg, height_deg, anchor_point="center"):
    """Creates a GeoDataFrame containing a rectangle centered at the specified latitude
    and longitude.

    Parameters
    ----------
    lat : float
        Latitude
    lon : float
        Longitude
    height_deg : float
        The height of the rectangle in degrees
    width_deg : float
        The height of the rectangle in degrees
    anchor_point : str
        The anchor point of the rectangle ("center" or "nw")

    Returns
    -------
    gpd.GeoDataFrame
        A GeoDataFrame containing the rectangle.
    """

    if anchor_point == "center":
        off_nw_x = -width_deg / 2
        off_sw_x = -width_deg / 2

        off_ne_x = width_deg / 2
        off_se_x = width_deg / 2

        off_nw_y = height_deg / 2
        off_ne_y = height_deg / 2

        off_se_y = -height_deg / 2
        off_sw_y = -height_deg / 2
    elif anchor_point == "nw":
        off_nw_x = 0
        off_sw_x = 0

        off_ne_x = width_deg
        off_se_x = width_deg

        off_nw_y = 0
        off_ne_y = 0

        off_se_y = -height_deg
        off_sw_y = -height_deg
    else:
        raise ValueError("Invalid anchor point. Choose 'center' or 'nw'.")

    # Calculate the four corners of the rectangle
    nw_lat, nw_lon = lat + off_nw_y, lon + off_nw_x  # NW
    ne_lat, ne_lon = lat + off_ne_y, lon + off_ne_x  # NE
    se_lat, se_lon = lat + off_se_y, lon + off_se_x  # SE
    sw_lat, sw_lon = lat + off_sw_y, lon + off_sw_x  # SW

    # Order: NW → NE → SE → SW → NW (to close polygon)
    polygon_coords = [
        (nw_lon, nw_lat),
        (ne_lon, ne_lat),
        (se_lon, se_lat),
        (sw_lon, sw_lat),
        (nw_lon, nw_lat),
    ]

    # Create a Polygon
    polygon = Polygon(polygon_coords)
    return polygon

create_geo_rect_shape_km

create_geo_rect_shape_km(lat, lon, width_km, height_km, anchor_point='center')

Creates a GeoDataFrame containing a rectangle centered at the specified latitude and longitude.

Parameters:

Name Type Description Default
lat float

Latitude

required
lon float

Longitude

required
width_km float

Width of the rectangle in kilometers

required
height_km float

Height of the rectangle in kilometers

required
anchor_point str

Anchor point of the rectangle ("center", "now")

'center'

Returns:

Type Description
GeoDataFrame

A GeoDataFrame containing the rectangle.

Source code in openavmkit/utilities/geometry.py
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
def create_geo_rect_shape_km(lat, lon, width_km, height_km, anchor_point="center"):
    """Creates a GeoDataFrame containing a rectangle centered at the specified latitude
    and longitude.

    Parameters
    ----------
    lat : float
        Latitude
    lon : float
        Longitude
    width_km : float
        Width of the rectangle in kilometers
    height_km : float
        Height of the rectangle in kilometers
    anchor_point : str
        Anchor point of the rectangle ("center", "now")

    Returns
    -------
    gpd.GeoDataFrame
        A GeoDataFrame containing the rectangle.
    """

    if anchor_point == "center":
        off_nw_x = -width_km / 2
        off_sw_x = -width_km / 2

        off_ne_x = width_km / 2
        off_se_x = width_km / 2

        off_nw_y = height_km / 2
        off_ne_y = height_km / 2

        off_se_y = -height_km / 2
        off_sw_y = -height_km / 2
    elif anchor_point == "nw":
        off_nw_x = 0
        off_sw_x = 0

        off_ne_x = width_km
        off_se_x = width_km

        off_nw_y = 0
        off_ne_y = 0

        off_se_y = -height_km
        off_sw_y = -height_km
    else:
        raise ValueError("Invalid anchor point. Choose 'center' or 'nw'.")

    # Calculate the four corners of the rectangle
    nw_lat, nw_lon = offset_coordinate_km(lat, lon, off_nw_y, off_nw_x)  # NW
    ne_lat, ne_lon = offset_coordinate_km(lat, lon, off_ne_y, off_ne_x)  # NE
    se_lat, se_lon = offset_coordinate_km(lat, lon, off_se_y, off_se_x)  # SE
    sw_lat, sw_lon = offset_coordinate_km(lat, lon, off_sw_y, off_sw_x)  # SW

    # Order: NW → NE → SE → SW → NW (to close polygon)
    polygon_coords = [
        (nw_lon, nw_lat),
        (ne_lon, ne_lat),
        (se_lon, se_lat),
        (sw_lon, sw_lat),
        (nw_lon, nw_lat),
    ]

    # Create a Polygon
    polygon = Polygon(polygon_coords)
    return polygon

detect_triangular_lots

detect_triangular_lots(geom, compactness_threshold=0.85, angle_tolerance=10, min_aspect=0.5, max_aspect=2.0)

Determine if a geometry is approximately triangular based on compactness, hull vertex angles, and bounding box aspect ratio.

This function evaluates a shape by:

  1. Computing the ratio of its area to its convex hull area (compactness).
  2. Checking that the convex hull has at most three non-nearly-180° angles within the specified tolerance, indicating a triangle-like hull.
  3. Verifying the bounding box aspect ratio is within [min_aspect, max_aspect].

Parameters:

Name Type Description Default
geom BaseGeometry

Input polygonal geometry to test (must have an 'area' and 'convex_hull').

required
compactness_threshold float

Minimum allowed ratio of geom.area to its convex_hull.area. Shapes with more concave boundaries (lower ratios) are rejected.

0.85
angle_tolerance float

Degrees of tolerance from 180° for hull vertex angles.

10
min_aspect float

Minimum width/height ratio of the geometry's bounding box.

0.5
max_aspect float

Maximum width/height ratio of the geometry's bounding box.

2.0

Returns:

Type Description
bool

True if geom passes all triangularity checks; False otherwise.

Source code in openavmkit/utilities/geometry.py
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
def detect_triangular_lots(
    geom, compactness_threshold=0.85, angle_tolerance=10, min_aspect=0.5, max_aspect=2.0
):
    """
    Determine if a geometry is approximately triangular based on compactness,
    hull vertex angles, and bounding box aspect ratio.

    This function evaluates a shape by:

    1. Computing the ratio of its area to its convex hull area (compactness).
    2. Checking that the convex hull has at most three non-nearly-180° angles within
       the specified tolerance, indicating a triangle-like hull.
    3. Verifying the bounding box aspect ratio is within [min_aspect, max_aspect].

    Parameters
    ----------
    geom : shapely.geometry.base.BaseGeometry
        Input polygonal geometry to test (must have an 'area' and 'convex_hull').
    compactness_threshold : float, default 0.85
        Minimum allowed ratio of geom.area to its convex_hull.area.
        Shapes with more concave boundaries (lower ratios) are rejected.
    angle_tolerance : float, default 10
        Degrees of tolerance from 180° for hull vertex angles.
    min_aspect : float, default 0.5
        Minimum width/height ratio of the geometry's bounding box.
    max_aspect : float, default 2.0
        Maximum width/height ratio of the geometry's bounding box.

    Returns
    -------
    bool
        True if geom passes all triangularity checks; False otherwise.

    """
    hull = geom.convex_hull
    area_ratio = geom.area / hull.area
    if area_ratio < compactness_threshold:
        return False

    # Check approximate triangular shape
    coords = list(hull.exterior.coords[:-1])
    edges = [
        LineString([coords[i], coords[(i + 1) % len(coords)]])
        for i in range(len(coords))
    ]

    # Calculate angles
    def edge_angle(edge1, edge2):
        vec1 = np.array(edge1.coords[1]) - np.array(edge1.coords[0])
        vec2 = np.array(edge2.coords[1]) - np.array(edge2.coords[0])
        angle = np.arctan2(np.cross(vec1, vec2), np.dot(vec1, vec2))
        return np.degrees(abs(angle))

    angles = [
        edge_angle(edges[i], edges[(i + 1) % len(edges)]) for i in range(len(edges))
    ]
    near_180 = sum(abs(180 - angle) < angle_tolerance for angle in angles)
    if len(edges) - near_180 > 3:
        return False

    # Check bounding box aspect ratio
    bounds = geom.bounds
    width = bounds[2] - bounds[0]
    height = bounds[3] - bounds[1]
    aspect_ratio = width / height
    if not (min_aspect <= aspect_ratio <= max_aspect):
        return False

    return True

distance_km

distance_km(lat1, lon1, lat2, lon2)

Calculates the distance in kilometers between two latitude/longitude points. :param lat1: Latitude of the first point. :param lon1: Longitude of the first point. :param lat2: Latitude of the second point. :param lon2: Longitude of the second point. :return: Distance in kilometers.

Source code in openavmkit/utilities/geometry.py
232
233
234
235
236
237
238
239
240
241
242
def distance_km(lat1, lon1, lat2, lon2):
  """
  Calculates the distance in kilometers between two latitude/longitude points.
  :param lat1: Latitude of the first point.
  :param lon1: Longitude of the first point.
  :param lat2: Latitude of the second point.
  :param lon2: Longitude of the second point.
  :return: Distance in kilometers.
  """
  _, _, dist_m = geod.inv(lon1, lat1, lon2, lat2)
  return dist_m / 1000.0

ensure_geometries

ensure_geometries(df, geom_col='geometry', crs=None)

Parse a DataFrame whose geom_col may be:

  • Shapely geometries
  • WKT strings
  • WKB bytes/bytearray
  • Hex‐encoded WKB strings (with or without "0x" prefix)
  • numpy.bytes_ scalars, memoryviews, etc.

Parameters:

Name Type Description Default
df DataFrame

Input DataFrame

required
geom_col str

name of the geometry column

'geometry'
crs CRS

Coordinate Reference System

None

Returns:

Type Description
GeoDataFrame

a brand‑new GeoDataFrame with a clean geometry column.

Source code in openavmkit/utilities/geometry.py
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
def ensure_geometries(df, geom_col="geometry", crs=None):
    """Parse a DataFrame whose `geom_col` may be:

      - Shapely geometries
      - WKT strings
      - WKB bytes/bytearray
      - Hex‐encoded WKB strings (with or without "0x" prefix)
      - numpy.bytes_ scalars, memoryviews, etc.

    Parameters
    ----------
    df : pd.DataFrame
        Input DataFrame
    geom_col : str
        name of the geometry column
    crs : CRS, optional
        Coordinate Reference System

    Returns
    -------
    gpd.GeoDataFrame
        a brand‑new GeoDataFrame with a _clean_ geometry column.
    """
    # Copy into a plain DataFrame (drops any old GeoDataFrame metadata)
    data = pd.DataFrame(df).copy()

    def _parse(val):
        # 1) Nulls
        if val is None or (isinstance(val, float) and pd.isna(val)):
            return None
        # 2) Already Shapely?
        if isinstance(val, BaseGeometry):
            return val
        # 3) Geo‑interface dicts
        if hasattr(val, "__geo_interface__"):
            from shapely.geometry import shape

            return shape(val)
        # 4) Strings: try WKT first, then hex WKB
        if isinstance(val, str):
            s = val.strip()
            if s.lower().startswith("0x"):
                s = s[2:]
            try:
                return wkt.loads(val)
            except Exception:
                raw = binascii.unhexlify(s)
                return wkb.loads(raw)
        # 5) Bytes‐like
        if isinstance(val, (bytes, bytearray, memoryview)):
            raw = val.tobytes() if isinstance(val, memoryview) else val
            return wkb.loads(raw)
        # 6) numpy bytes_ or other numpy scalar
        if isinstance(val, np.generic):
            try:
                b = bytes(val)
                return wkb.loads(b)
            except Exception:
                pass
        # 7) Anything with tobytes()
        if hasattr(val, "tobytes"):
            try:
                raw = val.tobytes()
                return wkb.loads(raw)
            except Exception:
                pass

        raise TypeError(f"Cannot parse geometry of type {type(val)}")

    # Parse into a plain pandas Series of Shapely objects
    parsed = data[geom_col].apply(_parse)

    # Build a GeoSeries (so GeoPandas trusts it)
    geom_series = gpd.GeoSeries(parsed.values.tolist(), index=data.index, crs=crs)

    # Drop the old column and assemble
    df_clean = data.drop(columns=[geom_col])
    return gpd.GeoDataFrame(df_clean, geometry=geom_series, crs=crs)

geolocate_point_to_polygon

geolocate_point_to_polygon(gdf, df_in, lat_field, lon_field, parcel_id_field)

Assign each point (latitude/longitude) in a DataFrame to a containing polygon ID.

Converts input latitude/longitude columns into Point geometries, performs a spatial join against a GeoDataFrame of parcel polygons, and returns the original DataFrame augmented with the matching parcel identifier.

Parameters:

Name Type Description Default
gdf GeoDataFrame

GeoDataFrame of polygon geometries. Must include:

  • A geometry column of type Polygon or MultiPolygon.
  • The column named by parcel_id_field containing unique parcel IDs.
required
df_in DataFrame

Input DataFrame with at least the latitude and longitude columns. Any existing geometry column will be dropped.

required
lat_field str

Name of the column in df_in containing latitude values.

required
lon_field str

Name of the column in df_in containing longitude values.

required
parcel_id_field str

Name of the parcel ID column in gdf to attach to each point.

required

Returns:

Type Description
DataFrame

Copy of df_in with:

  • Any preexisting geometry column removed.
  • A new column parcel_id_field containing the ID of the polygon in gdf that contains each point. Rows with no containing polygon will have NaN.
Notes
  • Input DataFrame is temporarily converted to a GeoDataFrame with Point geometries; its CRS is set to match gdf.
  • Spatial join uses the 'within' predicate to ensure points fall inside parcels.
  • A temporary index column is used to preserve original row order.
Source code in openavmkit/utilities/geometry.py
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
def geolocate_point_to_polygon(
    gdf: gpd.GeoDataFrame,
    df_in: pd.DataFrame,
    lat_field: str,
    lon_field: str,
    parcel_id_field: str,
) -> pd.DataFrame:
    """
    Assign each point (latitude/longitude) in a DataFrame to a containing polygon ID.

    Converts input latitude/longitude columns into Point geometries, performs a
    spatial join against a GeoDataFrame of parcel polygons, and returns the
    original DataFrame augmented with the matching parcel identifier.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame of polygon geometries.  Must include:

        - A geometry column of type Polygon or MultiPolygon.
        - The column named by `parcel_id_field` containing unique parcel IDs.
    df_in : pandas.DataFrame
        Input DataFrame with at least the latitude and longitude columns.
        Any existing `geometry` column will be dropped.
    lat_field : str
        Name of the column in `df_in` containing latitude values.
    lon_field : str
        Name of the column in `df_in` containing longitude values.
    parcel_id_field : str
        Name of the parcel ID column in `gdf` to attach to each point.

    Returns
    -------
    pandas.DataFrame
        Copy of `df_in` with:

        - Any preexisting `geometry` column removed.
        - A new column `parcel_id_field` containing the ID of the polygon in `gdf`
          that contains each point.  Rows with no containing polygon will have NaN.

    Notes
    -----
    - Input DataFrame is temporarily converted to a GeoDataFrame with Point
      geometries; its CRS is set to match `gdf`.
    - Spatial join uses the 'within' predicate to ensure points fall inside
      parcels.
    - A temporary index column is used to preserve original row order.
    """
    # Make a local copy
    df = df_in.copy()

    if "geometry" in df.columns:
        warnings.warn(
            "You're doing a `lat_lon` merge of a DataFrame that also has a `geometry` column. The geometry column will be discarded."
        )

    # Drop fields that don't have lat/lon values
    df = df[df[lon_field].notna() & df[lat_field].notna()]

    # Strip old geometry if it exists and replace it with points corresponding to lat/lon fields
    df = df.drop(columns="geometry", errors="ignore")
    df["geometry"] = df.apply(
        lambda row: shapely.geometry.Point(row[lon_field], row[lat_field]), axis=1
    )
    df = gpd.GeoDataFrame(df, geometry="geometry")

    # Match the CRS
    df.set_crs(gdf.crs, inplace=True)

    # reset index and set a "temp_index" of numerical index values
    df.reset_index(drop=True)
    df["temp_index"] = df.index

    gdf_keys = gdf[[parcel_id_field, "geometry"]]

    # deduplicate:
    gdf_keys = gdf_keys.drop_duplicates(subset=[parcel_id_field])

    # perform the spatial join, each lat/lon matched to the parcel it's inside of
    gdf_joined = gpd.sjoin(df, gdf_keys, how="left", predicate="within")

    # now we have a DataFrame that matches each row in df_in to a `parcel_id_field` in gdf

    # merge parcel_id_field back onto df:
    df = df.merge(
        gdf_joined[[parcel_id_field, "temp_index"]], on="temp_index", how="left"
    )

    # drop the temporary index and the point geometry:
    df = df.drop(columns=["temp_index", "geometry"])

    # now df has the parcel_id_field for each lat/lon pair in gdf

    return df

get_crs

get_crs(gdf, projection_type)

Returns the appropriate CRS for a GeoDataFrame based on the specified projection type.

Parameters:

Name Type Description Default
gdf GeoDataFrame

Input GeoDataFrame.

required
projection_type str

Type of projection ('latlon', 'equal_area', 'equal_distance').

required

Returns:

Type Description
CRS

Appropriate CRS for the specified projection type.

Source code in openavmkit/utilities/geometry.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def get_crs(gdf, projection_type):
    """Returns the appropriate CRS for a GeoDataFrame based on the specified projection
    type.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Input GeoDataFrame.
    projection_type : str
        Type of projection ('latlon', 'equal_area', 'equal_distance').

    Returns
    -------
    pyproj.CRS
        Appropriate CRS for the specified projection type.
    """
    # Ensure the GeoDataFrame is in EPSG:4326
    gdf = gdf.to_crs("EPSG:4326")

    # Calculate the centroid of the entire GeoDataFrame

    # supress user warning:
    warnings.filterwarnings("ignore", category=UserWarning)

    # get centroid:
    lon, lat = gdf.centroid.x.mean(), gdf.centroid.y.mean()

    return get_crs_from_lat_lon(lat, lon, projection_type)

get_crs_from_lat_lon

get_crs_from_lat_lon(lat, lon, projection_type, units='m')

Return a Coordinate Reference System (CRS) suitable for the requested projection type at the given latitude/longitude location.

Parameters:

Name Type Description Default
lat float

Latitude

required
lon float

Longitude

required
projection_type str

The desired projection type. Legal values are "latlon", "equal_area", "equal_distance", and "conformal"

required
units str

The desired units. Legal values are "ft" and "m"

'm'

Returns:

Type Description
CRS

The appropriately Coordinate Reference System

Notes

The following kind of CRS will be returned for each of the projection_type values:

  • 'latlon' : geographic (EPSG:4326)
  • 'equal_area' : local azimuthal equal-area (LAEA)
  • 'equal_distance': azimuthal equidistant (AEQD)
  • 'conformal' : local UTM zone (minimal distortion in shape/angle)
Source code in openavmkit/utilities/geometry.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def get_crs_from_lat_lon(
    lat: float, lon: float, projection_type: str, units: str = "m"
):
    """Return a Coordinate Reference System (CRS) suitable for the requested projection type at the given
    latitude/longitude location.

    Parameters
    ----------
    lat : float
        Latitude
    lon : float
        Longitude
    projection_type : str
        The desired projection type. Legal values are "latlon", "equal_area", "equal_distance", and "conformal"
    units : str
        The desired units. Legal values are "ft" and "m"

    Returns
    -------
    CRS
        The appropriately Coordinate Reference System

    Notes
    -----
    The following kind of CRS will be returned for each of the ``projection_type`` values:

      - 'latlon'       : geographic (EPSG:4326)
      - 'equal_area'   : local azimuthal equal-area (LAEA)
      - 'equal_distance': azimuthal equidistant (AEQD)
      - 'conformal'    : local UTM zone (minimal distortion in shape/angle)
    """
    if projection_type == "latlon":
        return CRS.from_epsg(4326)

    elif projection_type == "equal_area":
        # Lambert Azimuthal Equal‐Area about your centroid
        return CRS.from_proj4(
            f"+proj=laea +lat_0={lat} +lon_0={lon} +datum=WGS84 +units={units}"
        )

    elif projection_type == "equal_distance":
        # Azimuthal Equidistant about your centroid
        return CRS.from_proj4(
            f"+proj=aeqd +lat_0={lat} +lon_0={lon} +datum=WGS84 +units={units}"
        )

    elif projection_type == "conformal":
        # Use the UTM zone for minimal scale error over a small area
        # pyproj can detect the right UTM zone automatically:
        return CRS.from_user_input(
            f"+proj=utm +zone={int((lon+180)/6)+1} +datum=WGS84 +units={units}"
        )

    else:
        raise ValueError(f"Unknown projection_type: {projection_type}")

get_exterior_coords

get_exterior_coords(geom)

Gets a list of all the exterior coordinates, regardless of whether the Geometry is a Polygon or MultiPolygon

Parameters:

Name Type Description Default
geom Polygon

The shapely polygon whose exterior coordinates you want

required

Returns:

Type Description
list

The list of exterior coordinates

Source code in openavmkit/utilities/geometry.py
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
def get_exterior_coords(geom):
    """Gets a list of all the exterior coordinates, regardless of whether the Geometry is a Polygon or MultiPolygon

    Parameters
    ----------
    geom : shapely.Polygon
        The shapely polygon whose exterior coordinates you want

    Returns
    -------
    list
        The list of exterior coordinates
    """
    if geom.geom_type == "Polygon":
        return list(geom.exterior.coords)
    elif geom.geom_type == "MultiPolygon":
        # Return a list of exterior coordinates for each polygon in the MultiPolygon
        return [list(poly.exterior.coords) for poly in geom.geoms]
    else:
        return None  # or handle other geometry types if necessary

identify_irregular_parcels

identify_irregular_parcels(gdf, verbose=False, tolerance=10, complex_threshold=12, rectangularity_threshold=0.75, elongation_threshold=5)

Detect and flag irregular parcel geometries based on shape metrics.

Applies a sequence of geometric tests to identify triangular, overly complex, or elongated parcel shapes. The input GeoDataFrame is temporarily projected to EPSG:3857 for distance-based operations, then restored to its original CRS.

Parameters:

Name Type Description Default
gdf GeoDataFrame

Must contain a geometry column of Polygon geometries, plus precomputed fields:

  • geom_rectangularity_num : numeric rectangularity measure (0–1).
  • geom_aspect_ratio : width/height ratio of each parcel.
required
verbose bool

If True, print timing and progress for each processing phase.

False
tolerance int

Simplification tolerance (projection units) for reducing geometry complexity before analysis.

10
complex_threshold int

Minimum vertex count (post-simplification) for a parcel to be considered 'complex' when rectangularity is low.

12
rectangularity_threshold float

Maximum rectangularity below which a high-vertex-count geometry is flagged as irregular.

0.75
elongation_threshold float

Minimum bounding-box aspect ratio that qualifies a parcel as 'elongated'.

5

Returns:

Type Description
GeoDataFrame

A copy of the input with these added columns:

  • is_geom_triangular : bool flag for approximate triangular shapes.
  • geom_vertices : int count of vertices after simplification.
  • is_geom_complex : bool flag for complex non-rectangular shapes.
  • is_geom_elongated : bool flag for elongated shapes.
  • is_geom_irregular : bool flag if any irregular condition is met.
Notes
  • The original CRS is preserved in the final output.
  • Triangle detection delegates to :func:detect_triangular_lots.
  • Complex shapes satisfy both: vertex count ≥ complex_threshold AND rectangularity ≤ rectangularity_threshold.
  • Elongation is evaluated on the axis-aligned bounding box of each parcel.
Source code in openavmkit/utilities/geometry.py
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
def identify_irregular_parcels(
    gdf,
    verbose=False,
    tolerance=10,
    complex_threshold=12,
    rectangularity_threshold=0.75,
    elongation_threshold=5,
):
    """
    Detect and flag irregular parcel geometries based on shape metrics.

    Applies a sequence of geometric tests to identify triangular, overly
    complex, or elongated parcel shapes.  The input GeoDataFrame is temporarily
    projected to EPSG:3857 for distance-based operations, then restored to its
    original CRS.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Must contain a ``geometry`` column of Polygon geometries, plus
        precomputed fields:

        - ``geom_rectangularity_num`` : numeric rectangularity measure (0–1).
        - ``geom_aspect_ratio``     : width/height ratio of each parcel.
    verbose : bool, default False
        If True, print timing and progress for each processing phase.
    tolerance : int, default 10
        Simplification tolerance (projection units) for reducing geometry
        complexity before analysis.
    complex_threshold : int, default 12
        Minimum vertex count (post-simplification) for a parcel to be
        considered 'complex' when rectangularity is low.
    rectangularity_threshold : float, default 0.75
        Maximum rectangularity below which a high-vertex-count geometry is
        flagged as irregular.
    elongation_threshold : float, default 5
        Minimum bounding-box aspect ratio that qualifies a parcel as
        'elongated'.

    Returns
    -------
    geopandas.GeoDataFrame
        A copy of the input with these added columns:

        - ``is_geom_triangular`` : bool flag for approximate triangular shapes.
        - ``geom_vertices``      : int count of vertices after simplification.
        - ``is_geom_complex``    : bool flag for complex non-rectangular shapes.
        - ``is_geom_elongated``  : bool flag for elongated shapes.
        - ``is_geom_irregular``  : bool flag if any irregular condition is met.

    Notes
    -----
    - The original CRS is preserved in the final output.
    - Triangle detection delegates to :func:`detect_triangular_lots`.
    - Complex shapes satisfy both: vertex count ≥ ``complex_threshold`` AND
      rectangularity ≤ ``rectangularity_threshold``.
    - Elongation is evaluated on the axis-aligned bounding box of each parcel.
    """
    if verbose:
        print(f"--> identifying irregular parcels...")

    t = TimingData()
    t.start("all")
    t.start("setup")
    old_crs = gdf.crs
    if gdf.crs.is_geographic:
        gdf = gdf.to_crs("EPSG:3857")
    gdf["simplified_geometry"] = gdf.geometry.simplify(
        tolerance, preserve_topology=True
    )
    t.stop("setup")

    t.start("tri")
    gdf["is_geom_triangular"] = gdf["simplified_geometry"].apply(detect_triangular_lots)
    t.stop("tri")
    if verbose:
        _t = t.get("setup")
        print(f"----> simplified geometry...({_t:.2f}s)")
        _t = t.get("tri")
        print(f"----> identified triangular parcels...({_t:.2f}s)")

    # Detect complex geometry based on rectangularity and vertex count
    t.start("complex")
    gdf["geom_vertices"] = gdf["simplified_geometry"].apply(
        lambda geom: len(geom.exterior.coords) if geom.geom_type == "Polygon" else 0
    )
    gdf["is_geom_complex"] = (gdf["geom_vertices"].ge(complex_threshold)) & (
        gdf["geom_rectangularity_num"].le(rectangularity_threshold)
    )
    t.stop("complex")
    if verbose:
        _t = t.get("complex")
        print(f"----> identified complex geometry...({_t:.2f}s)")

    t.start("long")
    gdf["is_geom_elongated"] = gdf["geom_aspect_ratio"].ge(elongation_threshold)
    t.stop("long")
    if verbose:
        _t = t.get("long")
        print(f"----> identified elongated parcels...({_t:.2f}s)")

    t.start("finish")
    # Combine criteria for irregular lots
    gdf["is_geom_irregular"] = (
        gdf["is_geom_complex"] | gdf["is_geom_elongated"] | gdf["is_geom_triangular"]
    )

    # Fill any NA values with false:
    for field in [
        "is_geom_complex",
        "is_geom_elongated",
        "is_geom_triangular",
        "is_geom_irregular",
    ]:
        gdf[field] = gdf[field].fillna(False)
        gdf[field] = gdf[field].astype(bool)

    gdf = gdf.drop(columns="simplified_geometry")
    gdf = gdf.to_crs(old_crs)
    t.stop("finish")
    if verbose:
        _t = t.get("finish")
        print(f"----> finished up...({_t:.2f}s)")
    t.stop("all")
    if verbose:
        _t = t.get("all")
        print(f"--> identified irregular parcels (total)...({_t:.2f}s)")

    return gdf

is_likely_epsg4326

is_likely_epsg4326(gdf)

Checks if the GeoDataFrame is likely using EPSG:4326.

This is a heuristic function that inspects the geometries.

Parameters:

Name Type Description Default
gdf GeoDataFrame

The GeoDataFrame to check

required

Returns:

Type Description
bool

True if it's likely EPSG:4326, False if it's not.

Source code in openavmkit/utilities/geometry.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def is_likely_epsg4326(gdf: gpd.GeoDataFrame) -> bool:
    """Checks if the GeoDataFrame is likely using EPSG:4326.

    This is a heuristic function that inspects the geometries.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        The GeoDataFrame to check

    Returns
    -------
    bool
        True if it's likely EPSG:4326, False if it's not.

    """
    # Check if geometries have lat/lon coordinates within typical ranges
    for geom in gdf.geometry.head(10):  # Check first 10 geometries for efficiency
        if geom.is_empty:
            continue
        if not geom.bounds:
            continue
        min_x, min_y, max_x, max_y = geom.bounds
        # Longitude range: -180 to 180, Latitude range: -90 to 90
        if not (-180 <= min_x <= 180 and -180 <= max_x <= 180):
            return False
        if not (-90 <= min_y <= 90 and -90 <= max_y <= 90):
            return False
    return True

offset_coordinate_feet

offset_coordinate_feet(lat, lon, lat_feet, lon_feet)

Offset a lat/long coordinate by the designated number of feet

Parameters:

Name Type Description Default
lat float

Latitude

required
lon float

Longitude

required
lat_feet float

Number of feet to offset latitude by

required
lon_feet float

Number of feet to offset longitude by

required

Returns:

Type Description
tuple[float, float]

The offset latitude, longitude pair (in degrees)

Source code in openavmkit/utilities/geometry.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def offset_coordinate_feet(lat, lon, lat_feet, lon_feet) -> (float, float):
    """Offset a lat/long coordinate by the designated number of feet

    Parameters
    ----------
    lat : float
        Latitude
    lon : float
        Longitude
    lat_feet : float
        Number of feet to offset latitude by
    lon_feet : float
        Number of feet to offset longitude by

    Returns
    -------
    tuple[float, float]
        The offset latitude, longitude pair (in degrees)
    """
    lat_km = lat_feet * 0.0003048
    lon_km = lon_feet * 0.0003048
    return offset_coordinate_km(lat, lon, lat_km, lon_km)

offset_coordinate_m

offset_coordinate_m(lat, lon, lat_m, lon_m)

Offset a lat/long coordinate by the designated number of meters

Parameters:

Name Type Description Default
lat float

Latitude

required
lon float

Longitude

required
lat_m float

Number of meters to offset latitude by

required
lon_m float

Number of meters to offset longitude by

required

Returns:

Type Description
tuple[float, float]

The offset latitude, longitude pair (in degrees)

Source code in openavmkit/utilities/geometry.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def offset_coordinate_m(lat, lon, lat_m, lon_m) -> (float, float):
    """Offset a lat/long coordinate by the designated number of meters

    Parameters
    ----------
    lat : float
        Latitude
    lon : float
        Longitude
    lat_m : float
        Number of meters to offset latitude by
    lon_m : float
        Number of meters to offset longitude by

    Returns
    -------
    tuple[float, float]
        The offset latitude, longitude pair (in degrees)
    """
    lat_km = lat_m / 1000
    lon_km = lon_m / 1000
    return offset_coordinate_km(lat, lon, lat_km, lon_km)

offset_coordinate_miles

offset_coordinate_miles(lat, lon, lat_miles, lon_miles)

Offset a lat/long coordinate by the designated number of miles

Parameters:

Name Type Description Default
lat float

Latitude

required
lon float

Longitude

required
lat_miles float

Number of miles to offset latitude by

required
lon_miles float

Number of miles to offset longitude by

required

Returns:

Type Description
tuple[float, float]

The offset latitude, longitude pair (in degrees)

Source code in openavmkit/utilities/geometry.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def offset_coordinate_miles(lat, lon, lat_miles, lon_miles) -> (float, float):
    """Offset a lat/long coordinate by the designated number of miles

    Parameters
    ----------
    lat : float
        Latitude
    lon : float
        Longitude
    lat_miles : float
        Number of miles to offset latitude by
    lon_miles : float
        Number of miles to offset longitude by

    Returns
    -------
    tuple[float, float]
        The offset latitude, longitude pair (in degrees)
    """
    lat_km = lat_miles * 1.60934
    lon_km = lon_miles * 1.60934
    return offset_coordinate_km(lat, lon, lat_km, lon_km)