`openavmkit.condos`

Condo resolution: bring condo units into the "one row per parcel" model.

Condo units are assessment parcels with no geometry of their own -- they physically sit inside a shared building/land parcel. openavmkit drops geometry-less rows, so condos normally vanish from the universe. This module gives each condo unit a borrowed building polygon (so every centroid-based enrichment -- DEM, census, OSM distances, basic-geo, Overture -- works and yields identical "shared parcel" values per building), assigns a condo_group building identifier (a location, like neighborhood), and computes a per-unit allocated land size.

It is opt-in via data.process.condos in settings and runs once at the top of process_data -- BEFORE the universe merge / geometry attach -- mutating the loaded dataframes dict. After it runs, the existing pipeline does everything else unchanged.

Settings schema (see resolve_condos docstring)::

"condos": {
  "enabled": true,
  "select": ["isin", "bldg_type", ["CONDOMINIUM", ...]],
  "link":   { "method": "id_prefix", "id_field": "parcel_num", "prefix_len": 9, "from": "geo_parcels" },
  "group_field": "condo_group",
  "borrow_geometry": true,
  "land_share": { "method": "field", "field": "land_area_sqft" }
}

resolve_condos

resolve_condos(dataframes, settings, verbose=False)

Resolve condo units into the universe by borrowing building geometry.

Opt-in via data.process.condos.enabled. For each universe-merge source frame containing condo rows (matched by select), this: 1. links each unit to a building polygon (link.method: id_prefix | parent_id | spatial); 2. borrows that polygon as the unit's geometry (appends rows to geo_parcels); 3. writes group_field (the building id) onto the source; 4. writes a per-unit allocated land size (land_share.method: field | floor_area) to land_area_alloc_sqft and into land_area_sqft for condos; 5. auto-registers the new fields in field_classification.

Returns the (mutated) dataframes dict. A no-op when disabled.

Source code in openavmkit/condos.py

def resolve_condos(dataframes: dict, settings: dict, verbose: bool = False) -> dict:
    """Resolve condo units into the universe by borrowing building geometry.

    Opt-in via ``data.process.condos.enabled``. For each universe-merge source frame
    containing condo rows (matched by ``select``), this:
      1. links each unit to a building polygon (``link.method``: id_prefix | parent_id | spatial);
      2. borrows that polygon as the unit's geometry (appends rows to ``geo_parcels``);
      3. writes ``group_field`` (the building id) onto the source;
      4. writes a per-unit allocated land size (``land_share.method``: field | floor_area)
         to ``land_area_alloc_sqft`` and into ``land_area_sqft`` for condos;
      5. auto-registers the new fields in field_classification.

    Args:
        dataframes: Mapping of dataframe id -> frame. ``"geo_parcels"`` must be present
            with ``"geometry"`` and ``"key"`` columns for anything to happen; source
            frames are looked up by the ids listed under
            ``data.process.merge.universe`` and are skipped when missing or when they
            have no ``"key"`` column.
        settings: Full settings dict. The condo options are obtained through
            ``_settings_condos``; the universe source ids are read from
            ``data.process.merge.universe`` (each entry is either a string id or a
            dict with an ``"id"`` entry).
        verbose: When True, print per-frame and total progress lines.

    Returns:
        The same ``dataframes`` dict, mutated: processed source frames are written
        back under their ids and ``"geo_parcels"`` is replaced by the frame that
        includes the borrowed rows. Returned untouched when the feature is disabled
        or ``geo_parcels`` has no geometry (a warning is emitted in the latter case).

    Side effects:
        * adds the ``BORROWED_FLAG`` column (``False``) to the incoming ``geo_parcels``
          frame in place when it is missing;
        * adds/overwrites ``group_field`` on each processed source frame in place;
        * updates the module-level ``_GEOM_AREA_CACHE`` with building polygon areas.
    """
    s = _settings_condos(settings)
    if not s.get("enabled", False):
        return dataframes

    geo = dataframes.get("geo_parcels")
    if geo is None or "geometry" not in geo:
        warnings.warn("resolve_condos: no 'geo_parcels' with geometry; skipping.")
        return dataframes

    select = s.get("select")
    link = s.get("link", {})
    group_field = s.get("group_field", "condo_group")
    borrow = s.get("borrow_geometry", True)
    land_share = s.get("land_share", {})

    merge_univ = (
        settings.get("data", {}).get("process", {}).get("merge", {}).get("universe", [])
    )
    univ_ids = [e if isinstance(e, str) else e.get("id") for e in merge_univ]

    # Keys that already own a geometry row; grows as units borrow polygons so a key
    # is never given more than one borrowed row (across frames or within one frame).
    geo_keys = set(geo["key"].astype(str))
    if BORROWED_FLAG not in geo.columns:
        geo[BORROWED_FLAG] = False

    total_borrowed = 0
    for uid in univ_ids:
        df = dataframes.get(uid)
        if df is None or "key" not in df.columns:
            continue
        # No ``select`` filter means every row of the frame is treated as a condo.
        mask = resolve_filter(df, select) if select else pd.Series(True, index=df.index)
        if mask.sum() == 0:
            continue
        condos = df[mask]
        condo_idx = condos.index

        link_ids, geom_map = _resolve_link(condos, dataframes, link, verbose)

        # cache building polygon areas (sqft) for floor_area land-share
        if geom_map:
            rep_gdf = gpd.GeoDataFrame(
                {"_b": list(geom_map.keys())},
                geometry=list(geom_map.values()),
                crs=geo.crs,
            )
            _GEOM_AREA_CACHE.update(dict(zip(rep_gdf["_b"], _polygon_areas_sqft(rep_gdf).values)))

        # 3) condo_group
        if group_field not in df.columns:
            df[group_field] = pd.NA
        # ``link_ids`` is consumed positionally: one entry per condo row, in row order.
        df.loc[condo_idx, group_field] = link_ids.values

        # 2) borrow geometry for units lacking their own polygon
        if borrow:
            keys = condos["key"].astype(str)
            # Positional mask: unit has no geometry row yet AND was linked to a building.
            need_mask = (~keys.isin(geo_keys)).values & link_ids.notna().values
            id_field = link.get("id_field")
            carry_id = bool(id_field) and id_field in condos.columns

            # Read values column-wise rather than through ``iterrows()``: iterrows
            # coerces every row to one dtype, which can turn an integer key/id into a
            # float and produce a key string ("123.0") that no longer matches the
            # string form used for the membership check above ("123").
            need_keys = keys[need_mask].tolist()
            need_bids = link_ids[need_mask].tolist()
            if carry_id:
                need_id_vals = condos[id_field][need_mask].tolist()
            else:
                need_id_vals = [None] * len(need_keys)

            rows = []
            for key, bid, id_val in zip(need_keys, need_bids, need_id_vals):
                # A key repeated inside this frame already received its polygon.
                if key in geo_keys:
                    continue
                geom = geom_map.get(bid)
                if geom is None:
                    continue
                rec = {"key": key, "geometry": geom, BORROWED_FLAG: True}
                if carry_id:
                    rec[id_field] = id_val
                rows.append(rec)
                geo_keys.add(key)
            if rows:
                add = gpd.GeoDataFrame(rows, geometry="geometry", crs=geo.crs)
                geo = gpd.GeoDataFrame(
                    pd.concat([geo, add], ignore_index=True), geometry="geometry", crs=geo.crs
                )
                total_borrowed += len(add)

        # 4) land share
        df = _apply_land_share(df, condo_idx, link_ids, geom_map, land_share, verbose)
        dataframes[uid] = df

        if verbose:
            print(
                f"resolve_condos['{uid}']: {int(mask.sum())} condo rows, "
                f"{int(link_ids.notna().sum())} linked"
            )

    dataframes["geo_parcels"] = geo
    if verbose:
        print(f"resolve_condos: borrowed geometry for {total_borrowed} condo units total")
    # 5) register the newly written fields in field_classification
    _auto_register_fields(settings, s)
    return dataframes