Skip to content

openavmkit.condos

Condo resolution: bring condo units into the "one row per parcel" model.

Condo units are assessment parcels with no geometry of their own -- they physically sit inside a shared building/land parcel. openavmkit drops geometry-less rows, so condos normally vanish from the universe. This module gives each condo unit a borrowed building polygon (so every centroid-based enrichment -- DEM, census, OSM distances, basic-geo, Overture -- works and yields identical "shared parcel" values per building), assigns a condo_group building identifier (a location, like neighborhood), and computes a per-unit allocated land size.

It is opt-in via data.process.condos in settings and runs once at the top of process_data -- BEFORE the universe merge / geometry attach -- mutating the loaded dataframes dict. After it runs, the existing pipeline does everything else unchanged.

Settings schema (see resolve_condos docstring)::

"condos": {
  "enabled": true,
  "select": ["isin", "bldg_type", ["CONDOMINIUM", ...]],
  "link":   { "method": "id_prefix", "id_field": "parcel_num", "prefix_len": 9, "from": "geo_parcels" },
  "group_field": "condo_group",
  "borrow_geometry": true,
  "land_share": { "method": "field", "field": "land_area_sqft" }
}

resolve_condos

resolve_condos(dataframes, settings, verbose=False)

Resolve condo units into the universe by borrowing building geometry.

Opt-in via data.process.condos.enabled. For each universe-merge source frame containing condo rows (matched by select), this:

1. links each unit to a building polygon (link.method: id_prefix | parent_id | spatial);
2. borrows that polygon as the unit's geometry (appends rows to geo_parcels);
3. writes group_field (the building id) onto the source;
4. writes a per-unit allocated land size (land_share.method: field | floor_area) to land_area_alloc_sqft and into land_area_sqft for condos;
5. auto-registers the new fields in field_classification.

Returns the (mutated) dataframes dict. A no-op when disabled.

Source code in openavmkit/condos.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
def resolve_condos(dataframes: dict, settings: dict, verbose: bool = False) -> dict:
    """Resolve condo units into the universe by borrowing building geometry.

    Opt-in via ``data.process.condos.enabled``. For each universe-merge source frame
    containing condo rows (matched by ``select``), this:
      1. links each unit to a building polygon (``link.method``: id_prefix | parent_id | spatial);
      2. borrows that polygon as the unit's geometry (appends rows to ``geo_parcels``);
      3. writes ``group_field`` (the building id) onto the source;
      4. writes a per-unit allocated land size (``land_share.method``: field | floor_area)
         to ``land_area_alloc_sqft`` and into ``land_area_sqft`` for condos;
      5. auto-registers the new fields in field_classification.

    Parameters
    ----------
    dataframes : dict
        Loaded dataframes keyed by id. Must contain ``"geo_parcels"`` with
        ``"key"`` and ``"geometry"`` columns for anything to happen. The dict
        (and the frames in it) are mutated: the borrowed-geometry flag column
        is added to ``geo_parcels`` and ``group_field`` is added to each
        processed source frame.
    settings : dict
        Full settings dict. The condo section is read through
        ``_settings_condos``; the frames to scan come from
        ``data.process.merge.universe`` (entries may be id strings or dicts
        with an ``"id"``).
    verbose : bool, optional
        When True, print per-frame and total counts.

    Returns
    -------
    dict
        The (mutated) ``dataframes`` dict. Returned unchanged when the feature
        is disabled or ``geo_parcels`` is unusable (a warning is emitted in
        the latter case).
    """
    s = _settings_condos(settings)
    if not s.get("enabled", False):
        return dataframes

    geo = dataframes.get("geo_parcels")
    if geo is None or "geometry" not in geo:
        warnings.warn("resolve_condos: no 'geo_parcels' with geometry; skipping.")
        return dataframes
    if "key" not in geo:
        # Without keys we cannot tell which units already own a polygon.
        warnings.warn("resolve_condos: 'geo_parcels' has no 'key' column; skipping.")
        return dataframes

    select = s.get("select")
    link = s.get("link", {})
    group_field = s.get("group_field", "condo_group")
    borrow = s.get("borrow_geometry", True)
    land_share = s.get("land_share", {})

    merge_univ = (
        settings.get("data", {}).get("process", {}).get("merge", {}).get("universe", [])
    )
    univ_ids = [e if isinstance(e, str) else e.get("id") for e in merge_univ]

    # Keys that already have a polygon; grows as geometry is borrowed so a
    # unit appearing in several source frames is only appended once.
    geo_keys = set(geo["key"].astype(str))
    if BORROWED_FLAG not in geo.columns:
        geo[BORROWED_FLAG] = False

    total_borrowed = 0
    for uid in univ_ids:
        df = dataframes.get(uid)
        if df is None or "key" not in df.columns:
            continue
        mask = resolve_filter(df, select) if select else pd.Series(True, index=df.index)
        n_condos = int(mask.sum())
        if n_condos == 0:
            continue
        condos = df[mask]
        condo_idx = condos.index

        link_ids, geom_map = _resolve_link(condos, dataframes, link, verbose)

        # cache building polygon areas (sqft) for floor_area land-share
        if geom_map:
            rep_gdf = gpd.GeoDataFrame(
                {"_b": list(geom_map.keys())},
                geometry=list(geom_map.values()),
                crs=geo.crs,
            )
            _GEOM_AREA_CACHE.update(dict(zip(rep_gdf["_b"], _polygon_areas_sqft(rep_gdf).values)))

        # 3) condo_group
        if group_field not in df.columns:
            df[group_field] = pd.NA
        df.loc[condo_idx, group_field] = link_ids.values

        # 2) borrow geometry for units lacking their own polygon
        if borrow:
            keys = condos["key"].astype(str)
            # One positional mask drives every selection below, so keys, link
            # ids and id values stay aligned even if index labels repeat.
            need_mask = (~keys.isin(geo_keys)).to_numpy() & link_ids.notna().to_numpy()
            need_keys = keys[need_mask]
            need_ids = link_ids[need_mask]

            id_field = link.get("id_field")
            has_id = bool(id_field) and id_field in condos.columns
            # Read values column-wise rather than via iterrows(): row-wise
            # iteration does not preserve dtypes, which can turn an integer
            # key into e.g. "123.0" and break matching against geo_keys.
            id_values = (
                condos[id_field][need_mask] if has_id else [None] * int(need_mask.sum())
            )

            rows = []
            seen = set()  # keys already borrowed within this frame
            for key, bid, id_val in zip(need_keys, need_ids, id_values):
                if key in seen:
                    continue
                geom = geom_map.get(bid)
                if geom is None:
                    continue
                seen.add(key)
                rec = {"key": key, "geometry": geom, BORROWED_FLAG: True}
                if has_id:
                    rec[id_field] = id_val
                rows.append(rec)
            if rows:
                add = gpd.GeoDataFrame(rows, geometry="geometry", crs=geo.crs)
                geo = gpd.GeoDataFrame(
                    pd.concat([geo, add], ignore_index=True), geometry="geometry", crs=geo.crs
                )
                geo_keys.update(add["key"].astype(str))
                total_borrowed += len(add)

        # 4) land share
        df = _apply_land_share(df, condo_idx, link_ids, geom_map, land_share, verbose)
        # NOTE(review): if "geo_parcels" is itself listed in the universe merge,
        # this assignment is overwritten by the final write of `geo` below.
        dataframes[uid] = df

        if verbose:
            print(
                f"resolve_condos['{uid}']: {n_condos} condo rows, "
                f"{int(link_ids.notna().sum())} linked"
            )

    dataframes["geo_parcels"] = geo
    if verbose:
        print(f"resolve_condos: borrowed geometry for {total_borrowed} condo units total")
    _auto_register_fields(settings, s)
    return dataframes