Condo resolution: bring condo units into the "one row per parcel" model.
Condo units are assessment parcels with no geometry of their own -- they physically
sit inside a shared building/land parcel. openavmkit drops geometry-less rows, so condos
normally vanish from the universe. This module gives each condo unit a borrowed building
polygon (so every centroid-based enrichment -- DEM, census, OSM distances, basic-geo,
Overture -- works and yields identical "shared parcel" values per building), assigns a
condo_group building identifier (a location, like neighborhood), and computes a per-unit
allocated land size.
It is opt-in via data.process.condos in settings and runs once at the top of
process_data -- BEFORE the universe merge / geometry attach -- mutating the loaded
dataframes dict. After it runs, the existing pipeline does everything else unchanged.
Settings schema (see resolve_condos docstring)::
"condos": {
"enabled": true,
"select": ["isin", "bldg_type", ["CONDOMINIUM", ...]],
"link": { "method": "id_prefix", "id_field": "parcel_num", "prefix_len": 9, "from": "geo_parcels" },
"group_field": "condo_group",
"borrow_geometry": true,
"land_share": { "method": "field", "field": "land_area_sqft" }
}
resolve_condos
resolve_condos(dataframes, settings, verbose=False)
Resolve condo units into the universe by borrowing building geometry.
Opt-in via data.process.condos.enabled. For each universe-merge source frame
containing condo rows (matched by select), this:
1. links each unit to a building polygon (link.method: id_prefix | parent_id | spatial);
2. borrows that polygon as the unit's geometry (appends rows to geo_parcels);
3. writes group_field (the building id) onto the source;
4. writes a per-unit allocated land size (land_share.method: field | floor_area)
to land_area_alloc_sqft and into land_area_sqft for condos;
5. auto-registers the new fields in field_classification.
Returns the (mutated) dataframes dict. A no-op when disabled.
Source code in openavmkit/condos.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
def resolve_condos(dataframes: dict, settings: dict, verbose: bool = False) -> dict:
    """Resolve condo units into the universe by borrowing building geometry.

    Opt-in via ``data.process.condos.enabled``. For each universe-merge source frame
    containing condo rows (matched by ``select``), this:

    1. links each unit to a building polygon (``link.method``: id_prefix | parent_id | spatial);
    2. borrows that polygon as the unit's geometry (appends rows to ``geo_parcels``);
    3. writes ``group_field`` (the building id) onto the source;
    4. writes a per-unit allocated land size (``land_share.method``: field | floor_area)
       to ``land_area_alloc_sqft`` and into ``land_area_sqft`` for condos;
    5. auto-registers the new fields in field_classification.

    Parameters
    ----------
    dataframes : dict
        Loaded frames keyed by source id. Must contain ``"geo_parcels"`` with
        ``"geometry"`` and ``"key"`` columns, otherwise a warning is emitted and
        nothing is changed.
    settings : dict
        Full settings dict. The condo options are read through
        ``_settings_condos(settings)``; the list of source frames is read from
        ``data.process.merge.universe`` (entries are either plain ids or dicts
        with an ``"id"`` entry).
    verbose : bool
        When true, prints per-frame and total counts.

    Returns
    -------
    dict
        The same ``dataframes`` dict, mutated in place. A no-op when disabled.
        Note that the incoming ``geo_parcels`` frame gets the ``BORROWED_FLAG``
        column added in place if it is missing.
    """
    s = _settings_condos(settings)
    if not s.get("enabled", False):
        return dataframes

    geo = dataframes.get("geo_parcels")
    if geo is None or "geometry" not in geo:
        warnings.warn("resolve_condos: no 'geo_parcels' with geometry; skipping.")
        return dataframes
    # Same best-effort policy as the geometry guard: without keys there is nothing
    # to match units against, so warn and skip instead of raising a bare KeyError.
    if "key" not in geo:
        warnings.warn("resolve_condos: 'geo_parcels' has no 'key' column; skipping.")
        return dataframes

    select = s.get("select")
    link = s.get("link", {})
    group_field = s.get("group_field", "condo_group")
    borrow = s.get("borrow_geometry", True)
    land_share = s.get("land_share", {})

    merge_univ = (
        settings.get("data", {}).get("process", {}).get("merge", {}).get("universe", [])
    )
    univ_ids = [e if isinstance(e, str) else e.get("id") for e in merge_univ]

    # Keys that already own a polygon; grows as borrowed rows are appended so a unit
    # seen in several source frames is only borrowed once.
    geo_keys = set(geo["key"].astype(str))
    if BORROWED_FLAG not in geo.columns:
        geo[BORROWED_FLAG] = False

    total_borrowed = 0
    # NOTE(review): if "geo_parcels" itself is listed in merge.universe it is processed
    # here like any other frame, and whatever is stored for it inside the loop is then
    # replaced by the final ``dataframes["geo_parcels"] = geo`` below -- confirm intended.
    for uid in univ_ids:
        df = dataframes.get(uid)
        if df is None or "key" not in df.columns:
            continue
        mask = resolve_filter(df, select) if select else pd.Series(True, index=df.index)
        if mask.sum() == 0:
            continue
        condos = df[mask]
        condo_idx = condos.index
        link_ids, geom_map = _resolve_link(condos, dataframes, link, verbose)

        # cache building polygon areas (sqft) for floor_area land-share
        if geom_map:
            rep_gdf = gpd.GeoDataFrame(
                {"_b": list(geom_map.keys())},
                geometry=list(geom_map.values()),
                crs=geo.crs,
            )
            _GEOM_AREA_CACHE.update(
                dict(zip(rep_gdf["_b"], _polygon_areas_sqft(rep_gdf).values))
            )

        # 3) condo_group
        if group_field not in df.columns:
            df[group_field] = pd.NA
        df.loc[condo_idx, group_field] = link_ids.values

        # 2) borrow geometry for units lacking their own polygon
        if borrow:
            keys = condos["key"].astype(str)
            # link_ids is treated as positionally aligned with ``condos`` everywhere
            # else (``.values``), so select the needed rows with one positional mask
            # rather than mixing in a label-based ``.loc`` lookup.
            need_mask = (~keys.isin(geo_keys)).values & link_ids.notna().values
            need = condos[need_mask]
            # Column-wise values keep their dtypes; ``iterrows`` would upcast a row of
            # mixed numeric columns (an int key would stringify as "123.0" and no
            # longer match the ``astype(str)`` form used for ``geo_keys``).
            need_keys = keys.values[need_mask]
            need_ids = link_ids.values[need_mask]
            id_field = link.get("id_field")
            if id_field and id_field in need.columns:
                id_values = need[id_field].values
            else:
                id_values = None

            rows = []
            for pos, (key, bid) in enumerate(zip(need_keys, need_ids)):
                geom = geom_map.get(bid)
                if geom is None:
                    # linked to a building id that has no polygon: nothing to borrow
                    continue
                rec = {"key": str(key), "geometry": geom, BORROWED_FLAG: True}
                if id_values is not None:
                    rec[id_field] = id_values[pos]
                rows.append(rec)

            if rows:
                add = gpd.GeoDataFrame(rows, geometry="geometry", crs=geo.crs)
                geo = gpd.GeoDataFrame(
                    pd.concat([geo, add], ignore_index=True),
                    geometry="geometry",
                    crs=geo.crs,
                )
                geo_keys.update(add["key"].astype(str))
                total_borrowed += len(add)

        # 4) land share
        df = _apply_land_share(df, condo_idx, link_ids, geom_map, land_share, verbose)
        dataframes[uid] = df
        if verbose:
            print(
                f"resolve_condos['{uid}']: {int(mask.sum())} condo rows, "
                f"{int(link_ids.notna().sum())} linked"
            )

    dataframes["geo_parcels"] = geo
    if verbose:
        print(f"resolve_condos: borrowed geometry for {total_borrowed} condo units total")
    _auto_register_fields(settings, s)
    return dataframes
|