Skip to content

openavmkit.time_adjustment

Time adjustment of sale prices.

Computes a per-day multiplier that adjusts historical sale prices to the valuation date, using a rolling median of price per area unit. The multiplier is applied via sale_price_time_adj = sale_price * multiplier and used throughout downstream ratio studies, modeling, and reporting.

The built-in engine can be wholly replaced for any model group by setting data.process.time_adjustment.from_file.<model_group> in settings.json to point at a precomputed CSV — useful when a jurisdiction publishes its own time-adjustment factors.

apply_time_adjustment

apply_time_adjustment(df_sales_in, settings, period='M', write=False, verbose=False)

Compute time adjustment multipliers and apply them to adjust sale prices forward in time.

Parameters:

Name Type Description Default
df_sales_in DataFrame

Input sales DataFrame.

required
settings dict

Settings dictionary containing time adjustment parameters.

required
period str

Period type to use for adjustment ("M", "Q", or "Y"). Defaults to "M".

'M'
write bool

Whether to write out the time adjustment data as a separate CSV file.

False
verbose bool

If True, print verbose output during computation. Defaults to False.

False

Returns:

Type Description
DataFrame

Sales DataFrame with an added sale_price_time_adj column.

Source code in openavmkit/time_adjustment.py
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
def apply_time_adjustment(
    df_sales_in: pd.DataFrame,
    settings: dict,
    period: str = "M",
    write: bool = False,
    verbose: bool = False
) -> pd.DataFrame:
    """
    Time-adjust sale prices, handling each model group separately.

    This is a thin dispatcher: it hands the sales data to `do_per_model_group`
    together with `apply_time_adjustment_per_model_group` as the per-group worker,
    keyed on `key_sale`.

    Parameters
    ----------
    df_sales_in : pandas.DataFrame
        Input sales DataFrame.
    settings : dict
        Settings dictionary containing time adjustment parameters.
    period : str, optional
        Period type to use for adjustment ("M", "Q", or "Y"). Defaults to "M".
    write : bool, optional
        Whether to write out the time adjustment data as a separate CSV file.
        Forwarded unchanged to the per-group worker.
    verbose : bool, optional
        If True, print verbose output during computation. Defaults to False.

    Returns
    -------
    pandas.DataFrame
        Sales DataFrame with an added `sale_price_time_adj` column.
    """
    # Keyword arguments forwarded to the per-model-group worker.
    per_group_kwargs = dict(
        settings=settings,
        period=period,
        write=write,
        verbose=verbose,
    )
    return do_per_model_group(
        df_sales_in,
        settings,
        apply_time_adjustment_per_model_group,
        per_group_kwargs,
        key="key_sale",
    )

apply_time_adjustment_per_model_group

apply_time_adjustment_per_model_group(df_sales_in, settings, model_group, period='M', write=False, verbose=False)

Compute time adjustment multipliers for a single model group and apply them to adjust that group's sale prices forward in time.

Parameters:

Name Type Description Default
df_sales_in DataFrame

Input sales DataFrame.

required
settings dict

Settings dictionary containing time adjustment parameters.

required
period str

Period type to use for adjustment ("M", "Q", or "Y"). Defaults to "M".

'M'
write bool

Whether to write out the time adjustment data as a separate CSV file.

False
verbose bool

If True, print verbose output during computation. Defaults to False.

False

Returns:

Type Description
DataFrame

Sales DataFrame with an added sale_price_time_adj column.

Source code in openavmkit/time_adjustment.py
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
def apply_time_adjustment_per_model_group(
    df_sales_in: pd.DataFrame,
    settings: dict,
    model_group: str,
    period: str = "M",
    write: bool = False,
    verbose: bool = False
) -> pd.DataFrame:
    """
    Compute time adjustment multipliers for one model group and apply them to
    adjust that group's sale prices forward in time.

    The daily schedule is either loaded from a precomputed file (when
    `read_time_adjustment_from_file` returns one for `model_group`) or computed
    by `calculate_time_adjustment`. The schedule is written to
    `out/time_adjustment/<model_group>/time_adjustment_schedule.csv`, then joined
    to the sales on `sale_date` (day resolution) to produce `sale_price_time_adj`.

    Parameters
    ----------
    df_sales_in : pandas.DataFrame
        Input sales DataFrame. Must contain `sale_date` and `sale_price`.
    settings : dict
        Settings dictionary containing time adjustment parameters.
    model_group : str
        Identifier of the model group being processed; used to look up a
        precomputed schedule and to name the output directory.
    period : str, optional
        Period type to use for adjustment ("M", "Q", or "Y"). Defaults to "M".
    write : bool, optional
        Whether to write out the time adjustment data as a separate CSV file.
        NOTE(review): this flag is not consulted in this function; the schedule
        CSV is currently written on every call.
    verbose : bool, optional
        If True, print verbose output during computation. Defaults to False.

    Returns
    -------
    pandas.DataFrame
        Sales DataFrame with an added `sale_price_time_adj` column. An empty
        input is returned as-is. In the returned frame `sale_date` is a
        "YYYY-MM-DD" string, because it is normalized for the join.
    """
    if len(df_sales_in) == 0:
        return df_sales_in

    unit = area_unit(settings)

    df_sales = df_sales_in.copy()

    instructions = get_time_adjustment_instructions(settings)
    try_read_from_file = read_time_adjustment_from_file(instructions, model_group)
    if try_read_from_file is not None:
        print(f"Found a valid time adjustment file for model group {model_group}. Loading...")
        df_time = try_read_from_file
    else:
        df_time = calculate_time_adjustment(df_sales_in, settings, period, verbose)
        df_time = df_time.rename(columns={"value":"start_indexed"})
        # Re-index so the LAST day equals 1.0; the reciprocal then scales any earlier
        # sale up (or down) to the level of the final day in the schedule.
        df_time["end_indexed"] = df_time["start_indexed"]/df_time["start_indexed"].iloc[-1]
        df_time["correction_factor"] = 1 / df_time["end_indexed"]

    os.makedirs(f"out/time_adjustment/{model_group}", exist_ok=True)
    df_time.to_csv(f"out/time_adjustment/{model_group}/time_adjustment_schedule.csv", index=False)

    # now we have a multiplier that we can straightforwardly multiply sales by, that will bring all sales FORWARDS in time
    # we merge the time adjustment back into the sales data
    df_time = df_time[["period","correction_factor"]].copy().rename(
        columns={"period": "sale_date"}
    )

    # ensure both dtypes are datetime:
    dtype_time = df_time["sale_date"].dtype
    dtype_sales = df_sales["sale_date"].dtype
    if dtype_time != "datetime64[ns]":
        df_time["sale_date"] = pd.to_datetime(df_time["sale_date"])
    if dtype_sales != "datetime64[ns]":
        df_sales["sale_date"] = pd.to_datetime(df_sales["sale_date"])

    # now, ensure both are converted to YYYY-MM-DD format:
    df_time["sale_date"] = df_time["sale_date"].dt.strftime("%Y-%m-%d")
    df_sales["sale_date"] = df_sales["sale_date"].dt.strftime("%Y-%m-%d")

    # The join below must be many-to-one. Two things can break that, most plausibly
    # with a user-supplied schedule file:
    #   * null schedule dates -- pandas matches NaN keys to each other, so a sale with
    #     a missing date would silently pick up a factor from a null-dated schedule row;
    #   * repeated dates (including timestamps that collapse to the same day after the
    #     strftime above) -- each repeat would duplicate every matching sale row.
    df_time = df_time.dropna(subset=["sale_date"])
    n_duplicate_days = int(df_time.duplicated(subset="sale_date").sum())
    if n_duplicate_days > 0:
        warnings.warn(
            f"Time adjustment schedule for model group '{model_group}' has "
            f"{n_duplicate_days:,} duplicate date rows; keeping the first entry per date."
        )
        df_time = df_time.drop_duplicates(subset="sale_date", keep="first")

    df_sales = pd.merge(df_sales, df_time, how="left", on="sale_date")

    # we multiply the sale price by the time adjustment
    df_sales["sale_price_time_adj"] = (
            df_sales["sale_price"] * df_sales["correction_factor"]
    )

    # When the time-adjustment schedule yielded no usable correction (e.g. insufficient
    # per-period variance for rural land sales), correction_factor is NaN and so is
    # sale_price_time_adj. Fall back to the unadjusted sale_price so downstream consumers
    # (canonical splits, spatial lag, _get_sales' positive-price filter) don't drop the row.
    n_missing = int(df_sales["sale_price_time_adj"].isna().sum())
    if n_missing > 0:
        df_sales["sale_price_time_adj"] = df_sales["sale_price_time_adj"].fillna(
            df_sales["sale_price"]
        )
        warnings.warn(
            f"Time adjustment unavailable for {n_missing:,} sales (model group "
            f"'{model_group}'); falling back to unadjusted sale_price for those rows."
        )

    # we drop the time adjustment column
    df_sales = df_sales.drop(columns=["correction_factor"])

    # Refresh the per-area time-adjusted prices only for the per-area columns that
    # already exist on the sales data.
    if f"sale_price_per_impr_{unit}" in df_sales:
        df_sales[f"sale_price_time_adj_per_impr_{unit}"] = div_df_z_safe(
            df_sales, "sale_price_time_adj", f"bldg_area_finished_{unit}"
        )
    if f"sale_price_per_land_{unit}" in df_sales:
        df_sales[f"sale_price_time_adj_per_land_{unit}"] = div_df_z_safe(
            df_sales, "sale_price_time_adj", f"land_area_{unit}"
        )

    return df_sales

calculate_time_adjustment

calculate_time_adjustment(df_sales_in, settings, period='M', verbose=False)

Calculate a time adjustment multiplier for sales data.

Processes sales data to compute a median sale price per area unit over time (at a resolution determined dynamically), interpolates missing values, and returns a DataFrame with daily time adjustment multipliers.

Parameters:

Name Type Description Default
df_sales_in DataFrame

Input sales DataFrame.

required
settings dict

Settings dictionary.

required
period str

Initial period type ("M", "Q", or "Y"). Defaults to "M".

'M'
verbose bool

If True, print progress information. Defaults to False.

False

Returns:

Type Description
DataFrame

DataFrame with time adjustment values per day.

Source code in openavmkit/time_adjustment.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def calculate_time_adjustment(
    df_sales_in: pd.DataFrame, settings: dict, period: str = "M", verbose: bool = False
) -> pd.DataFrame:
    """
    Calculate a time adjustment multiplier for sales data.

    Processes sales data to compute a median sale price per area unit over time (at a
    resolution determined dynamically), interpolates missing values, and returns a
    DataFrame with daily time adjustment multipliers.

    Parameters
    ----------
    df_sales_in : pandas.DataFrame
        Input sales DataFrame. Assumed to hold only valid sales for a single
        modeling group.
    settings : dict
        Settings dictionary.
    period : str, optional
        Initial period type ("M", "Q", or "Y"). Defaults to "M".
    verbose : bool, optional
        If True, print progress information. Defaults to False.

    Returns
    -------
    pandas.DataFrame
        DataFrame with time adjustment values per day. The flat fallback schedule
        has the columns `period` and `value`; the regular path returns whatever
        `_flatten_periods_to_days` produces.

    Raises
    ------
    ValueError
        If any required column is missing from `df_sales_in`.
    """

    unit = area_unit(settings)

    # We assume that all the sales we are presented with are valid sales, and for a single modeling group

    # We need at least 5 sales in a given time period to make a valid time adjustment
    min_sale_count = 5

    # We assume we have access to the following fields:
    essential_fields = [
        "sale_date",
        "sale_year",
        "sale_month",
        "sale_quarter",
        "sale_price",
        f"bldg_area_finished_{unit}",
        f"land_area_{unit}",
    ]
    for field in essential_fields:
        if field not in df_sales_in.columns:
            raise ValueError(f"Field '{field}' not found in the sales data.")

    df_sales = df_sales_in.copy()

    # `sale_quarter` is guaranteed to exist here because it is one of the essential
    # fields validated above, so no fallback derivation is needed. (A previous,
    # unreachable fallback built it as "<sale_year>-Q<(sale_month - 1) // 3 + 1>".)

    df_sales[f"sale_price_per_impr_{unit}"] = div_df_z_safe(
        df_sales, "sale_price", f"bldg_area_finished_{unit}"
    )
    df_sales[f"sale_price_per_land_{unit}"] = div_df_z_safe(
        df_sales, "sale_price", f"land_area_{unit}"
    )

    # Determine whether land or improvement drives value the modeling group:
    per = _determine_value_driver(df_sales, settings)
    sale_field = f"sale_price_per_{per}_{unit}"

    df_per_unfiltered = df_sales[df_sales[sale_field].gt(0)]
    # Exclude the wrong-side sales from the per-area median: vacant sales for an
    # impr-driven index, improved sales for a land-driven index. Without this,
    # vacant sales (land-only prices) divided by the parcel's currently-recorded
    # bldg_area pull the impr-PPSF median down by orders of magnitude.
    df_per = df_per_unfiltered
    if "vacant_sale" in df_per_unfiltered.columns and len(df_per_unfiltered) > 0:
        if per == "impr":
            df_per_filtered = df_per_unfiltered[~df_per_unfiltered["vacant_sale"].eq(True)]
        elif per == "land":
            df_per_filtered = df_per_unfiltered[df_per_unfiltered["vacant_sale"].eq(True)]
        else:
            df_per_filtered = df_per_unfiltered
        # If the V/I filter would empty the dataset, fall back to no filter rather
        # than producing a broken time-adjustment curve. This happens when a model
        # group has only one side (e.g. an apartment group with no vacant sales but
        # _determine_value_driver picked "land", or vice versa).
        if len(df_per_filtered) == 0:
            warnings.warn(
                f"Time-adjustment V/I filter for per={per!r} would leave 0 sales "
                f"(of {len(df_per_unfiltered)} candidates). Falling back to the "
                "unfiltered set — the resulting index may be biased."
            )
        else:
            df_per = df_per_filtered

    # If we still have no usable sales, return a flat (no-op) schedule covering the
    # sales date range. A constant `value` column normalizes to a correction factor
    # of 1.0 for every day, so prices are left unchanged.
    if len(df_per) == 0:
        warnings.warn(
            "Time adjustment: no sales with a positive "
            f"{sale_field} remain. Returning a flat multiplier=1.0 schedule."
        )
        if len(df_sales) > 0 and df_sales["sale_date"].notna().any():
            start = df_sales["sale_date"].min()
            end = df_sales["sale_date"].max()
            dates = pd.date_range(start=start, end=end, freq="D")
        else:
            # No usable dates at all: emit a single-day schedule for today.
            dates = pd.DatetimeIndex([pd.Timestamp.today().normalize()])
        return pd.DataFrame({"period": dates, "value": np.ones(len(dates))})

    # Determine the time resolution (Month, Quarter, Year) -- "M", "Q", or "Y":
    period = _determine_time_resolution(df_per, sale_field, min_sale_count, period)

    if verbose:
        print(f"--> Using period: {period}")
        print("--> Crunching time adjustment...")
    # Derive the time adjustment:
    df_crunch = _crunch_time_adjustment(df_per, sale_field, period, min_sale_count)
    if verbose:
        print("--> Flattening time adjustment...")
    # Flatten out the time adjustment to daily values:
    df_time = _flatten_periods_to_days(df_per, df_crunch, period, verbose)
    print(f"--> Time adjustment calculated for {len(df_time)} days.")

    return df_time

enrich_time_adjustment

enrich_time_adjustment(df_in, settings, write=False, verbose=False)

Enrich the sales data by generating time-adjusted sales if not already present.

Parameters:

Name Type Description Default
df_in DataFrame

Input sales DataFrame.

required
settings dict

Settings dictionary.

required
write bool

Whether to write out the time adjustment to a separate CSV file. Defaults to False.

False
verbose bool

If True, print verbose output. Defaults to False.

False

Returns:

Type Description
DataFrame

Enriched sales DataFrame.

Source code in openavmkit/time_adjustment.py
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
def enrich_time_adjustment(
    df_in: pd.DataFrame, settings: dict, write: bool = False, verbose: bool = False
) -> pd.DataFrame:
    """
    Ensure the sales data carries time-adjusted sale prices.

    Works on a copy of the input. If `sale_price_time_adj` is already present the
    copy is returned untouched; otherwise `apply_time_adjustment` is run using the
    `period` from the time adjustment instructions (falling back to "Q").

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input sales DataFrame.
    settings : dict
        Settings dictionary.
    write : bool
        Whether to write out the time adjustment to a separate CSV file. Defaults to False.
    verbose : bool, optional
        If True, print verbose output. Defaults to False.

    Returns
    -------
    pandas.DataFrame
        Enriched sales DataFrame.
    """
    enriched = df_in.copy()

    # Time adjustment instructions are looked up before the presence check.
    instructions = get_time_adjustment_instructions(settings)

    # Nothing to do when the adjusted price column already exists.
    if "sale_price_time_adj" in enriched:
        return enriched

    if verbose:
        print("Applying time adjustment...")
    chosen_period = instructions.get("period", "Q")
    return apply_time_adjustment(
        enriched.copy(),
        settings,
        period=chosen_period,
        write=write,
        verbose=verbose,
    )