Skip to content

openavmkit.time_adjustment

apply_time_adjustment

apply_time_adjustment(df_sales_in, settings, period='M', verbose=False)

Compute time adjustment multipliers and apply them to adjust sale prices forward in time.

Parameters:

Name Type Description Default
df_sales_in DataFrame

Input sales DataFrame.

required
settings dict

Settings dictionary containing time adjustment parameters.

required
period str

Period type to use for adjustment ("M", "Q", or "Y"). Defaults to "M".

'M'
verbose bool

If True, print verbose output during computation. Defaults to False.

False

Returns:

Type Description
DataFrame

Sales DataFrame with an added sale_price_time_adj column.

Source code in openavmkit/time_adjustment.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
def apply_time_adjustment(
    df_sales_in: pd.DataFrame, settings: dict, period: str = "M", verbose: bool = False
) -> pd.DataFrame:
    """
    Bring every sale price forward to the last day covered by the time adjustment.

    The daily curve produced by ``calculate_time_adjustment`` is rescaled so that
    its final day equals 1.0 and then inverted, which yields a per-day multiplier.
    That multiplier is joined onto the sales by ``sale_date`` and applied to
    ``sale_price``.

    Parameters
    ----------
    df_sales_in : pandas.DataFrame
        Input sales DataFrame. It is copied, not modified.
    settings : dict
        Settings dictionary, passed through to ``calculate_time_adjustment``.
    period : str, optional
        Period type to use for adjustment ("M", "Q", or "Y"). Defaults to "M".
    verbose : bool, optional
        If True, print verbose output during computation. Defaults to False.

    Returns
    -------
    pandas.DataFrame
        Result of left-merging the sales with the daily multipliers, with an added
        ``sale_price_time_adj`` column. ``sale_date`` is returned as a
        ``YYYY-MM-DD`` string because that is the form used as the merge key.
        ``sale_price_time_adj_per_impr_sqft`` / ``sale_price_time_adj_per_land_sqft``
        are added only when the matching un-adjusted per-sqft column is present.
    """
    adjusted = df_sales_in.copy()
    daily = calculate_time_adjustment(df_sales_in, settings, period, verbose)

    # The raw curve is 1.0 on its first day and grows (or shrinks) from there.
    # Dividing prices by it would pull every sale BACK to the first day; we want
    # to push every sale FORWARD to the last day instead. So: rescale the curve
    # so the last day is 1.0, then invert it to get a plain multiplier.
    final_value = daily["value"].iloc[-1]
    daily["value"] = 1 / (daily["value"] / final_value)

    # Rename so the curve can be joined straight onto the sales.
    daily = daily.rename(columns={"value": "time_adjustment", "period": "sale_date"})

    # Put both join keys on the same footing: coerce to datetime when needed,
    # then render as YYYY-MM-DD strings so the merge compares like with like.
    for frame in (daily, adjusted):
        if frame["sale_date"].dtype != "datetime64[ns]":
            frame["sale_date"] = pd.to_datetime(frame["sale_date"])
        frame["sale_date"] = frame["sale_date"].dt.strftime("%Y-%m-%d")

    adjusted = adjusted.merge(daily, how="left", on="sale_date")

    # Apply the multiplier; the helper column is not part of the output.
    multiplier = adjusted["time_adjustment"]
    adjusted["sale_price_time_adj"] = adjusted["sale_price"] * multiplier
    adjusted = adjusted.drop(columns=["time_adjustment"])

    # Mirror whichever per-sqft price columns the caller already carries:
    # (existing column, time-adjusted column to add, area column to divide by)
    per_sqft_columns = (
        (
            "sale_price_per_impr_sqft",
            "sale_price_time_adj_per_impr_sqft",
            "bldg_area_finished_sqft",
        ),
        (
            "sale_price_per_land_sqft",
            "sale_price_time_adj_per_land_sqft",
            "land_area_sqft",
        ),
    )
    for existing, target, area in per_sqft_columns:
        if existing in adjusted:
            adjusted[target] = div_df_z_safe(adjusted, "sale_price_time_adj", area)

    return adjusted

calculate_time_adjustment

calculate_time_adjustment(df_sales_in, settings, period='M', verbose=False)

Calculate a time adjustment multiplier for sales data.

Processes sales data to compute a median sale price per sqft over time (at a resolution determined dynamically), interpolates missing values, and returns a DataFrame with daily time adjustment multipliers.

Parameters:

Name Type Description Default
df_sales_in DataFrame

Input sales DataFrame.

required
settings dict

Settings dictionary.

required
period str

Initial period type ("M", "Q", or "Y"). Defaults to "M".

'M'
verbose bool

If True, print progress information. Defaults to False.

False

Returns:

Type Description
DataFrame

DataFrame with time adjustment values per day.

Source code in openavmkit/time_adjustment.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def calculate_time_adjustment(
    df_sales_in: pd.DataFrame, settings: dict, period: str = "M", verbose: bool = False
) -> pd.DataFrame:
    """
    Calculate a time adjustment multiplier for sales data.

    Processes sales data to compute a median sale price per sqft over time (at a
    resolution determined dynamically), interpolates missing values, and returns a
    DataFrame with daily time adjustment multipliers.

    All sales passed in are assumed to be valid and to belong to a single
    modeling group.

    Parameters
    ----------
    df_sales_in : pandas.DataFrame
        Input sales DataFrame. It is copied, not modified. Must contain
        ``sale_date``, ``sale_year``, ``sale_month``, ``sale_price``,
        ``bldg_area_finished_sqft`` and ``land_area_sqft``. ``sale_quarter`` is
        optional; when absent it is derived from ``sale_year`` and ``sale_month``
        as ``"<year>-Q<quarter>"``.
    settings : dict
        Settings dictionary.
    period : str, optional
        Initial period type ("M", "Q", or "Y"). Defaults to "M". The period
        actually used is chosen by ``_determine_time_resolution``.
    verbose : bool, optional
        If True, print progress information. Defaults to False.

    Returns
    -------
    pandas.DataFrame
        DataFrame with time adjustment values per day.

    Raises
    ------
    ValueError
        If a required field is missing from ``df_sales_in``.
    """

    # We need at least 5 sales in a given time period to make a valid time adjustment
    min_sale_count = 5

    # Fields that must be supplied by the caller. "sale_quarter" is deliberately
    # not listed: it is derived below when missing. (It used to be listed here,
    # which made the derivation unreachable.)
    essential_fields = [
        "sale_date",
        "sale_year",
        "sale_month",
        "sale_price",
        "bldg_area_finished_sqft",
        "land_area_sqft",
    ]
    for field in essential_fields:
        if field not in df_sales_in:
            raise ValueError(f"Field '{field}' not found in the sales data.")

    df_sales = df_sales_in.copy()

    if "sale_quarter" not in df_sales:
        # Months 1-3 -> quarter 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
        # NOTE(review): assumes sale_year / sale_month are integer-typed; float
        # columns would render as e.g. "2020.0-Q1.0" -- confirm against callers.
        quarter_number = (df_sales["sale_month"] - 1) // 3 + 1
        df_sales["sale_quarter"] = (
            df_sales["sale_year"].astype(str) + "-Q" + quarter_number.astype(str)
        )

    df_sales["sale_price_per_impr_sqft"] = div_df_z_safe(
        df_sales, "sale_price", "bldg_area_finished_sqft"
    )
    df_sales["sale_price_per_land_sqft"] = div_df_z_safe(
        df_sales, "sale_price", "land_area_sqft"
    )

    # Determine whether land or improvement drives value for the modeling group:
    per = _determine_value_driver(df_sales, settings)
    sale_field = f"sale_price_per_{per}_sqft"

    # Only sales with a strictly positive price per sqft take part in the analysis
    df_per = df_sales[df_sales[sale_field].gt(0)]

    # Determine the time resolution (Month, Quarter, Year) -- "M", "Q", or "Y":
    period = _determine_time_resolution(df_per, sale_field, min_sale_count, period)

    if verbose:
        print(f"--> Using period: {period}")
        print("--> Crunching time adjustment...")
    # Derive the time adjustment:
    df_crunch = _crunch_time_adjustment(df_per, sale_field, period, min_sale_count)
    if verbose:
        print("--> Flattening time adjustment...")
    # Flatten out the time adjustment to daily values:
    df_time = _flatten_periods_to_days(df_per, df_crunch, period, verbose)
    # Progress output is opt-in, like every other message in this function
    if verbose:
        print(f"--> Time adjustment calculated for {len(df_time)} days.")

    return df_time

enrich_time_adjustment

enrich_time_adjustment(df_in, settings, verbose=False)

Enrich the sales data by generating time-adjusted sales if not already present.

Parameters:

Name Type Description Default
df_in DataFrame

Input sales DataFrame.

required
settings dict

Settings dictionary.

required
verbose bool

If True, print verbose output. Defaults to False.

False

Returns:

Type Description
DataFrame

Enriched sales DataFrame.

Source code in openavmkit/time_adjustment.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
def enrich_time_adjustment(
    df_in: pd.DataFrame, settings: dict, verbose: bool = False
) -> pd.DataFrame:
    """
    Enrich the sales data by generating time-adjusted sales if not already present.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input sales DataFrame. It is never modified.
    settings : dict
        Settings dictionary. The adjustment period is read from
        ``settings["data"]["process"]["time_adjustment"]["period"]`` and falls
        back to "Q" when any of those keys is absent.
    verbose : bool, optional
        If True, print verbose output. Defaults to False.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_in`` when it already has a ``sale_price_time_adj`` column;
        otherwise the output of ``apply_time_adjustment``.
    """
    # Nothing to compute: hand back an independent copy so callers can mutate it
    if "sale_price_time_adj" in df_in:
        return df_in.copy()

    if verbose:
        print("Applying time adjustment...")

    # Gather settings (only needed when the adjustment is actually applied)
    ta = settings.get("data", {}).get("process", {}).get("time_adjustment", {})
    period = ta.get("period", "Q")

    # apply_time_adjustment works on its own copy of the frame it receives, so
    # copying here as well would only duplicate the data a second and third time.
    return apply_time_adjustment(df_in, settings, period=period, verbose=verbose)