`openavmkit.quality_control`

check_land_values

check_land_values(df_in, model_group)

Run sanity checks on land values for a model group, correct what can be corrected, and report any issues found.

Parameters:

Name	Type	Description	Default
`df_in`	`DataFrame`	The dataframe you want to check	required
`model_group`	`str`	The model group you want to check	required

Returns:

Type	Description
`DataFrame`	A copy of the original dataframe, with any necessary amendments to land value

Source code in openavmkit/quality_control.py

def check_land_values(df_in: pd.DataFrame, model_group: str) -> pd.DataFrame:
    """Sanity-check land values, apply corrections, and report the findings.

    All work happens on a copy of ``df_in``. The individual checks are
    delegated to ``_perform_land_checks``. Between check passes this function
    sets negative ``model_land_value`` entries to zero and recomputes
    ``model_land_alloc``, ``model_impr_value`` and ``model_impr_alloc`` from
    ``model_market_value`` and ``model_land_value``. Any non-zero counter is
    reported through ``warnings.warn`` plus a printed per-check breakdown,
    once after the first pass and once after the final pass.

    Parameters
    ----------
    df_in : pd.DataFrame
        The dataframe to check. The ``model_land_value`` and
        ``model_market_value`` columns are read here.
    model_group : str
        Name of the model group being checked; only used in the messages.

    Returns
    -------
    pd.DataFrame
        The checked copy of ``df_in``, including any amendments to land value
        and the recomputed allocation / improvement columns.
    """

    # Human-readable description of every counter that can be reported.
    labels = {
        "market_lt_land": "Market value less than land value",
        "negative_market": "Negative market value",
        "negative_land": "Negative land value",
        "negative_impr": "Negative improvement value",
        "land_gt_market": "Land value greater than market value",
        "land_gt_market_vacant": "Land value greater than market value (vacant)",
        "land_gt_market_improved": "Land value greater than market value (improved)",
        "bldg_yes_land_alloc_ge_1": "Building exists but land allocation is 1.0",
        "bldg_no_land_alloc_ne_1": "No building exists but land allocation not 1.0",
    }

    # One zeroed counter per label, in the same order as the labels.
    counts = dict.fromkeys(labels, 0)

    df = df_in.copy()

    # First pass, with remedies enabled.
    # NOTE(review): presumably fills `counts` and amends `df` in place, since
    # no return value is used here -- confirm against _perform_land_checks.
    _perform_land_checks(df, counts, do_remedy=True)

    # Row count used as the denominator for both reports below.
    n = len(df)

    def _print_failed_checks() -> None:
        # One line per counter that is currently non-zero.
        for key, value in counts.items():
            if not value:
                continue
            label = labels[key]
            perc = value / n
            print(f"  {perc:6.2%} -- {label}: {value}/{n} rows")

    if any(counts.values()):
        warnings.warn(f"Land value sanity check failed for model group {model_group}.")
        _print_failed_checks()

    # Clamp negative land values to zero before deriving anything from them.
    is_negative_land = df["model_land_value"].lt(0)
    df.loc[is_negative_land, "model_land_value"] = 0.0

    # Derive allocation and improvement columns from market and land value.
    df["model_land_alloc"] = div_series_z_safe(
        df["model_land_value"], df["model_market_value"]
    )
    df["model_impr_value"] = df["model_market_value"] - df["model_land_value"]
    df["model_impr_alloc"] = div_series_z_safe(
        df["model_impr_value"], df["model_market_value"]
    )

    # Consistency check: the stored allocation must match a fresh derivation.
    recomputed_land_alloc = div_series_z_safe(
        df["model_land_value"], df["model_market_value"]
    )
    assert recomputed_land_alloc.equals(df["model_land_alloc"])

    # Run the checks again on the amended frame: one more pass with remedies
    # enabled, then a pass with remedies disabled whose results are reported.
    _perform_land_checks(df, counts, do_remedy=True)
    _perform_land_checks(df, counts, do_remedy=False)

    print("")
    if not any(counts.values()):
        print(f"No issues after error correction {model_group}.")
        return df

    warnings.warn(
        f"Remaining issues after error correction for model group {model_group}:"
    )
    _print_failed_checks()
    return df