Skip to content

openavmkit.quality_control

check_land_values

check_land_values(df_in, model_group)

Perform various sanity checks on land values

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| df_in | DataFrame | The dataframe you want to check | required |
| model_group | str | The model group you want to check | required |

Returns:

| Type | Description |
| --- | --- |
| DataFrame | A copy of the original dataframe, with any necessary amendments to land value |

Source code in openavmkit/quality_control.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
def check_land_values(df_in: pd.DataFrame, model_group: str) -> pd.DataFrame:
    """Sanity-check modeled land values and rebuild the derived value columns.

    The input is copied first, so ``df_in`` itself is left untouched. The copy
    is run through ``_perform_land_checks``, a summary of every issue with a
    non-zero count is reported (a warning for the headline, ``print`` for the
    per-issue lines), negative land values are clamped to zero, and the
    allocation / improvement columns are recomputed from the land and market
    values. The checks are then run again and a second summary is reported.

    Columns read: ``model_land_value``, ``model_market_value``.
    Columns written: ``model_land_value`` (negatives clamped to 0.0),
    ``model_land_alloc``, ``model_impr_value``, ``model_impr_alloc``.

    Parameters
    ----------
    df_in : pd.DataFrame
        The dataframe you want to check
    model_group: str
        The model group you want to check; only used in the emitted messages

    Returns
    -------
    pd.DataFrame
        A copy of the original dataframe, with any necessary amendments to land value
    """
    land_col = "model_land_value"
    market_col = "model_market_value"

    # Human-readable description for every issue that gets tallied.
    labels = {
        "market_lt_land": "Market value less than land value",
        "negative_market": "Negative market value",
        "negative_land": "Negative land value",
        "negative_impr": "Negative improvement value",
        "land_gt_market": "Land value greater than market value",
        "land_gt_market_vacant": "Land value greater than market value (vacant)",
        "land_gt_market_improved": "Land value greater than market value (improved)",
        "bldg_yes_land_alloc_ge_1": "Building exists but land allocation is 1.0",
        "bldg_no_land_alloc_ne_1": "No building exists but land allocation not 1.0",
    }

    # One zeroed tally per issue, in the same order as the labels.
    counts = dict.fromkeys(labels, 0)

    df = df_in.copy()

    # First pass over the land values, with remediation switched on.
    # NOTE(review): presumably the helper fills `counts` and corrects `df`
    # in place when do_remedy is true -- confirm against _perform_land_checks.
    _perform_land_checks(df, counts, do_remedy=True)

    n = len(df)

    def _print_nonzero_counts() -> None:
        # One line per issue that actually occurred, with its share of rows.
        for key, value in counts.items():
            if not value:
                continue
            label = labels[key]
            perc = value / n
            print(f"  {perc:6.2%} -- {label}: {value}/{n} rows")

    if any(counts.values()):
        warnings.warn(f"Land value sanity check failed for model group {model_group}.")
        _print_nonzero_counts()

    # Clamp negative land values to zero, then rebuild every derived column so
    # that land + improvement is consistent with the market value.
    is_negative_land = df[land_col].lt(0)
    df.loc[is_negative_land, land_col] = 0.0

    df["model_land_alloc"] = div_series_z_safe(df[land_col], df[market_col])
    df["model_impr_value"] = df[market_col] - df[land_col]
    df["model_impr_alloc"] = div_series_z_safe(
        df["model_impr_value"], df[market_col]
    )

    # Consistency guard: recompute the land allocation from the same inputs
    # and compare it with the stored column.
    recomputed_land_alloc = div_series_z_safe(df[land_col], df[market_col])
    assert recomputed_land_alloc.equals(df["model_land_alloc"])

    # Second round of checks: one more remedial pass, then a count-only pass
    # whose results feed the final report.
    # NOTE(review): assumes the helper overwrites rather than accumulates the
    # tallies in `counts` -- confirm.
    _perform_land_checks(df, counts, do_remedy=True)
    _perform_land_checks(df, counts, do_remedy=False)

    print("")
    if not any(counts.values()):
        print(f"No issues after error correction {model_group}.")
    else:
        warnings.warn(
            f"Remaining issues after error correction for model group {model_group}:"
        )
        _print_nonzero_counts()

    return df