8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
def check_land_values(df_in: pd.DataFrame, model_group: str) -> pd.DataFrame:
    """Sanity-check modeled land values, correct them, and report what was found.

    The input is copied, run through ``_perform_land_checks`` with remedies
    enabled, and then the land/improvement value and allocation columns are
    re-derived from ``model_land_value`` and ``model_market_value``. The checks
    are then run again and any issues that remain are reported.

    Parameters
    ----------
    df_in : pd.DataFrame
        The dataframe to check. It is not modified; all work happens on a copy.
        The code reads ``model_land_value`` and ``model_market_value`` from it.
    model_group : str
        The model group being checked; only used in the emitted messages.

    Returns
    -------
    pd.DataFrame
        A copy of ``df_in`` with ``model_land_value``, ``model_land_alloc``,
        ``model_impr_value`` and ``model_impr_alloc`` (re)written.
    """
    checked = df_in.copy()

    # Human-readable description of every check that gets tallied.
    # The checks themselves live in _perform_land_checks (not shown here); the
    # intent, per the original notes, is:
    #   1. land value is not negative
    #   2. land value is not greater than market value
    #   3. if a building exists:
    #      3.1. land allocation is not 1.0
    #   4. if no building exists:
    #      4.1. land allocation is 1.0
    descriptions = {
        "market_lt_land": "Market value less than land value",
        "negative_market": "Negative market value",
        "negative_land": "Negative land value",
        "negative_impr": "Negative improvement value",
        "land_gt_market": "Land value greater than market value",
        "land_gt_market_vacant": "Land value greater than market value (vacant)",
        "land_gt_market_improved": "Land value greater than market value (improved)",
        "bldg_yes_land_alloc_ge_1": "Building exists but land allocation is 1.0",
        "bldg_no_land_alloc_ne_1": "No building exists but land allocation not 1.0",
    }
    # One zeroed counter per described check, in the same order.
    tallies = dict.fromkeys(descriptions, 0)

    # First pass: tally problems and let the helper apply its remedies.
    _perform_land_checks(checked, tallies, do_remedy=True)

    # Row count is captured once, after the first pass, and reused for both
    # reports below.
    total_rows = len(checked)

    def _print_failures() -> None:
        # Print one line per check with a non-zero tally.
        for check, hits in tallies.items():
            if not hits:
                continue
            description = descriptions[check]
            share = hits / total_rows
            print(f" {share:6.2%} -- {description}: {hits}/{total_rows} rows")

    if any(tallies.values()):
        warnings.warn(f"Land value sanity check failed for model group {model_group}.")
        _print_failures()

    # Derive the final improvement values and make sure everything is
    # consistent: clamp negative land to zero, then rebuild the dependent
    # columns from land and market value.
    is_negative_land = checked["model_land_value"] < 0
    checked.loc[is_negative_land, "model_land_value"] = 0.0

    checked["model_land_alloc"] = div_series_z_safe(
        checked["model_land_value"],
        checked["model_market_value"],
    )
    checked["model_impr_value"] = (
        checked["model_market_value"] - checked["model_land_value"]
    )
    checked["model_impr_alloc"] = div_series_z_safe(
        checked["model_impr_value"],
        checked["model_market_value"],
    )

    # Recompute the land allocation from the same two columns and confirm the
    # stored column agrees with it.
    # NOTE(review): nothing modifies the inputs between the two computations,
    # so this is expected to always hold — confirm it is still wanted.
    recomputed_land_alloc = div_series_z_safe(
        checked["model_land_value"],
        checked["model_market_value"],
    )
    assert recomputed_land_alloc.equals(checked["model_land_alloc"])

    # Re-count all the checks: one more remedy pass over the re-derived
    # columns, followed by a pass with remedies disabled.
    # NOTE(review): presumably _perform_land_checks overwrites the tallies
    # rather than accumulating into them — verify against the helper.
    _perform_land_checks(checked, tallies, do_remedy=True)
    _perform_land_checks(checked, tallies, do_remedy=False)

    print("")
    if not any(tallies.values()):
        print(f"No issues after error correction {model_group}.")
        return checked

    warnings.warn(
        f"Remaining issues after error correction for model group {model_group}:"
    )
    _print_failures()
    return checked