Skip to content

openavmkit.ratio_study

RatioStudy

RatioStudy(predictions, ground_truth)

Performs an IAAO-standard Ratio Study, generating all the relevant statistics.

Attributes:

Name Type Description
predictions ndarray

Series representing predicted values

ground_truth ndarray

Series representing ground truth values (typically observed sale prices)

count int

The number of observations

median_ratio float

The median value of all prediction/ground_truth ratios

mean_ratio float

The mean value of all prediction/ground_truth ratios

cod float

The coefficient of dispersion, a measure of variability (lower is better)

cod_trim float

The coefficient of dispersion, after outlier ratios outside the interquartile range have been trimmed

prd float

The price-related differential, a measure of vertical equity

prb float

The price-related bias, a measure of vertical equity

Initialize a ratio study object

Parameters:

Name Type Description Default
predictions ndarray

Series representing predicted values

required
ground_truth ndarray

Series representing ground truth values (typically observed sale prices)

required
Source code in openavmkit/ratio_study.py
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def __init__(self, predictions: np.ndarray, ground_truth: np.ndarray):
    """
    Build a ratio study from paired predictions and observed values.

    Every statistic is computed eagerly and stored as an attribute. The
    ratio, COD, PRD and PRB statistics each get a ``*_trim`` companion that
    is computed only on the observations selected by
    ``stats.trim_outliers_mask``. Empty input produces a study whose
    ``count`` is 0 and whose statistics are all NaN.

    Parameters
    ----------
    predictions : np.ndarray
        Series representing predicted values
    ground_truth : np.ndarray
        Series representing ground truth values (typically observed sale prices)

    Raises
    ------
    ValueError
        If ``predictions`` and ``ground_truth`` differ in length.
    """
    n_obs = len(predictions)
    if n_obs != len(ground_truth):
        raise ValueError("predictions and ground_truth must have the same length")

    if n_obs == 0:
        # Nothing to measure: store empty arrays and mark every statistic
        # as undefined.
        self.count = 0
        self.predictions = np.array([])
        self.ground_truth = np.array([])
        for stat_name in (
            "median_ratio",
            "cod",
            "cod_trim",
            "prd",
            "prb",
            "prd_trim",
            "prb_trim",
            "median_ratio_trim",
            "mean_ratio",
            "mean_ratio_trim",
        ):
            setattr(self, stat_name, float("nan"))
        return

    self.count = n_obs
    self.predictions = predictions
    self.ground_truth = ground_truth

    # Prediction / ground-truth ratios (division helper guards against zeros).
    all_ratios = div_series_z_safe(predictions, ground_truth).astype(float)
    median_all = (
        float(np.median(all_ratios)) if len(all_ratios) > 0 else float("nan")
    )

    # Boolean mask of the observations that survive outlier trimming; the
    # same mask is applied to the ratios and to both input series so the
    # trimmed statistics are computed on matching observations.
    keep = stats.trim_outliers_mask(all_ratios)
    kept_ratios = all_ratios[keep]
    kept_predictions = predictions[keep]
    kept_ground_truth = ground_truth[keep]

    cod_all = stats.calc_cod(all_ratios)
    cod_kept = stats.calc_cod(kept_ratios)

    prd_all = stats.calc_prd(predictions, ground_truth)
    prd_kept = stats.calc_prd(kept_predictions, kept_ground_truth)

    # calc_prb returns a 3-tuple; only the first element is stored.
    prb_all, _, _ = stats.calc_prb(predictions, ground_truth)
    prb_kept, _, _ = stats.calc_prb(kept_predictions, kept_ground_truth)

    self.median_ratio = median_all
    self.mean_ratio = (
        float(np.mean(all_ratios)) if len(all_ratios) != 0 else float("nan")
    )

    if len(kept_ratios) != 0:
        self.mean_ratio_trim = float(np.mean(kept_ratios))
        self.median_ratio_trim = float(np.median(kept_ratios))
    else:
        # Trimming removed everything, so the trimmed ratios are undefined.
        self.mean_ratio_trim = float("nan")
        self.median_ratio_trim = float("nan")

    self.cod = cod_all
    self.cod_trim = cod_kept

    self.prd = prd_all
    self.prd_trim = prd_kept

    self.prb = prb_all
    self.prb_trim = prb_kept

RatioStudyBootstrapped

RatioStudyBootstrapped(predictions, ground_truth, confidence_interval=0.95, iterations=1000)

Bases: RatioStudy

Performs an IAAO-standard Ratio Study, generating all the relevant statistics. This extends the base RatioStudy class, adding confidence intervals.

Attributes:

Name Type Description
iterations int

Number of bootstrap iterations

cod_ci_low float

COD, bottom of the confidence interval

cod_ci_high float

COD, top of the confidence interval

cod_trim_ci_low float

Trimmed COD, bottom of the confidence interval

cod_trim_ci_high float

Trimmed COD, top of the confidence interval

prd_ci_low float

PRD, bottom of the confidence interval

prd_ci_high float

PRD, top of the confidence interval

Initialize a Bootstrapped ratio study object

Parameters:

Name Type Description Default
predictions ndarray

Series representing predicted values

required
ground_truth ndarray

Series representing ground truth values (typically observed sale prices)

required
confidence_interval float

Desired confidence interval (default is 0.95, indicating 95% confidence)

0.95
iterations int

How many bootstrap iterations to perform

1000
Source code in openavmkit/ratio_study.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
def __init__(
    self,
    predictions: np.ndarray,
    ground_truth: np.ndarray,
    confidence_interval: float = 0.95,
    iterations: int = 1000,
):
    """
    Initialize a Bootstrapped ratio study object

    Runs the base ratio study first, then replaces ``cod`` and ``cod_trim``
    with their bootstrapped estimates and attaches confidence-interval
    bounds for each.

    Parameters
    ----------
    predictions : np.ndarray
        Series representing predicted values
    ground_truth : np.ndarray
        Series representing ground truth values (typically observed sale prices)
    confidence_interval : float
        Desired confidence interval (default is 0.95, indicating 95% confidence)
    iterations : int
        How many bootstrap iterations to perform

    Notes
    -----
    No bootstrap is performed for PRD or PRB. ``prd`` and ``prb`` keep the
    point estimates computed by the base class, and their ``*_ci_low`` /
    ``*_ci_high`` bounds are NaN to signal that no interval is available.
    For empty input every statistic and bound is NaN and no bootstrap runs.
    """
    super().__init__(predictions, ground_truth)

    self.iterations = iterations
    nan = float("nan")

    # PRD/PRB intervals are never bootstrapped here; NaN marks them as
    # "not computed" rather than reporting a misleading 0.
    self.prd_ci_low = nan
    self.prd_ci_high = nan
    self.prb_ci_low = nan
    self.prb_ci_high = nan

    if len(predictions) == 0:
        self.cod = nan
        self.cod_ci_low = nan
        self.cod_ci_high = nan
        self.cod_trim = nan
        self.cod_trim_ci_low = nan
        self.cod_trim_ci_high = nan
        self.prd = nan
        self.prb = nan
        # Stop here: without this return the NaNs above would be overwritten
        # by bootstrap calls made on empty data.
        return

    ratios = div_series_z_safe(predictions, ground_truth)

    # The bootstrapped COD replaces the point estimate set by the base class.
    self.cod, self.cod_ci_low, self.cod_ci_high = stats.calc_cod_bootstrap(
        ratios, confidence_interval, iterations
    )

    (
        self.cod_trim,
        self.cod_trim_ci_low,
        self.cod_trim_ci_high,
    ) = stats.calc_cod_bootstrap(
        stats.trim_outliers(ratios),
        confidence_interval,
        iterations,
    )

    # self.prd and self.prb are intentionally left untouched: the base class
    # already computed them, and overwriting them with a placeholder would
    # discard real results.

run_and_write_ratio_study_breakdowns

run_and_write_ratio_study_breakdowns(settings)

Runs ratio studies, with breakdowns, and writes them to disk.

Parameters:

Name Type Description Default
settings dict

Settings dictionary

required
Source code in openavmkit/ratio_study.py
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
def run_and_write_ratio_study_breakdowns(settings: dict):
    """Generate ratio study breakdowns for each model group and write them out.

    For every model group id obtained from ``settings``, a progress line is
    printed and the group's ensemble pickle is looked up under
    ``out/models/<model_group>/main/``. Groups without that pickle are
    skipped; otherwise the pickled object's ``df_sales`` is passed on to
    ``_run_and_write_ratio_study_breakdowns`` together with the group's
    output directory.

    Parameters
    ----------
    settings : dict
        Settings dictionary
    """
    for model_group in get_model_group_ids(settings):
        print(f"Generating report for {model_group}")

        pickle_path = f"out/models/{model_group}/main/model_ensemble.pickle"
        if not os.path.exists(pickle_path):
            # No ensemble results for this group, so there is nothing to report.
            continue

        out_dir = f"out/models/{model_group}"
        os.makedirs(out_dir, exist_ok=True)

        df_sales = read_pickle(pickle_path).df_sales
        _run_and_write_ratio_study_breakdowns(
            settings, df_sales, model_group, out_dir
        )