`openavmkit.ratio_study`

RatioStudy

RatioStudy(predictions, ground_truth)

Performs an IAAO-standard Ratio Study, generating all the relevant statistics.

Attributes:

Name	Type	Description
`predictions`	`ndarray`	Series representing predicted values
`ground_truth`	`ndarray`	Series representing ground truth values (typically observed sale prices)
`count`	`int`	The number of observations
`median_ratio`	`float`	The median value of all `prediction/ground_truth` ratios
`mean_ratio`	`float`	The mean value of all `prediction/ground_truth` ratios
`cod`	`float`	The coefficient of dispersion, a measure of variability (lower is better)
`cod_trim`	`float`	The coefficient of dispersion, after outlier ratios outside the interquartile range have been trimmed
`prd`	`float`	The price-related differential, a measure of vertical equity
`prb`	`float`	The price-related bias, a measure of vertical equity

Initialize a ratio study object

Parameters:

Name	Type	Description	Default
`predictions`	`ndarray`	Series representing predicted values	required
`ground_truth`	`ndarray`	Series representing ground truth values (typically observed sale prices)	required

Source code in openavmkit/ratio_study.py

def __init__(self, predictions: np.ndarray, ground_truth: np.ndarray):
    """
    Build a ratio study from paired predicted and observed values.

    The ratios are obtained from ``div_series_z_safe(predictions, ground_truth)``.
    Every statistic is computed twice: once on all ratios and once on the
    subset selected by ``stats.trim_outliers_mask``.

    Parameters
    ----------
    predictions : np.ndarray
        Series representing predicted values
    ground_truth : np.ndarray
        Series representing ground truth values (typically observed sale prices)

    Raises
    ------
    ValueError
        If ``predictions`` and ``ground_truth`` differ in length.
    """
    if len(predictions) != len(ground_truth):
        raise ValueError("predictions and ground_truth must have the same length")

    if len(predictions) == 0:
        # Nothing to measure: keep empty arrays and mark every statistic NaN.
        self.count = 0
        self.predictions = np.array([])
        self.ground_truth = np.array([])
        for stat_name in (
            "median_ratio",
            "cod",
            "cod_trim",
            "prd",
            "prb",
            "prd_trim",
            "prb_trim",
            "median_ratio_trim",
            "mean_ratio",
            "mean_ratio_trim",
        ):
            setattr(self, stat_name, float("nan"))
        return

    self.count = len(predictions)
    self.predictions = predictions
    self.ground_truth = ground_truth

    ratios = div_series_z_safe(predictions, ground_truth).astype(float)
    median_all = float(np.median(ratios)) if len(ratios) > 0 else float("nan")

    # trim the ratios to remove outliers -- trim to the interquartile range
    keep = stats.trim_outliers_mask(ratios)
    kept_ratios = ratios[keep]
    kept_predictions = predictions[keep]
    kept_ground_truth = ground_truth[keep]

    # Dispersion first, then the two vertical-equity measures; each one is
    # evaluated on the full sample and then on the trimmed sample.
    cod_all = stats.calc_cod(ratios)
    cod_kept = stats.calc_cod(kept_ratios)

    prd_all = stats.calc_prd(predictions, ground_truth)
    prd_kept = stats.calc_prd(kept_predictions, kept_ground_truth)

    # calc_prb yields three values; only the first is stored.
    prb_all, _, _ = stats.calc_prb(predictions, ground_truth)
    prb_kept, _, _ = stats.calc_prb(kept_predictions, kept_ground_truth)

    self.median_ratio = median_all
    self.mean_ratio = float("nan") if len(ratios) == 0 else float(np.mean(ratios))

    if len(kept_ratios) != 0:
        self.mean_ratio_trim = float(np.mean(kept_ratios))
        self.median_ratio_trim = float(np.median(kept_ratios))
    else:
        # Trimming removed every ratio, so the trimmed averages are undefined.
        self.mean_ratio_trim = float("nan")
        self.median_ratio_trim = float("nan")

    self.cod, self.cod_trim = cod_all, cod_kept
    self.prd, self.prd_trim = prd_all, prd_kept
    self.prb, self.prb_trim = prb_all, prb_kept

RatioStudyBootstrapped

RatioStudyBootstrapped(predictions, ground_truth, confidence_interval=0.95, iterations=1000)

Bases: RatioStudy

Performs an IAAO-standard Ratio Study, generating all the relevant statistics. This extends the base RatioStudy class, adding confidence intervals.

Attributes:

Name	Type	Description
`iterations`	`int`	Number of bootstrap iterations
`cod_ci_low`	`float`	COD, bottom of the confidence interval
`cod_ci_high`	`float`	COD, top of the confidence interval
`cod_trim_ci_low`	`float`	Trimmed COD, bottom of the confidence interval
`cod_trim_ci_high`	`float`	Trimmed COD, top of the confidence interval
`prd_ci_low`	`float`	PRD, bottom of the confidence interval
`prd_ci_high`	`float`	PRD, top of the confidence interval

Initialize a Bootstrapped ratio study object

Parameters:

Name	Type	Description	Default
`predictions`	`ndarray`	Series representing predicted values	required
`ground_truth`	`ndarray`	Series representing ground truth values (typically observed sale prices)	required
`confidence_interval`	`float`	Desired confidence interval (default is 0.95, indicating 95% confidence)	`0.95`
`iterations`	`int`	How many bootstrap iterations to perform	`1000`

Source code in openavmkit/ratio_study.py

def __init__(
    self,
    predictions: np.ndarray,
    ground_truth: np.ndarray,
    confidence_interval: float = 0.95,
    iterations: int = 1000,
):
    """
    Initialize a Bootstrapped ratio study object

    The base ratio study is run first. ``cod`` and ``cod_trim`` are then
    replaced by the results of ``stats.calc_cod_bootstrap`` and their
    confidence bounds are stored alongside them. When there are no
    observations, every bootstrapped attribute is NaN and no bootstrap is
    attempted.

    Parameters
    ----------
    predictions : np.ndarray
        Series representing predicted values
    ground_truth : np.ndarray
        Series representing ground truth values (typically observed sale prices)
    confidence_interval : float
        Desired confidence interval (default is 0.95, indicating 95% confidence)
    iterations : int
        How many bootstrap iterations to perform

    Raises
    ------
    ValueError
        If ``predictions`` and ``ground_truth`` differ in length (raised by
        the base class initializer).
    """
    super().__init__(predictions, ground_truth)

    # Recorded before the empty-input exit so the attribute always exists.
    self.iterations = iterations

    if len(predictions) == 0:
        # No observations: report NaN for every statistic and its bounds.
        self.cod = float("nan")
        self.cod_ci_low = float("nan")
        self.cod_ci_high = float("nan")
        self.cod_trim = float("nan")
        self.cod_trim_ci_low = float("nan")
        self.cod_trim_ci_high = float("nan")
        self.prd = float("nan")
        self.prd_ci_low = float("nan")
        self.prd_ci_high = float("nan")
        self.prb = float("nan")
        self.prb_ci_low = float("nan")
        self.prb_ci_high = float("nan")
        # Stop here: without this return the code below would bootstrap an
        # empty sample and overwrite the NaN values assigned above.
        return

    ratios = div_series_z_safe(predictions, ground_truth)

    # calc_cod_bootstrap's result is unpacked as (estimate, lower, upper).
    med, low, high = stats.calc_cod_bootstrap(
        ratios, confidence_interval, iterations
    )
    self.cod = med
    self.cod_ci_low = low
    self.cod_ci_high = high

    # Same bootstrap on the ratios left after stats.trim_outliers.
    med, low, high = stats.calc_cod_bootstrap(
        stats.trim_outliers(ratios),
        confidence_interval,
        iterations,
    )
    self.cod_trim = med
    self.cod_trim_ci_low = low
    self.cod_trim_ci_high = high

    # NOTE(review): PRD and PRB are not bootstrapped here. The values set by
    # the base initializer are overwritten with 0 placeholders, which is kept
    # for backward compatibility -- confirm whether this is intentional.
    self.prd = 0
    self.prd_ci_low = 0
    self.prd_ci_high = 0

    self.prb = 0
    self.prb_ci_low = 0
    self.prb_ci_high = 0

run_and_write_ratio_study_breakdowns

run_and_write_ratio_study_breakdowns(settings)

Runs ratio studies, with breakdowns, and writes them to disk.

Parameters:

Name	Type	Description	Default
`settings`	`dict`	Settings dictionary	required

Source code in openavmkit/ratio_study.py

def run_and_write_ratio_study_breakdowns(settings: dict):
    """Run ratio studies with breakdowns for each model group and write them out.

    For each id returned by ``get_model_group_ids(settings)``, the ensemble
    pickle under ``out/models/<model_group>/main/`` is looked up. Groups
    without that file are skipped after the progress message is printed.

    Parameters
    ----------
    settings : dict
        Settings dictionary
    """
    for model_group in get_model_group_ids(settings):
        print(f"Generating report for {model_group}")

        pickle_path = f"out/models/{model_group}/main/model_ensemble.pickle"
        if not os.path.exists(pickle_path):
            # No ensemble results saved for this group; nothing to report on.
            continue

        out_dir = f"out/models/{model_group}"
        os.makedirs(out_dir, exist_ok=True)

        # The stored ensemble results carry the sales frame used by the study.
        sales = read_pickle(pickle_path).df_sales
        _run_and_write_ratio_study_breakdowns(settings, sales, model_group, out_dir)