`openavmkit.benchmark`

BenchmarkResults

BenchmarkResults(df_time, df_stats_test, df_stats_test_post_val, df_stats_full)

Container for benchmark results.

Attributes:

Name	Type	Description
`df_time`	`DataFrame`	DataFrame containing timing information.
`df_stats_test`	`DataFrame`	DataFrame with statistics for the test set.
`df_stats_test_post_val`	`DataFrame`	DataFrame with statistics for the test set (post-valuation-date only).
`df_stats_full`	`DataFrame`	DataFrame with statistics for the full universe.
`test_empty`	`bool`	Whether df_stats_test contains no records
`full_empty`	`bool`	Whether df_stats_full contains no records
`test_post_val_empty`	`bool`	Whether df_stats_test_post_val contains no records

Initialize a BenchmarkResults instance.

Parameters:

Name	Type	Description	Default
`df_time`	`DataFrame`	DataFrame containing timing data.	required
`df_stats_test`	`DataFrame`	DataFrame with test set statistics.	required
`df_stats_test_post_val`	`DataFrame`	DataFrame with test set (post-valuation-date only) statistics.	required
`df_stats_full`	`DataFrame`	DataFrame with full universe statistics.	required

Source code in openavmkit/benchmark.py

def __init__(
    self,
    df_time: pd.DataFrame,
    df_stats_test: pd.DataFrame,
    df_stats_test_post_val: pd.DataFrame,
    df_stats_full: pd.DataFrame,
):
    """
    Initialize a BenchmarkResults instance.

    Parameters
    ----------
    df_time : pandas.DataFrame
        DataFrame containing timing data.
    df_stats_test : pandas.DataFrame
        DataFrame with test set statistics.
    df_stats_test_post_val : pandas.DataFrame
        DataFrame with test set (post-valuation-date only) statistics.
    df_stats_full : pandas.DataFrame
        DataFrame with full universe statistics.
    """
    self.df_time = df_time
    self.df_stats_test = df_stats_test
    self.df_stats_test_post_val = df_stats_test_post_val
    self.df_stats_full = df_stats_full

    test_empty = False == (df_stats_test["count_sales"].sum() > 0)
    full_empty = False == (df_stats_full["count_sales"].sum() > 0)

    if df_stats_test_post_val is not None:
        test_post_val_empty = False == (df_stats_test_post_val["count_sales"].sum() > 0)
    else:
        test_post_val_empty = True

    self.test_empty = test_empty
    self.full_empty = full_empty
    self.test_post_val_empty = test_post_val_empty

print

print()

Return a formatted string summarizing the benchmark results.

Returns:

Type	Description
`str`	A string that includes timings, test set stats, and universe set stats.

Source code in openavmkit/benchmark.py

def print(self) -> str:
    """
    Build a text summary of the benchmark results.

    Sections appear in this order: timings, the post-valuation-date holdout
    set (only when that DataFrame is present and not flagged empty), the
    holdout set, and the study set. Each section is a heading line followed
    by the formatted DataFrame and a blank line.

    Returns
    -------
    str
        The assembled summary text.
    """
    sections = [("Timings:\n", self.df_time)]

    show_post_val = not (
        self.df_stats_test_post_val is None or self.test_post_val_empty
    )
    if show_post_val:
        sections.append(
            ("Holdout set (post-valuation-date only):\n", self.df_stats_test_post_val)
        )

    sections.append(("Holdout set:\n", self.df_stats_test))
    sections.append(("Study set:\n", self.df_stats_full))

    return "".join(
        heading + _format_benchmark_df(df) + "\n\n" for heading, df in sections
    )

MultiModelResults

MultiModelResults(model_results, benchmark)

Container for results from multiple models along with a benchmark.

Attributes:

Name	Type	Description
`model_results`	`dict[str, SingleModelResults]`	Dictionary mapping model names to their results.
`benchmark`	`BenchmarkResults`	Benchmark results computed from the model results.

Initialize a MultiModelResults instance.

Parameters:

Name	Type	Description	Default
`model_results`	`dict[str, SingleModelResults]`	Dictionary of individual model results.	required
`benchmark`	`BenchmarkResults`	Benchmark results.	required

Source code in openavmkit/benchmark.py

def __init__(
    self, model_results: dict[str, SingleModelResults], benchmark: BenchmarkResults
):
    """Store a set of per-model results together with their benchmark.

    Parameters
    ----------
    model_results: dict[str, SingleModelResults]
        Individual model results, keyed by model name.
    benchmark: BenchmarkResults
        Benchmark results that accompany ``model_results``.
    """
    self.benchmark = benchmark
    self.model_results = model_results

add_model

add_model(model, results)

Add a new model's results and update the benchmark.

Parameters:

Name	Type	Description	Default
`model`	`str`	The model name.	required
`results`	`SingleModelResults`	The results for the given model.	required

Source code in openavmkit/benchmark.py

def add_model(self, model: str, results: SingleModelResults):
    """Register one model's results and refresh the benchmark.

    Parameters
    ----------
    model: str
        Name under which the results are stored.
    results: SingleModelResults
        Results produced by that model.
    """
    all_results = self.model_results
    all_results[model] = results
    # The benchmark is computed from the full collection, so rebuild it now.
    self.benchmark = _calc_benchmark(all_results)

generate_variable_report

generate_variable_report(report, settings, model_group, best_variables)

Generate a variable selection report.

This function updates the MarkdownReport with various threshold values, weights, and summary tables based on the best variables.

Parameters:

Name	Type	Description	Default
`report`	`MarkdownReport`	The markdown report object.	required
`settings`	`dict`	The settings dictionary.	required
`model_group`	`str`	The model group identifier.	required
`best_variables`	`list[str]`	List of selected best variables.	required

Returns:

Type	Description
`MarkdownReport`	The updated markdown report.

Source code in openavmkit/benchmark.py

def generate_variable_report(
    report: MarkdownReport, settings: dict, model_group: str, best_variables: list[str]
):
    """
    Fill in the template variables of a variable selection report.

    Sets the following variables on ``report``: the locality name, the
    valuation date, the model group name, the feature-selection thresholds,
    a markdown table of the feature-selection weights, and a placeholder for
    the post-model table.

    Parameters
    ----------
    report : MarkdownReport
        The markdown report object whose variables are set.
    settings : dict
        The settings dictionary.
    model_group : str
        The model group identifier.
    best_variables : list[str]
        List of selected best variables. Not read by the current
        implementation.

    Returns
    -------
    MarkdownReport
        The same report object that was passed in, after updating it.
    """
    report.set_var(
        "locality", settings.get("locality", {}).get("name", "...LOCALITY...")
    )

    group_entry = get_model_group(settings, model_group)
    valuation_date = get_valuation_date(settings)
    report.set_var("val_date", valuation_date.strftime("%Y-%m-%d"))
    # NOTE(review): the fallback here is the whole group entry, not a string
    # such as ``model_group`` -- confirm this is intended.
    report.set_var("model_group", group_entry.get("name", group_entry))

    selection_cfg = (
        settings.get("modeling", {})
        .get("instructions", {})
        .get("feature_selection", {})
    )

    thresholds = selection_cfg.get("thresholds", {})
    # NOTE(review): the fallback ".2f" looks like a format spec rather than a
    # threshold value -- confirm against MarkdownReport.set_var.
    threshold_vars = (
        ("thresh_correlation", "correlation"),
        ("thresh_enr_coef", "enr_coef"),
        ("thresh_vif", "vif"),
        ("thresh_p_value", "p_value"),
        ("thresh_t_value", "t_value"),
        ("thresh_adj_r2", "adj_r2"),
    )
    for var_name, key in threshold_vars:
        report.set_var(var_name, thresholds.get(key, ".2f"))

    # Human-readable labels for the weight keys; unknown keys map to NaN.
    display_names = {
        "vif": "VIF",
        "p_value": "P-value",
        "t_value": "T-value",
        "corr_score": "Correlation",
        "enr_coef": "ENR",
        "coef_sign": "Coef. sign",
        "adj_r2": "R-squared",
    }
    weight_table = pd.DataFrame(
        selection_cfg.get("weights", {}).items(), columns=["Statistic", "Weight"]
    )
    weight_table["Statistic"] = weight_table["Statistic"].map(display_names)
    weight_table = weight_table.set_index("Statistic")
    report.set_var("pre_model_weights", weight_table.to_markdown())

    # TODO: Construct summary and post-model tables as needed.
    report.set_var("post_model_table", "...POST MODEL TABLE...")

    return report

get_data_split_for

get_data_split_for(name, model_group, location_fields, ind_vars, df_sales, df_universe, settings, dep_var, dep_var_test, fields_cat, interactions, test_keys, train_keys, vacant_only, hedonic, hedonic_test_against_vacant_sales=True)

Prepare a DataSplit object for a given model.

Parameters:

Name	Type	Description	Default
`name`	`str`	Model name.	required
`model_group`	`str`	The model group identifier.	required
`location_fields`	`list[str] or None`	List of location fields.	required
`ind_vars`	`list[str]`	List of independent variables.	required
`df_sales`	`DataFrame`	Sales DataFrame.	required
`df_universe`	`DataFrame`	Universe DataFrame.	required
`settings`	`dict`	The settings dictionary.	required
`dep_var`	`str`	Dependent variable for training.	required
`dep_var_test`	`str`	Dependent variable for testing.	required
`fields_cat`	`list[str]`	List of categorical fields.	required
`interactions`	`dict`	Dictionary of variable interactions.	required
`test_keys`	`list[str]`	Keys for test split.	required
`train_keys`	`list[str]`	Keys for training split.	required
`vacant_only`	`bool`	Whether to consider only vacant sales.	required
`hedonic`	`bool`	Whether to use hedonic pricing.	required
`hedonic_test_against_vacant_sales`	`bool`	Whether to test hedonic models against vacant sales. Defaults to True.	`True`

Returns:

Type	Description
`DataSplit`	A DataSplit object.

Source code in openavmkit/benchmark.py

def get_data_split_for(
    name: str,
    model_group: str,
    location_fields: list[str] | None,
    ind_vars: list[str],
    df_sales: pd.DataFrame,
    df_universe: pd.DataFrame,
    settings: dict,
    dep_var: str,
    dep_var_test: str,
    fields_cat: list[str],
    interactions: dict,
    test_keys: list[str],
    train_keys: list[str],
    vacant_only: bool,
    hedonic: bool,
    hedonic_test_against_vacant_sales: bool = True,
):
    """
    Prepare a DataSplit object for a given model.

    The independent variables handed to the DataSplit depend on ``name``:

    - ``"local_sqft"`` / ``"local_somers"``: the location fields plus fixed
      size fields.
    - ``"assessor"`` / ``"ground_truth"``: a single land-value field when
      ``hedonic`` is true, otherwise a single market-value field.
    - ``"spatial_lag"`` / ``"spatial_lag_sqft"``: spatial-lag fields derived
      from the sale field named in ``settings``.
    - ``"catboost"``: ``ind_vars`` unchanged, after the categorical fields of
      both DataFrames are passed through ``_clean_categoricals``.
    - ``"gwr"`` / ``"kernel"``: ``ind_vars`` without the latitude/longitude
      fields.
    - anything else: ``ind_vars`` unchanged.

    Parameters
    ----------
    name : str
        Model name.
    model_group : str
        The model group identifier.
    location_fields : list[str] or None
        List of location fields. None is treated as an empty list.
    ind_vars : list[str]
        List of independent variables.
    df_sales : pandas.DataFrame
        Sales DataFrame.
    df_universe : pandas.DataFrame
        Universe DataFrame.
    settings : dict
        The settings dictionary.
    dep_var : str
        Dependent variable for training.
    dep_var_test : str
        Dependent variable for testing.
    fields_cat : list[str]
        List of categorical fields.
    interactions : dict
        Dictionary of variable interactions.
    test_keys : list[str]
        Keys for test split.
    train_keys : list[str]
        Keys for training split.
    vacant_only : bool
        Whether to consider only vacant sales.
    hedonic : bool
        Whether to use hedonic pricing.
    hedonic_test_against_vacant_sales : bool, optional
        Whether to test hedonic models against vacant sales. Defaults to True.

    Returns
    -------
    DataSplit
        A DataSplit object.
    """
    # The signature allows None here; concatenating None with a list would
    # raise TypeError, so normalise it to an empty list first.
    loc_fields = [] if location_fields is None else list(location_fields)

    if name == "local_sqft":
        _ind_vars = loc_fields + ["bldg_area_finished_sqft", "land_area_sqft"]
    elif name == "local_somers":
        _ind_vars = loc_fields + [
            "bldg_area_finished_sqft",
            "frontage_ft_1",
            "depth_ft_1",
        ]
    elif name == "assessor":
        _ind_vars = ["assr_land_value"] if hedonic else ["assr_market_value"]
    elif name == "ground_truth":
        _ind_vars = ["true_land_value"] if hedonic else ["true_market_value"]
    elif name == "spatial_lag":
        sale_field = get_sale_field(settings)
        field = f"spatial_lag_{sale_field}"
        if vacant_only or hedonic:
            field = f"{field}_vacant"
        _ind_vars = [field]
    elif name == "spatial_lag_sqft":
        sale_field = get_sale_field(settings)
        _ind_vars = [
            f"spatial_lag_{sale_field}_impr_sqft",
            f"spatial_lag_{sale_field}_land_sqft",
            "bldg_area_finished_sqft",
            "land_area_sqft",
        ]
    elif name == "catboost":
        df_sales = _clean_categoricals(df_sales, fields_cat, settings)
        df_universe = _clean_categoricals(df_universe, fields_cat, settings)
        _ind_vars = ind_vars
    else:
        _ind_vars = ind_vars
        if name in ("gwr", "kernel"):
            # These models do not receive the coordinate fields as
            # independent variables.
            exclude_vars = ["latitude", "longitude", "latitude_norm", "longitude_norm"]
            _ind_vars = [var for var in _ind_vars if var not in exclude_vars]

    return DataSplit(
        df_sales,
        df_universe,
        model_group,
        settings,
        dep_var,
        dep_var_test,
        _ind_vars,
        fields_cat,
        interactions,
        test_keys,
        train_keys,
        vacant_only=vacant_only,
        hedonic=hedonic,
        hedonic_test_against_vacant_sales=hedonic_test_against_vacant_sales,
    )

get_variable_recommendations

get_variable_recommendations(df_sales, df_universe, vacant_only, settings, model_group, variables_to_use=None, tests_to_run=None, do_report=False, verbose=False)

Determine which variables are most likely to be meaningful in a model.

This function examines sales and universe data, applies feature selection via correlations, elastic net regularization, R², p-values, t-values, and VIF, and produces a set of recommended variables along with a written report.

Parameters:

Name	Type	Description	Default
`df_sales`	`DataFrame`	The sales data.	required
`df_universe`	`DataFrame`	The parcel universe data.	required
`vacant_only`	`bool`	Whether to consider only vacant sales.	required
`settings`	`dict`	The settings dictionary.	required
`model_group`	`str`	The model group to consider.	required
`variables_to_use`	`list[str] or None`	A list of variables to use for feature selection. If None, variables are pulled from the modeling section of the settings.	`None`
`tests_to_run`	`list[str] or None`	A list of tests to run. If None, all tests are run. Legal values are "corr", "r2", "p_value", "t_value", "enr", and "vif"	`None`
`do_report`	`bool`	If True, generates a report of the variable selection process.	`False`
`verbose`	`bool`	If True, prints additional debugging information.	`False`

Returns:

Type	Description
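`dict`	A dictionary with keys "variables" (the best variables list), "report" (the generated report, or None if `do_report` is False), and "df_results" (the variable-scoring results from which the best variables were chosen).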

Source code in openavmkit/benchmark.py

def get_variable_recommendations(
    df_sales: pd.DataFrame,
    df_universe: pd.DataFrame,
    vacant_only: bool,
    settings: dict,
    model_group: str,
    variables_to_use: list[str] | None = None,
    tests_to_run: list[str] | None = None,
    do_report: bool = False,
    verbose: bool = False,
) -> dict:
    """Determine which variables are most likely to be meaningful in a model.

    This function examines sales and universe data, applies feature selection via
    correlations, elastic net regularization, R², p-values, t-values, and VIF, and
    produces a set of recommended variables along with a written report.

    After the individual tests are scored, variables are removed one at a time
    (lowest ``weighted_score`` first) and the subset with the lowest
    cross-validation score is kept as the recommendation.

    Parameters
    ----------
    df_sales : pandas.DataFrame
        The sales data. If it has no ``sale_price_time_adj`` column, a warning is
        issued and time adjustment is computed first.
    df_universe : pandas.DataFrame
        The parcel universe data.
    vacant_only : bool
        Whether to consider only vacant sales.
    settings : dict
        The settings dictionary.
    model_group : str
        The model group to consider.
    variables_to_use : list[str] or None
        A list of variables to use for feature selection. If None, variables are pulled
        from the modeling section of the settings. Duplicate entries are
        dropped (first occurrence wins), and categorical variables with more
        than 50 unique values in the sales data are removed with a warning.
    tests_to_run : list[str] or None
        A list of tests to run. If None, all tests are run. Legal values are "corr",
        "r2", "p_value", "t_value", "enr", and "vif"
    do_report : bool
        If True, generates a report of the variable selection process.
    verbose : bool, optional
        If True, prints additional debugging information.

    Returns
    -------
    dict
        A dictionary with keys "variables" (the best variables list), "report"
        (the generated report, or None when ``do_report`` is False), and
        "df_results" (the variable-scoring results the selection was based on).

    Raises
    ------
    ValueError
        Re-raised from the elastic net step after printing which columns
        contain nulls.
    """

    report = MarkdownReport("variables")

    if tests_to_run is None:
        tests_to_run = ["corr", "r2", "p_value", "t_value", "enr", "vif"]

    if "sale_price_time_adj" not in df_sales:
        warnings.warn("Time adjustment was not found in sales data. Calculating now...")
        df_sales = enrich_time_adjustment(df_sales, settings, verbose=verbose)

    s_model = settings.get("modeling", {})
    vacant_status = "vacant" if vacant_only else "main"
    model_entries = s_model.get("models", {}).get(vacant_status, {})
    entry = model_entries.get("model", model_entries.get("default", {}))
    if variables_to_use is None:
        variables_to_use = entry.get("ind_vars", None)

    cats = get_fields_categorical(settings, df_sales, include_boolean=False)

    # ``variables_to_use`` can still be None here (nothing passed in and no
    # "ind_vars" configured). Skip the list clean-up in that case instead of
    # failing with "'NoneType' object is not iterable"; None is then handed
    # on to _prepare_ds unchanged.
    if variables_to_use is not None:
        # NOTE(review): the warning text says the variable "will not be
        # auto-dropped", yet it is removed from variables_to_use below --
        # confirm which behaviour is intended.
        flagged = []
        for variable in variables_to_use:
            if variable in cats and len(df_sales[variable].unique()) > 50:
                warnings.warn(
                    f"Variable '{variable}' has more than 50 unique values. No variable analysis will be done on it and it will not be auto-dropped. Hope you know what you're doing!"
                )
                flagged.append(variable)

        if flagged:
            variables_to_use = [
                variable for variable in variables_to_use if variable not in flagged
            ]

        # Check for duplicate variables in variables_to_use
        seen_vars = set()
        duplicates = []
        deduped_vars = []

        for var in variables_to_use:
            if var in seen_vars:
                duplicates.append(var)
            else:
                seen_vars.add(var)
                deduped_vars.append(var)

        if duplicates:
            print(
                f"\n⚠️ WARNING: Found duplicate variables in variables_to_use: {duplicates}"
            )
            print("Using only the first occurrence of each variable for analysis.")
            variables_to_use = deduped_vars

    # Check for duplicate columns in DataFrame (could happen from merges)
    duplicate_cols = df_sales.columns[df_sales.columns.duplicated()].tolist()
    if duplicate_cols:
        print(
            f"\n⚠️ WARNING: Found duplicate columns in sales DataFrame: {duplicate_cols}"
        )
        print(
            "This could cause errors in analysis. Keeping only first occurrence of each column."
        )
        df_sales = df_sales.loc[:, ~df_sales.columns.duplicated()]

    duplicate_cols_univ = df_universe.columns[df_universe.columns.duplicated()].tolist()
    if duplicate_cols_univ:
        print(
            f"\n⚠️ WARNING: Found duplicate columns in universe DataFrame: {duplicate_cols_univ}"
        )
        print(
            "This could cause errors in analysis. Keeping only first occurrence of each column."
        )
        df_universe = df_universe.loc[:, ~df_universe.columns.duplicated()]

    ds = _prepare_ds(
        df_sales, df_universe, model_group, vacant_only, settings, variables_to_use
    )
    ds = ds.encode_categoricals_with_one_hot()

    ds.split()

    feature_selection = s_model.get("instructions", {}).get("feature_selection", {})
    thresh = feature_selection.get("thresholds", {})

    X_sales = ds.X_sales[ds.ind_vars]
    y_sales = ds.y_sales

    if "corr" in tests_to_run:
        # Correlation
        X_corr = ds.df_sales[[ds.dep_var] + ds.ind_vars]
        corr_results = calc_correlations(X_corr, thresh.get("correlation", 0.1))
    else:
        corr_results = None

    if "enr" in tests_to_run:
        # Elastic net regularization
        try:
            enr_coefs = calc_elastic_net_regularization(
                X_sales, y_sales, thresh.get("enr", 0.01)
            )
        except ValueError:
            # Print which rows/columns contain nulls to help diagnose the
            # failure, then re-raise with the original traceback intact.
            nulls_in_X = X_sales[X_sales.isna().any(axis=1)]
            print(f"Found {len(nulls_in_X)} rows with nulls in X:")
            cols_with_null = nulls_in_X.columns[nulls_in_X.isna().any()].tolist()
            print(f"Columns with nulls: {cols_with_null}")
            raise
    else:
        enr_coefs = None

    if "r2" in tests_to_run:
        # R² values
        r2_values = calc_r2(ds.df_sales, ds.ind_vars, y_sales)
    else:
        r2_values = None

    if "p_value" in tests_to_run:
        # P Values
        p_values = calc_p_values_recursive_drop(
            X_sales, y_sales, thresh.get("p_value", 0.05)
        )
    else:
        p_values = None

    if "t_value" in tests_to_run:
        # T Values
        t_values = calc_t_values_recursive_drop(
            X_sales, y_sales, thresh.get("t_value", 2)
        )
    else:
        t_values = None

    if "vif" in tests_to_run:
        # VIF
        # Filter out boolean columns before VIF calculation: a column counts
        # as boolean if its dtype is bool or it holds at most two distinct
        # values, all of them 0/1.
        bool_cols = [
            col
            for col in X_sales.columns
            if X_sales[col].dtype == bool
            or (
                X_sales[col].isin([0, 1, True, False]).all()
                and len(X_sales[col].unique()) <= 2
            )
        ]
        vif_X = X_sales.drop(columns=bool_cols) if bool_cols else X_sales.copy()

        # Don't run VIF if we have no columns left or too few rows
        if 0 < vif_X.shape[1] < len(vif_X):
            vif = calc_vif_recursive_drop(vif_X, thresh.get("vif", 10), settings)

            # Add boolean columns back to the final VIF results with NaN VIF values
            if bool_cols and vif is not None and "final" in vif:
                bool_rows = pd.DataFrame(
                    {
                        "variable": bool_cols,
                        "vif": [float("nan")] * len(bool_cols),
                    }
                )
                vif["final"] = pd.concat([vif["final"], bool_rows], ignore_index=True)
        else:
            if verbose:
                print(
                    "Skipping VIF calculation - not enough non-boolean variables or samples"
                )
            vif = {
                "initial": pd.DataFrame(columns=["variable", "vif"]),
                "final": pd.DataFrame(columns=["variable", "vif"]),
            }
    else:
        vif = None

    # Generate final results & recommendations
    df_results = _calc_variable_recommendations(
        ds=ds,
        settings=settings,
        correlation_results=corr_results,
        enr_results=enr_coefs,
        r2_values_results=r2_values,
        p_values_results=p_values,
        t_values_results=t_values,
        vif_results=vif,
        report=report,
    )

    # Backward elimination: score the current subset, keep it if it is the
    # best seen so far, then drop the variable with the lowest weighted score.
    curr_variables = df_results["variable"].tolist()
    best_variables = curr_variables.copy()
    best_score = float("inf")

    df_cross = df_results.copy()
    y = ds.y_sales
    while len(curr_variables) > 0:
        X = ds.df_sales[curr_variables]
        cv_score = calc_cross_validation_score(X, y)
        if cv_score < best_score:
            best_score = cv_score
            best_variables = curr_variables.copy()
        worst_idx = df_cross["weighted_score"].idxmin()
        worst_variable = df_cross.loc[worst_idx, "variable"]
        curr_variables.remove(worst_variable)
        # Remove the variable from the results dataframe.
        df_cross = df_cross[df_cross["variable"].ne(worst_variable)]

    # Create a table from the list of best variables.
    df_best = pd.DataFrame(best_variables, columns=["Variable"])
    df_best["Rank"] = range(1, len(df_best) + 1)
    df_best["Description"] = df_best["Variable"]
    df_best = _apply_dd_to_df_rows(
        df_best, "Variable", settings, ds.one_hot_descendants, "name"
    )
    df_best = _apply_dd_to_df_rows(
        df_best, "Description", settings, ds.one_hot_descendants, "description"
    )
    df_best = df_best[["Rank", "Variable", "Description"]]
    # Blank out descriptions that merely repeat the variable name.
    df_best.loc[df_best["Variable"].eq(df_best["Description"]), "Description"] = ""
    df_best.set_index("Rank", inplace=True)

    if do_report:
        report.set_var("summary_table", df_best.to_markdown())
        report = generate_variable_report(report, settings, model_group, best_variables)
    else:
        report = None

    return {"variables": best_variables, "report": report, "df_results": df_results}

run_ensemble

run_ensemble(df_sales, df_universe, model_group, vacant_only, dep_var, dep_var_test, outpath, all_results, settings, verbose=False, hedonic=False)

Run an ensemble model based on the provided parameters.

This function optimizes the ensemble model and runs it, returning the results and the list of models used in the ensemble.

Parameters:

Name	Type	Description	Default
`df_sales`	`DataFrame or None`	Sales DataFrame. If None, it will be read from the MultiModelResults.	required
`df_universe`	`DataFrame or None`	Universe DataFrame. If None, it will be read from the MultiModelResults.	required
`model_group`	`str`	Model group identifier.	required
`vacant_only`	`bool`	Whether to use only vacant sales.	required
`dep_var`	`str`	Dependent variable for training.	required
`dep_var_test`	`str`	Dependent variable for testing.	required
`outpath`	`str`	Output path for saving results.	required
`all_results`	`MultiModelResults`	MultiModelResults containing all model results.	required
`settings`	`dict`	Settings dictionary.	required
`verbose`	`bool`	If True, prints additional information. Defaults to False.	`False`
`hedonic`	`bool`	Whether to use hedonic pricing. Defaults to False.	`False`

Returns:

Type	Description
`tuple[SingleModelResults, list[str]]`	A tuple containing the SingleModelResults of the ensemble model and a list of models used in the ensemble.

Source code in openavmkit/benchmark.py

def run_ensemble(
    df_sales: pd.DataFrame | None,
    df_universe: pd.DataFrame | None,
    model_group: str,
    vacant_only: bool,
    dep_var: str,
    dep_var_test: str,
    outpath: str,
    all_results: MultiModelResults,
    settings: dict,
    verbose: bool = False,
    hedonic: bool = False,
) -> tuple[SingleModelResults, list[str]]:
    """Choose the ensemble's member models, then run the ensemble.

    The member list comes from ``_optimize_ensemble`` (called with no
    preset list); that list is then handed to ``_run_ensemble``.

    Parameters
    ----------
    df_sales : pandas.DataFrame or None
        Sales DataFrame. If None, it will be read from the MultiModelResults.
    df_universe : pandas.DataFrame or None
        Universe DataFrame. If None, it will be read from the MultiModelResults.
    model_group : str
        Model group identifier.
    vacant_only : bool
        Whether to use only vacant sales.
    dep_var : str
        Dependent variable for training.
    dep_var_test : str
        Dependent variable for testing.
    outpath : str
        Output path for saving results.
    all_results : MultiModelResults
        MultiModelResults containing all model results.
    settings : dict
        Settings dictionary.
    verbose : bool, optional
        If True, prints additional information. Defaults to False.
    hedonic : bool, optional
        Whether to use hedonic pricing. Defaults to False.

    Returns
    -------
    tuple[SingleModelResults, list[str]]
        The ensemble's SingleModelResults, followed by the list of models
        that make up the ensemble.
    """
    # Step 1: determine which models belong in the ensemble.
    chosen_models = _optimize_ensemble(
        df_sales,
        df_universe,
        model_group,
        vacant_only,
        dep_var,
        dep_var_test,
        all_results,
        settings,
        verbose=verbose,
        hedonic=hedonic,
        ensemble_list=None,
    )

    # Step 2: run the ensemble over exactly those models.
    ensemble_results = _run_ensemble(
        df_sales,
        df_universe,
        model_group,
        vacant_only=vacant_only,
        hedonic=hedonic,
        dep_var=dep_var,
        dep_var_test=dep_var_test,
        outpath=outpath,
        ensemble_list=chosen_models,
        all_results=all_results,
        settings=settings,
        verbose=verbose,
    )

    return ensemble_results, chosen_models

run_models

run_models(sup, settings, save_params=False, use_saved_params=True, save_results=False, verbose=False, run_main=True, run_vacant=True, run_hedonic=True, run_ensemble=True, do_shaps=False, do_plots=False)

Runs predictive models on the given SalesUniversePair.

This function takes detailed instructions from the provided settings dictionary and handles all the internal details like splitting the data, training the models, and saving the results. It performs basic statistical analysis on each model, and optionally combines results into an ensemble model.

If "run_main" is true, it will run normal models as well as hedonic models (if the user so specifies), "hedonic" in this context meaning models that attempt to generate a land value and an improvement value separately. If "run_vacant" is true, it will run vacant models as well -- models that only use vacant sales as evidence to generate land values.

This function iterates over model groups and runs models for both main and vacant cases.

Parameters:

Name	Type	Description	Default
`sup`	`SalesUniversePair`	Sales and universe data.	required
`settings`	`dict`	The settings dictionary.	required
`save_params`	`bool`	Whether to save model parameters.	`False`
`use_saved_params`	`bool`	Whether to use saved model parameters.	`True`
`save_results`	`bool`	Whether to save model results.	`False`
`verbose`	`bool`	If True, prints additional information.	`False`
`run_main`	`bool`	Whether to run main (non-vacant) models.	`True`
`run_vacant`	`bool`	Whether to run vacant models.	`True`
`run_hedonic`	`bool`	Whether to run hedonic models.	`True`
`run_ensemble`	`bool`	Whether to run ensemble models.	`True`
`do_shaps`	`bool`	Whether to compute SHAP values.	`False`
`do_plots`	`bool`	Whether to plot scatterplots	`False`

Returns:

Type	Description
`MultiModelResults`	The MultiModelResults containing all model results and benchmarks.

Source code in openavmkit/benchmark.py

def run_models(
    sup: SalesUniversePair,
    settings: dict,
    save_params: bool = False,
    use_saved_params: bool = True,
    save_results: bool = False,
    verbose: bool = False,
    run_main: bool = True,
    run_vacant: bool = True,
    run_hedonic: bool = True,
    run_ensemble: bool = True,
    do_shaps: bool = False,
    do_plots: bool = False
):
    """
    Run the configured models for every model group of a SalesUniversePair.

    The model groups come from ``settings["modeling"]["instructions"]["model_groups"]``;
    when that list is empty they are looked up with ``get_model_group_ids``. For each
    model group up to three passes are executed, always in the order "main", "vacant",
    "hedonic", each gated by its own ``run_*`` flag. A pass is also skipped when the
    settings list ``"all"`` under ``instructions[<pass>]["skip"][<model_group>]``.

    The actual work of each pass (splitting, training, statistics, ensembling) is
    delegated to ``_run_models``.

    Parameters
    ----------
    sup : SalesUniversePair
        Sales and universe data.
    settings : dict
        The settings dictionary.
    save_params : bool, optional
        Whether to save model parameters.
    use_saved_params : bool, optional
        Whether to use saved model parameters.
    save_results : bool, optional
        Whether to save model results. When true, the collected results are also
        handed to ``write_out_all_results`` once all groups have run.
    verbose : bool, optional
        If True, prints additional information.
    run_main : bool, optional
        Whether to run the "main" pass.
    run_vacant : bool, optional
        Whether to run the "vacant" pass.
    run_hedonic : bool, optional
        Whether to run the "hedonic" pass.
    run_ensemble : bool, optional
        Whether to run ensemble models.
    do_shaps : bool, optional
        Whether to compute SHAP values.
    do_plots : bool, optional
        Whether to plot scatterplots.

    Returns
    -------
    dict
        Maps model group identifier to the non-None value returned by ``_run_models``.
        Results are stored under the model group key alone, so when several passes
        produce a result for the same group only the last one is kept.
    """
    timer = TimingData()

    timer.start("setup")
    instructions = settings.get("modeling", {}).get("instructions", {})
    model_groups = instructions.get("model_groups", [])

    df_univ = sup["universe"]

    if len(model_groups) == 0:
        model_groups = get_model_group_ids(settings, df_univ)

    # Pass name -> whether the caller enabled it. Insertion order is the run order.
    enabled_passes = {
        "main": run_main,
        "vacant": run_vacant,
        "hedonic": run_hedonic,
    }

    dict_all_results = {}
    timer.stop("setup")

    timer.start("run model groups")
    for model_group in model_groups:
        group_timer_key = f"model group: {model_group}"
        timer.start(group_timer_key)

        for pass_name, is_enabled in enabled_passes.items():
            if not is_enabled:
                continue

            skip_list = (
                instructions.get(pass_name, {}).get("skip", {}).get(model_group, [])
            )
            if "all" in skip_list:
                if verbose:
                    print(
                        f"Skipping all models for model_group: {model_group}/{pass_name}"
                    )
                continue

            if verbose:
                banner = "******************************************************"
                headline = f"Running models for model_group: {model_group}"
                for line in ("", "", banner, headline, banner, "", ""):
                    print(line)

            mg_results = _run_models(
                sup,
                model_group,
                settings,
                pass_name,
                save_params,
                use_saved_params,
                save_results,
                verbose,
                run_ensemble,
                do_shaps=do_shaps,
                do_plots=do_plots
            )
            if mg_results is not None:
                # Keyed by group only: a later pass replaces an earlier one.
                dict_all_results[model_group] = mg_results

        timer.stop(group_timer_key)
    timer.stop("run model groups")

    if save_results:
        timer.start("write")
        write_out_all_results(sup, dict_all_results)
        timer.stop("write")

    # The timing summary is printed regardless of `verbose`.
    print("**********TIMING FOR RUN ALL MODELS***********")
    print(timer.print())
    print("***********************************************")

    return dict_all_results

run_one_hedonic_model

run_one_hedonic_model(df_sales, df_univ, settings, model, smr, model_group, dep_var, dep_var_test, fields_cat, outpath, hedonic_test_against_vacant_sales=True, save_results=False, verbose=False)

Run a single hedonic model based on provided parameters and return its results.

This function is similar to run_one_model but specifically tailored for hedonic models.

Parameters:

Name	Type	Description	Default
`df_sales`	`DataFrame`	Sales DataFrame.	required
`df_univ`	`DataFrame`	Universe DataFrame.	required
`settings`	`dict`	Settings dictionary.	required
`model`	`str`	Model name.	required
`smr`	`SingleModelResults`	SingleModelResults object containing initial model results.	required
`model_group`	`str`	Model group identifier.	required
`dep_var`	`str`	Dependent variable for training.	required
`dep_var_test`	`str`	Dependent variable for testing.	required
`fields_cat`	`list[str]`	List of categorical fields.	required
`outpath`	`str`	Output path for saving results.	required
`hedonic_test_against_vacant_sales`	`bool`	Whether to test hedonic models against vacant sales. Defaults to True.	`True`
`save_results`	`bool`	Whether to save results. Defaults to False.	`False`
`verbose`	`bool`	If True, prints additional information. Defaults to False.	`False`

Returns:

Type	Description
`SingleModelResults or None`	SingleModelResults if successful, else None.

Source code in openavmkit/benchmark.py

def run_one_hedonic_model(
    df_sales: pd.DataFrame,
    df_univ: pd.DataFrame,
    settings: dict,
    model: str,
    smr: SingleModelResults,
    model_group: str,
    dep_var: str,
    dep_var_test: str,
    fields_cat: list[str],
    outpath: str,
    hedonic_test_against_vacant_sales: bool = True,
    save_results: bool = False,
    verbose: bool = False,
):
    """Re-predict an already-run model in hedonic mode and return the results.

    A new hedonic data split is built from the independent variables, interactions
    and train/test keys of the supplied ``smr``, attached to ``smr`` in place of its
    previous split, and then passed to ``_predict_one_model``.

    Parameters
    ----------
    df_sales : pandas.DataFrame
        Sales DataFrame.
    df_univ : pandas.DataFrame
        Universe DataFrame.
    settings : dict
        Settings dictionary.
    model : str
        Model name.
    smr : SingleModelResults
        Results of the initial model run. Its ``ds`` attribute is overwritten with
        the hedonic data split unless the model is skipped.
    model_group : str
        Model group identifier.
    dep_var : str
        Dependent variable for training.
    dep_var_test : str
        Dependent variable for testing.
    fields_cat : list[str]
        List of categorical fields.
    outpath : str
        Output path for saving results.
    hedonic_test_against_vacant_sales : bool, optional
        Whether to test hedonic models against vacant sales. Defaults to True.
    save_results : bool, optional
        Whether to save results. Defaults to False.
    verbose : bool, optional
        If True, prints additional information. Defaults to False.

    Returns
    -------
    SingleModelResults or None
        The value returned by ``_predict_one_model``, or None when testing against
        vacant sales is requested and the split holds fewer than 15 sales.
    """
    # Neighborhood first, then market area -- same order as the lookups below.
    location_fields = [
        get_important_field(settings, field_id, df_sales)
        for field_id in ("loc_neighborhood", "loc_market_area")
    ]

    hedonic_split = get_data_split_for(
        name=model,
        model_group=model_group,
        location_fields=location_fields,
        ind_vars=smr.ind_vars,
        df_sales=df_sales,
        df_universe=df_univ,
        settings=settings,
        dep_var=dep_var,
        dep_var_test=dep_var_test,
        fields_cat=fields_cat,
        interactions=smr.ds.interactions.copy(),
        test_keys=smr.ds.test_keys,
        train_keys=smr.ds.train_keys,
        vacant_only=False,
        hedonic=True,
        hedonic_test_against_vacant_sales=hedonic_test_against_vacant_sales,
    )

    # Prediction is re-run here without going through run(), which is what
    # would normally perform the split, so do it explicitly.
    hedonic_split.split()

    if hedonic_test_against_vacant_sales:
        if len(hedonic_split.y_sales) < 15:
            print("Skipping hedonic model because there are not enough sale records...")
            return None

    smr.ds = hedonic_split
    return _predict_one_model(
        smr=smr,
        model=model,
        outpath=outpath,
        settings=settings,
        save_results=save_results,
        verbose=verbose,
    )

run_one_model

run_one_model(df_sales, df_universe, vacant_only, model_group, model, model_entries, settings, dep_var, dep_var_test, best_variables, fields_cat, outpath, save_params, use_saved_params, save_results, verbose=False, hedonic=False, test_keys=None, train_keys=None)

Run a single model based on provided parameters and return its results.

Parameters:

Name	Type	Description	Default
`df_sales`	`DataFrame`	Sales DataFrame.	required
`df_universe`	`DataFrame`	Universe DataFrame.	required
`vacant_only`	`bool`	Whether to use only vacant sales.	required
`model_group`	`str`	Model group identifier.	required
`model`	`str`	Model name.	required
`model_entries`	`dict`	Dictionary of model configuration entries.	required
`settings`	`dict`	Settings dictionary.	required
`dep_var`	`str`	Dependent variable for training.	required
`dep_var_test`	`str`	Dependent variable for testing.	required
`best_variables`	`list[str]`	List of best variables selected.	required
`fields_cat`	`list[str]`	List of categorical fields.	required
`outpath`	`str`	Output path for saving results.	required
`save_params`	`bool`	Whether to save parameters.	required
`use_saved_params`	`bool`	Whether to use saved parameters.	required
`save_results`	`bool`	Whether to save results.	required
`verbose`	`bool`	If True, prints additional information.	`False`
`hedonic`	`bool`	Whether to use hedonic pricing.	`False`
`test_keys`	`list[str] or None`	Optional list of test keys (will be read from disk if not provided).	`None`
`train_keys`	`list[str] or None`	Optional list of training keys (will be read from disk if not provided).	`None`

Returns:

Type	Description
`SingleModelResults or None`	SingleModelResults if successful, else None.

Source code in openavmkit/benchmark.py

def run_one_model(
    df_sales: pd.DataFrame,
    df_universe: pd.DataFrame,
    vacant_only: bool,
    model_group: str,
    model: str,
    model_entries: dict,
    settings: dict,
    dep_var: str,
    dep_var_test: str,
    best_variables: list[str],
    fields_cat: list[str],
    outpath: str,
    save_params: bool,
    use_saved_params: bool,
    save_results: bool,
    verbose: bool = False,
    hedonic: bool = False,
    test_keys: list[str] | None = None,
    train_keys: list[str] | None = None,
) -> SingleModelResults | None:
    """
    Run a single model based on provided parameters and return its results.

    The model's configuration is read from ``model_entries[model]``, falling back to
    ``model_entries["default"]``. A ``*`` in the model name switches on sales chasing
    (0.01) and is stripped before the name is used to pick the implementation.

    Parameters
    ----------
    df_sales : pandas.DataFrame
        Sales DataFrame.
    df_universe : pandas.DataFrame
        Universe DataFrame.
    vacant_only : bool
        Whether to use only vacant sales.
    model_group : str
        Model group identifier.
    model : str
        Model name.
    model_entries : dict
        Dictionary of model configuration entries.
    settings : dict
        Settings dictionary.
    dep_var : str
        Dependent variable for training.
    dep_var_test : str
        Dependent variable for testing.
    best_variables : list[str]
        List of best variables selected. Used in place of the configured variables
        only when the model entry does not define ``ind_vars`` itself.
    fields_cat : list[str]
        List of categorical fields.
    outpath : str
        Output path for saving results.
    save_params : bool
        Whether to save parameters.
    use_saved_params : bool
        Whether to use saved parameters.
    save_results : bool
        Whether to save results.
    verbose : bool, optional
        If True, prints additional information.
    hedonic : bool, optional
        Whether to use hedonic pricing.
    test_keys : list[str] or None, optional
        Optional list of test keys (will be read from disk if not provided).
    train_keys : list[str] or None, optional
        Optional list of training keys (will be read from disk if not provided).

    Returns
    -------
    SingleModelResults or None
        SingleModelResults if successful, or None when the data split holds fewer
        than 15 sales.

    Raises
    ------
    ValueError
        If neither a model entry nor a default entry exists, if no ``ind_vars`` can
        be found for the model, or if the model name is not recognized.
    """

    t = TimingData()

    t.start("setup")
    model_name = model

    entry: dict | None = model_entries.get(model, None)
    default_entry: dict | None = model_entries.get("default", {})
    if entry is None:
        if default_entry is None:
            raise ValueError(
                f"Model entry for {model} not found, and there is no default entry!"
            )
        entry = default_entry
    if default_entry is None:
        # An explicit null "default" must not break the ind_vars fallback below.
        default_entry = {}

    if "*" in model:
        sales_chase = 0.01
        model_name = model.replace("*", "")
    else:
        sales_chase = False

    if verbose:
        print(f"------------------------------------------------")
        print(f"Running model {model} on {len(df_sales)} rows...")

    are_ind_vars_default = entry.get("ind_vars", None) is None
    ind_vars: list | None = entry.get("ind_vars", None)
    if ind_vars is None:
        ind_vars = default_entry.get("ind_vars", None)
    # Validate BEFORE de-duplicating: iterating over None would raise a TypeError
    # and hide the intended, more descriptive error.
    if ind_vars is None:
        raise ValueError(f"ind_vars not found for model {model}")
    # no duplicates! dict.fromkeys keeps the configured order, whereas a set
    # round-trip would make the feature order vary from one interpreter run to the next.
    ind_vars = list(dict.fromkeys(ind_vars))

    if are_ind_vars_default:
        if (best_variables is not None) and (set(ind_vars) != set(best_variables)):
            if verbose:
                print(
                    f"--> using default variables, auto-optimized variable list: {best_variables}"
                )
            ind_vars = best_variables

    interactions = get_variable_interactions(entry, settings, df_sales)
    location_fields = get_locations(settings, df_sales)

    if test_keys is None or train_keys is None:
        # Both key lists are replaced together, even if only one was missing.
        test_keys, train_keys = _read_split_keys(model_group)
    t.stop("setup")

    t.start("data split")
    ds = get_data_split_for(
        name=model_name,
        model_group=model_group,
        location_fields=location_fields,
        ind_vars=ind_vars,
        df_sales=df_sales,
        df_universe=df_universe,
        settings=settings,
        dep_var=dep_var,
        dep_var_test=dep_var_test,
        fields_cat=fields_cat,
        interactions=interactions,
        test_keys=test_keys,
        train_keys=train_keys,
        vacant_only=vacant_only,
        hedonic=hedonic,
        hedonic_test_against_vacant_sales=True,
    )
    t.stop("data split")

    t.start("setup")
    if len(ds.y_sales) < 15:
        if verbose:
            print(f"--> model {model} has less than 15 sales. Skipping...")
        t.stop("setup")
        return None

    intercept = entry.get("intercept", True)
    n_trials = entry.get("n_trials", 50)
    print(f"n_trials = {n_trials} for model: {model_name}")
    t.stop("setup")

    t.start("run")
    if model_name == "garbage":
        results = run_garbage(
            ds, normal=False, sales_chase=sales_chase, verbose=verbose
        )
    elif model_name == "garbage_normal":
        results = run_garbage(ds, normal=True, sales_chase=sales_chase, verbose=verbose)
    elif model_name == "mean":
        results = run_average(
            ds, average_type="mean", sales_chase=sales_chase, verbose=verbose
        )
    elif model_name == "median":
        results = run_average(
            ds, average_type="median", sales_chase=sales_chase, verbose=verbose
        )
    elif model_name == "naive_sqft":
        results = run_naive_sqft(ds, sales_chase=sales_chase, verbose=verbose)
    elif model_name == "local_sqft":
        results = run_local_sqft(
            ds,
            location_fields=location_fields,
            sales_chase=sales_chase,
            verbose=verbose,
        )
    elif model_name == "local_somers":
        results = run_local_somers(
            ds,
            location_fields=location_fields,
            sales_chase=sales_chase,
            verbose=verbose,
        )
    elif model_name == "assessor":
        results = run_pass_through(ds, verbose=verbose)
    elif model_name == "ground_truth":
        results = run_ground_truth(ds, verbose=verbose)
    elif model_name == "spatial_lag":
        results = run_spatial_lag(ds, per_sqft=False, verbose=verbose)
    elif model_name == "spatial_lag_sqft":
        results = run_spatial_lag(ds, per_sqft=True, verbose=verbose)
    elif model_name == "mra":
        results = run_mra(ds, intercept=intercept, verbose=verbose)
    elif model_name == "kernel":
        results = run_kernel(
            ds, outpath, save_params, use_saved_params, verbose=verbose
        )
    elif model_name == "gwr":
        results = run_gwr(ds, outpath, save_params, use_saved_params, verbose=verbose)
    elif model_name == "xgboost":
        results = run_xgboost(
            ds, outpath, save_params, use_saved_params, n_trials=n_trials, verbose=verbose
        )
    elif model_name == "lightgbm":
        results = run_lightgbm(
            ds, outpath, save_params, use_saved_params, n_trials=n_trials, verbose=verbose
        )
    elif model_name == "catboost":
        results = run_catboost(
            ds, outpath, save_params, use_saved_params, n_trials=n_trials, verbose=verbose
        )
    else:
        raise ValueError(f"Model {model_name} not found!")
    t.stop("run")

    if ds.vacant_only or ds.hedonic:
        # If this is a vacant or hedonic model, we attempt to load a corresponding "full value" model
        # and clamp the land predictions (the details live in _clamp_land_predictions).
        max_trim = _get_max_ratio_study_trim(settings, results.ds.model_group)
        results = _clamp_land_predictions(results, results.ds.model_group, model_name, outpath, max_trim)

    if save_results:
        t.start("write")
        _write_model_results(results, outpath, settings)
        t.stop("write")

    return results

try_variables

try_variables(sup, settings, verbose=False, plot=False, do_report=False)

Experiment with variables to determine which are most useful for modeling.

Parameters:

Name	Type	Description	Default
`sup`	`SalesUniversePair`	The SalesUniversePair containing sales and universe data.	required
`settings`	`dict`	Settings dictionary	required
`verbose`	`bool`	Whether to print verbose output. Default is False.	`False`
`plot`	`bool`	Whether to generate plots. Default is False.	`False`
`do_report`	`bool`	Whether to generate a pdf report. Default is False.	`False`

Source code in openavmkit/benchmark.py

def try_variables(
    sup: SalesUniversePair,
    settings: dict,
    verbose: bool = False,
    plot: bool = False,
    do_report: bool = False,
):
    """Experiment with variables to determine which are most useful for modeling.

    For every model group (except those listed under
    ``modeling.try_variables.skip``) the candidate variables from
    ``modeling.try_variables.variables`` are scored with
    ``get_variable_recommendations``, once for all sales ("main") and once for
    vacant sales only ("vacant_only"). Each result table is displayed and written to
    ``out/try/<model_group>/<main|vacant_only>.csv``.

    Parameters
    ----------
    sup: SalesUniversePair
        The SalesUniversePair containing sales and universe data.
    settings: dict
        Settings dictionary
    verbose: bool
        Whether to print verbose output. Default is False.
    plot: bool
        Whether to generate plots. Default is False. When true, a scatter plot of
        each scored variable against the sale field is shown, separately for vacant
        and improved sales.
    do_report: bool
        Whether to generate a pdf report. Default is False.

    Raises
    ------
    ValueError
        If ``modeling.try_variables.variables`` is empty when a pass is about to run.
    """

    df_hydrated = get_hydrated_sales_from_sup(sup)

    idx_vacant = df_hydrated["vacant_sale"].eq(True)

    df_vacant = df_hydrated[idx_vacant].copy()

    df_vacant = _simulate_removed_buildings(df_vacant, settings, idx_vacant)

    # update df_hydrated with *all* the characteristics of df_vacant where their keys match:
    df_hydrated.loc[idx_vacant, df_vacant.columns] = df_vacant.values

    all_best_variables = {}

    try_vars = settings.get("modeling", {}).get("try_variables", {})
    model_groups_to_skip = try_vars.get("skip", [])

    def _try_variables(
        df_in: pd.DataFrame,
        model_group: str,
        df_univ: pd.DataFrame,
        do_report: bool,
        settings: dict,
        verbose: bool,
        results: dict,
    ):
        """Score the candidate variables for one model group and store the tables in `results`."""
        bests = {}

        # Neither of these depends on the vacant/main pass, so look them up once.
        variables_to_use = (
            settings.get("modeling", {}).get("try_variables", {}).get("variables", [])
        )
        df_univ_group = None

        for vacant_only in [False, True]:

            if vacant_only:
                if df_in["vacant_sale"].sum() == 0:
                    if verbose:
                        print("No vacant sales found, skipping...")
                    continue
            else:
                if df_in["valid_sale"].sum() == 0:
                    if verbose:
                        print("No valid sales found, skipping...")
                    continue

            if len(variables_to_use) == 0:
                raise ValueError(
                    "No variables defined. Please check settings `modeling.try_variables.variables`"
                )

            if df_univ_group is None:
                # Restrict the universe to this model group on first use only.
                df_univ_group = df_univ[df_univ["model_group"].eq(model_group)].copy()

            # NOTE(review): the `do_report` argument received by this function is
            # not forwarded; the call below always passes do_report=True. Confirm
            # whether that is deliberate before wiring the parameter through.
            var_recs = get_variable_recommendations(
                df_in,
                df_univ_group,
                vacant_only,
                settings,
                model_group,
                variables_to_use=variables_to_use,
                tests_to_run=["corr", "r2"],
                do_report=True,
                verbose=verbose,
            )

            df_results = var_recs["df_results"]

            if vacant_only:
                bests["vacant_only"] = df_results
            else:
                bests["main"] = df_results

        results[model_group] = bests

    do_per_model_group(
        df_hydrated,
        settings,
        _try_variables,
        params={
            "settings": settings,
            "df_univ": sup.universe,
            "do_report": do_report,
            "verbose": verbose,
            "results": all_best_variables,
        },
        key="key_sale",
        skip=model_groups_to_skip
    )

    sale_field = get_sale_field(settings)

    print("")
    print("********** BEST VARIABLES ***********")
    for model_group, entry in all_best_variables.items():
        for vacant_status, results in entry.items():
            print("")
            print(f"model group: {model_group} / {vacant_status}")
            results = results[~results["corr_strength"].isna()]

            styled = results.style.format(
                {
                    "corr_strength": "{:,.2f}",
                    "corr_clarity": "{:,.2f}",
                    "corr_score": "{:,.2f}",
                    "r2": "{:,.2f}",
                    "adj_r2": "{:,.2f}",
                    "coef_sign": "{:,.0f}"
                }
            )

            # Lift the row limit only while displaying, then restore whatever the
            # caller had configured instead of forcing a fixed value.
            with pd.option_context("display.max_rows", None):
                display(styled)

            file_out = f"out/try/{model_group}/{vacant_status}.csv"
            os.makedirs(os.path.dirname(file_out), exist_ok=True)
            results.to_csv(file_out, index=False)

            if not plot:
                # Nothing below has any effect other than plotting.
                continue

            for var in results["variable"].unique():
                if var not in df_hydrated.columns:
                    continue

                # rows of this model group where both the variable and the sale field are known
                df_sub = df_hydrated[
                    df_hydrated["model_group"].eq(model_group)
                    & df_hydrated[var].notna()
                    & df_hydrated[sale_field].notna()
                ]

                for status in ["vacant", "improved"]:
                    # clear any previous plots with plt:
                    plt.clf()

                    df_sub2 = df_sub[df_sub["vacant_sale"].eq(status == "vacant")]

                    if len(df_sub2) > 0:
                        # do a scatter plot of the variable vs. the dependent variable (sale_field):
                        df_sub2.plot.scatter(x=var, y=sale_field)
                        # labels
                        plt.xlabel(var)
                        plt.ylabel(sale_field)
                        plt.title(f"'{var}' vs '{sale_field}' ({status} only)")
                        plt.show()

write_out_all_results

write_out_all_results(sup, all_results)

Write out all model results to CSV and Parquet files.

This function collects predictions from all model groups and writes them to a single DataFrame, which is then saved to both CSV and Parquet formats. It also merges the predictions with the universe DataFrame to include all keys.

Parameters:

Name	Type	Description	Default
`sup`	`SalesUniversePair`	The SalesUniversePair containing sales and universe data.	required
`all_results`	`dict`	A dictionary where keys are model group identifiers and values are MultiModelResults containing the results for each model group.	required

Source code in openavmkit/benchmark.py

def write_out_all_results(sup: SalesUniversePair, all_results: dict):
    """Write out all model results to CSV and Parquet files.

    For every model group, the universe predictions of the "ensemble" and
    "stacked_ensemble" models (whichever are present) are collected into one
    DataFrame: the ensemble prediction as ``market_value``, the stacked ensemble as
    ``market_value_stacked_ensemble``, plus a ``model_group`` column. Model groups
    with no results, or with neither of those two models, are skipped.

    The combined predictions are left-merged onto a copy of the universe on ``key``
    and written to ``out/models/all_model_groups/universe.csv`` and
    ``out/models/all_model_groups/universe.parquet``. Nothing is written when no
    model group contributed predictions.

    Parameters
    ----------
    sup : SalesUniversePair
        The SalesUniversePair containing sales and universe data.
    all_results : dict
        A dictionary where keys are model group identifiers and values are MultiModelResults
        containing the results for each model group.
    """
    t = TimingData()
    df_all = None

    for model_group in all_results:
        t.start(f"model group: {model_group}")
        t.start("read")
        mm_results: MultiModelResults = all_results[model_group]

        # Skip if no results for this model group
        if mm_results is None:
            t.stop("read")
            t.stop(f"model group: {model_group}")
            continue

        # Collect all ensemble types to output
        output_models = [
            model_type
            for model_type in ("ensemble", "stacked_ensemble")
            if model_type in mm_results.model_results
        ]
        if not output_models:
            t.stop("read")
            t.stop(f"model group: {model_group}")
            continue

        # For each output model, extract predictions and add to df_univ_local
        df_univ_local = None
        for model_type in output_models:
            smr = mm_results.model_results[model_type]
            # The plain ensemble owns the un-suffixed "market_value" column.
            col_name = (
                f"market_value_{model_type}"
                if model_type != "ensemble"
                else "market_value"
            )
            df_pred = smr.df_universe[["key", smr.field_prediction]].rename(
                columns={smr.field_prediction: col_name}
            )
            if df_univ_local is None:
                df_univ_local = df_pred
            else:
                df_univ_local = df_univ_local.merge(df_pred, on="key", how="outer")
        df_univ_local["model_group"] = model_group
        # Close the "read" timer on the normal path too; previously it was only
        # stopped when a model group was skipped.
        t.stop("read")

        if df_all is None:
            df_all = df_univ_local
        else:
            t.start("concat")
            df_all = pd.concat([df_all, df_univ_local])
            t.stop("concat")

        t.stop(f"model group: {model_group}")

    # Only proceed with writing if we have results
    if df_all is not None:
        t.start("copy")
        df_univ = sup.universe.copy()
        t.stop("copy")
        t.start("merge")
        # NOTE(review): if the universe already has a "model_group" column, pandas
        # will suffix both copies (_x/_y) here -- confirm that is the intended output.
        df_univ = df_univ.merge(df_all, on="key", how="left")
        t.stop("merge")

        outpath = "out/models/all_model_groups"
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(outpath, exist_ok=True)

        t.start("csv")
        df_univ.to_csv(f"{outpath}/universe.csv", index=False)
        t.stop("csv")
        t.start("parquet")
        df_univ.to_parquet(f"{outpath}/universe.parquet", engine="pyarrow")
        t.stop("parquet")