Skip to content

openavmkit.calculations

perform_calculations

perform_calculations(df_in, calc, rename_map=None)

Perform calculations on a DataFrame based on a dictionary of calculation instructions.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| df_in | DataFrame | Input DataFrame. | required |
| calc | dict | Dictionary of calculation instructions. | required |
| rename_map | dict | Optional mapping of original to renamed columns. | None |

Returns:

| Type | Description |
| --- | --- |
| DataFrame | DataFrame with calculations applied. |

Source code in openavmkit/calculations.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def perform_calculations(df_in: pd.DataFrame, calc: dict, rename_map: dict = None) -> pd.DataFrame:
    """
    Perform calculations on a DataFrame based on a dictionary of calculation instructions.

    Each key of ``calc`` names a column to create (or overwrite); the matching
    value is handed to ``_do_calc`` together with the working DataFrame, and the
    result is stored under that key. Entries are processed in dictionary order
    and the working frame is updated after every entry, so a later entry is
    evaluated against a frame that already contains the earlier results.

    Once all entries are processed, every column whose label is a string
    starting with ``"__temp_"`` is removed. This includes any such column that
    was already present in ``df_in``.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input DataFrame. It is copied; the caller's object is not modified.
    calc : dict
        Dictionary of calculation instructions, keyed by output column name.
    rename_map : dict, optional
        Optional mapping of original to renamed columns. Passed through
        unchanged to ``_do_calc``.

    Returns
    -------
    pandas.DataFrame
        DataFrame with calculations applied and temporary columns removed.
    """
    df = df_in.copy()

    for new_field, entry in calc.items():
        df[new_field] = _do_calc(df, entry, rename_map=rename_map)

        # Keep only essential debug output for valid_sale
        if new_field == "valid_sale":
            valid_count = df[new_field].sum()
            print(f"Valid sales: {valid_count} out of {len(df)} total")

    # Remove temporary columns. Column labels are not guaranteed to be strings
    # (pandas allows ints, tuples, ...), so only string labels are tested for
    # the prefix; everything else is kept. The labels are collected first and
    # dropped in one call rather than deleting while walking the column index.
    temp_cols = [
        col
        for col in df.columns
        if isinstance(col, str) and col.startswith("__temp_")
    ]
    if temp_cols:
        df = df.drop(columns=temp_cols)

    return df

perform_tweaks

perform_tweaks(df_in, tweak, rename_map=None)

Perform tweaks on a DataFrame based on a list of tweak instructions.

Will try both original and renamed column names if rename_map is provided.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| df_in | DataFrame | Input DataFrame. | required |
| tweak | list | List of tweak instructions. | required |
| rename_map | dict | Optional mapping of original to renamed columns. | None |

Returns:

| Type | Description |
| --- | --- |
| DataFrame | DataFrame with tweaks applied. |

Source code in openavmkit/calculations.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def perform_tweaks(df_in: pd.DataFrame, tweak: list, rename_map: dict = None):
    """
    Apply a list of manual value overrides ("tweaks") to a copy of a DataFrame.

    Every tweak instruction is a dict with three entries:

    * ``"field"``  -- the column whose values are overwritten,
    * ``"key"``    -- the column used to select rows,
    * ``"values"`` -- a mapping ``{key value: replacement}``; for each pair,
      rows whose key column equals the key value get the replacement written
      into the field column. Defaults to an empty mapping.

    Column names are resolved leniently. A name is used as-is when it is a
    column of the frame; otherwise, if ``rename_map`` is provided, the name is
    first treated as a renamed name (looked up in the inverted map) and then
    as an original name (looked up in ``rename_map`` itself).

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input DataFrame. It is copied; the caller's object is not modified.
    tweak : list
        List of tweak instructions.
    rename_map : dict, optional
        Optional mapping of original to renamed columns.

    Returns
    -------
    pandas.DataFrame
        DataFrame with tweaks applied.

    Raises
    ------
    ValueError
        If the field column or the key column of an instruction cannot be
        resolved to an existing column. The field is checked before the key.
    """
    result = df_in.copy()

    # original -> renamed, and its inverse renamed -> original
    forward = rename_map if rename_map else {}
    backward = {renamed: original for original, renamed in forward.items()}

    def _resolve(name):
        """Return the column of ``result`` that ``name`` refers to, or None."""
        if name in result:
            return name
        # The inverted map is consulted before the forward map.
        for lookup in (backward, forward):
            if name in lookup and lookup[name] in result:
                return lookup[name]
        return None

    for instruction in tweak:
        field = instruction.get("field")
        key_field = instruction.get("key")
        replacements = instruction.get("values", {})

        target_col = _resolve(field)
        if target_col is None:
            raise ValueError(
                f'Field not found: "{field}" (also tried looking up original/renamed versions)'
            )

        selector_col = _resolve(key_field)
        if selector_col is None:
            raise ValueError(
                f'Key not found: "{key_field}" (also tried looking up original/renamed versions)'
            )

        for match_value, replacement in replacements.items():
            # The mask is rebuilt on every pass because an earlier write may
            # have changed the selector column when it is also the target.
            row_mask = result[selector_col].eq(match_value)
            result.loc[row_mask, target_col] = replacement

    return result