Skip to content

openavmkit.checkpoint

delete_checkpoints

delete_checkpoints(prefix)

Delete all checkpoint files that start with the given prefix.

Parameters:

Name Type Description Default
prefix str

The prefix to match checkpoint files against.

required
Source code in openavmkit/checkpoint.py
132
133
134
135
136
137
138
139
140
141
142
143
def delete_checkpoints(prefix: str):
    """Remove every file in ``out/checkpoints`` whose name begins with ``prefix``.

    The checkpoint directory is created first if it is missing, so calling
    this on a fresh working directory is a no-op rather than an error.

    Parameters
    ----------
    prefix : str
        Leading part of the file name to match. Matching is done on the
        bare file name (including its extension), not on the full path.
    """
    checkpoint_dir = "out/checkpoints"
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Collect the matching names first, then delete them in listing order.
    matching = [
        name for name in os.listdir(checkpoint_dir) if name.startswith(prefix)
    ]
    for name in matching:
        os.remove(f"out/checkpoints/{name}")

exists_checkpoint

exists_checkpoint(path)

Check if a checkpoint exists at the specified path.

Parameters:

Name Type Description Default
path str

The path to the checkpoint file (without extension).

required

Returns:

Type Description
bool

True if a checkpoint exists, False otherwise.

Source code in openavmkit/checkpoint.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def exists_checkpoint(path: str):
    """Report whether a checkpoint file is present for ``path``.

    A checkpoint counts as present when either
    ``out/checkpoints/<path>.parquet`` or ``out/checkpoints/<path>.pickle``
    exists.

    Parameters
    ----------
    path : str
        Checkpoint name relative to ``out/checkpoints``, without extension.

    Returns
    -------
    bool
        True if at least one of the candidate files exists, otherwise False.
    """
    candidates = (
        f"out/checkpoints/{path}.{suffix}" for suffix in ("parquet", "pickle")
    )
    return any(os.path.exists(candidate) for candidate in candidates)

from_checkpoint

from_checkpoint(path, func, params, use_checkpoint=True)

Run a function with parameters, using a checkpoint if available.

If a checkpoint exists at the specified path, it will read from it, return the results, and not execute the function.

If a checkpoint does not exist (or use_checkpoint is False), it will execute the function with the provided parameters, save the result to a checkpoint, and return the result.

Parameters:

Name Type Description Default
path str

The path to the checkpoint file (without extension).

required
func callable

The function to execute if the checkpoint does not exist.

required
params dict

The parameters to pass to the function.

required
use_checkpoint bool

Whether to use the checkpoint if it exists. Defaults to True.

True

Returns:

Type Description
DataFrame

The result of the function execution or the checkpoint data.

Source code in openavmkit/checkpoint.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def from_checkpoint(
    path: str, func: callable, params: dict, use_checkpoint: bool = True
) -> pd.DataFrame:
    """Return a cached checkpoint if allowed and present, else compute and cache.

    When ``use_checkpoint`` is true and ``exists_checkpoint(path)`` reports a
    checkpoint, the stored data is loaded with ``read_checkpoint`` and
    ``func`` is not called.

    In every other case ``func(**params)`` is executed, its result is handed
    to ``write_checkpoint`` under ``path``, and that result is returned.
    Note that this also happens when ``use_checkpoint`` is false.

    Parameters
    ----------
    path : str
        The path to the checkpoint file (without extension).
    func : callable
        The function to execute when the checkpoint is not used.
    params : dict
        Keyword arguments that are unpacked into the call to ``func``.
    use_checkpoint : bool, optional
        Whether to load an existing checkpoint instead of calling ``func``.
        Defaults to True.

    Returns
    -------
    pd.DataFrame
        The checkpoint data, or the freshly computed result of ``func``.
    """
    load_cached = use_checkpoint and exists_checkpoint(path)
    if not load_cached:
        # Nothing usable on disk (or caching bypassed): compute and store.
        fresh = func(**params)
        write_checkpoint(fresh, path)
        return fresh
    return read_checkpoint(path)

read_checkpoint

read_checkpoint(path)

Read a checkpoint from the specified path.

Parameters:

Name Type Description Default
path str

The path to the checkpoint file (without extension).

required

Returns:

Type Description
Any

The data read from the checkpoint: a GeoDataFrame or DataFrame when a parquet file exists, otherwise whatever object was stored in the pickle file.

Source code in openavmkit/checkpoint.py
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def read_checkpoint(path: str) -> Any:
    """Load the checkpoint stored under ``path``.

    ``out/checkpoints/<path>.parquet`` is preferred. If that file does not
    exist, ``out/checkpoints/<path>.pickle`` is opened and unpickled instead.

    Parameters
    ----------
    path : str
        The path to the checkpoint file (without extension).

    Returns
    -------
    Any
        A GeoDataFrame or DataFrame when the parquet file exists, otherwise
        the object stored in the pickle file.

    Raises
    ------
    ValueError
        If the parquet file has a ``geometry`` column but
        ``is_likely_epsg4326`` does not accept the resulting GeoDataFrame.
    """
    parquet_path = f"out/checkpoints/{path}.parquet"

    if not os.path.exists(parquet_path):
        # No parquet checkpoint: fall back to the pickle file.
        pickle_path = f"out/checkpoints/{path}.pickle"
        with open(pickle_path, "rb") as handle:
            return pickle.load(handle)

    try:
        # First choice: let geopandas read it directly.
        return gpd.read_parquet(parquet_path)
    except ValueError:
        # geopandas refused the file; read it as a plain table instead.
        frame = pd.read_parquet(parquet_path)

        # Without a geometry column there is nothing spatial to rebuild.
        if "geometry" not in frame.columns:
            return frame

        # Decode the geometry column with wkb.loads and wrap the table.
        frame["geometry"] = frame["geometry"].apply(wkb.loads)
        geo_frame = gpd.GeoDataFrame(frame, geometry="geometry")

        # Only EPSG:4326 is assigned, and only when the heuristic accepts it.
        if not is_likely_epsg4326(geo_frame):
            raise ValueError(
                "Parquet found with geometry, but CRS is ambiguous. Failed to load."
            )
        geo_frame.set_crs(epsg=4326, inplace=True)
        return geo_frame

read_pickle

read_pickle(path)

Read a pickle file from the specified path.

Parameters:

Name Type Description Default
path str

The path to the pickle file (without extension).

required

Returns:

Type Description
Any

The data read from the pickle file.

Source code in openavmkit/checkpoint.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def read_pickle(path: str) -> Any:
    """Unpickle and return the object stored at ``<path>.pickle``.

    Unlike the checkpoint helpers, ``path`` is used as given; no
    ``out/checkpoints`` directory is prepended.

    Parameters
    ----------
    path : str
        The path to the pickle file (without the ``.pickle`` extension).

    Returns
    -------
    Any
        The object that was stored in the pickle file.
    """
    with open(f"{path}.pickle", "rb") as handle:
        payload = pickle.load(handle)
    return payload

write_checkpoint

write_checkpoint(data, path)

Write data to a checkpoint file.

Parameters:

Name Type Description Default
data Any

The data to write to the checkpoint. A DataFrame or GeoDataFrame is saved as a parquet file; any other object is saved as a pickle file.

required
path str

The path to the checkpoint file (without extension).

required
Source code in openavmkit/checkpoint.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def write_checkpoint(data: Any, path: str):
    """Persist ``data`` as a checkpoint under ``out/checkpoints``.

    The checkpoint directory is created if needed. A GeoDataFrame or
    DataFrame is written to ``<path>.parquet``; any other object is pickled
    to ``<path>.pickle``.

    Parameters
    ----------
    data : Any
        The object to store.
    path : str
        The path to the checkpoint file (without extension).
    """
    os.makedirs("out/checkpoints", exist_ok=True)

    # Test GeoDataFrame first so it takes the branch that pins the engine.
    if isinstance(data, gpd.GeoDataFrame):
        data.to_parquet(f"out/checkpoints/{path}.parquet", engine="pyarrow")
        return

    if isinstance(data, pd.DataFrame):
        data.to_parquet(f"out/checkpoints/{path}.parquet")
        return

    # Anything that is not a data frame is stored as a pickle.
    with open(f"out/checkpoints/{path}.pickle", "wb") as handle:
        pickle.dump(data, handle)