Skip to content

openavmkit.cloud.huggingface

HuggingFaceCredentials

HuggingFaceCredentials(token)

Bases: CloudCredentials

Authentication credentials for HuggingFace

Initialize credentials for HuggingFace

Parameters:

Name Type Description Default
token str

Your HuggingFace token

required
Source code in openavmkit/cloud/huggingface.py
13
14
15
16
17
18
19
20
21
22
23
def __init__(self, token: str) -> None:
    """
    Initialize credentials for HuggingFace

    Parameters
    ----------
    token : str
        Your HuggingFace token
    """
    # Run the base-class initializer first, then attach the token.
    super().__init__()
    # Kept as a plain attribute; no validation is performed here.
    self.token = token

HuggingFaceService

HuggingFaceService(credentials, repo_id, access, revision='main')

Bases: CloudService

HuggingFace-specific CloudService object.

Attributes:

Name Type Description
repo_id str

Repository identifier

revision str

Revision identifier

token str

Access token

api HfApi

HuggingFace API object

Initialize HuggingFaceService Object

Parameters:

Name Type Description
credentials HuggingFaceCredentials

Authentication credentials for HuggingFace

repo_id str

Repository identifier

access CloudAccess

What kind of access/permission ("read_only", "read_write")

revision str

Revision identifier

Source code in openavmkit/cloud/huggingface.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def __init__(
    self,
    credentials: HuggingFaceCredentials,
    repo_id: str,
    access: CloudAccess,
    revision: str = "main",
) -> None:
    """Initialize HuggingFaceService Object

    Parameters
    ----------
    credentials : HuggingFaceCredentials
        Authentication credentials for HuggingFace
    repo_id : str
        Repository identifier
    access : CloudAccess
        What kind of access/permission ("read_only", "read_write")
    revision : str
        Revision identifier. Defaults to "main".

    """
    # Register with the base class under the service name "huggingface".
    super().__init__("huggingface", credentials, access)
    self.repo_id = repo_id
    self.revision = revision
    # Copy the token off the credentials object so methods can read self.token.
    self.token = credentials.token
    # The API client is created without arguments; the token is not handed
    # to it here.
    self.api = HfApi()

download_file

download_file(remote_file, local_file_path)

Download a remote file from the HuggingFace service

Parameters:

Name Type Description Default
remote_file CloudFile

The file to download

required
local_file_path str

The path on your local computer you want to save the remote file to

required
Source code in openavmkit/cloud/huggingface.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def download_file(self, remote_file: CloudFile, local_file_path: str):
    """Download a remote file from the HuggingFace service

    The response body is streamed to disk in chunks rather than being held
    in memory. Data is first written to a temporary ``.part`` file next to
    the destination and only moved into place once the download has
    completed, so a failed transfer never leaves a truncated file at
    ``local_file_path``.

    Parameters
    ----------
    remote_file : CloudFile
        The file to download
    local_file_path : str
        The path on your local computer you want to save the remote file to

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status code
    requests.Timeout
        If the server cannot be reached, or stops sending data, within the
        timeout window
    """
    super().download_file(remote_file, local_file_path)
    url = hf_hub_url(
        repo_id=self.repo_id,
        filename=remote_file.name,
        repo_type="dataset",
        revision=self.revision,
    )
    # Only authenticate when a token is configured; formatting a missing
    # token would send the literal header value "Bearer None".
    headers = {"authorization": f"Bearer {self.token}"} if self.token else {}

    partial_path = f"{local_file_path}.part"
    try:
        # (connect, read) timeouts: without them a stalled connection blocks
        # forever. The read timeout applies between chunks, not to the whole
        # transfer, so large files are still fine.
        with requests.get(
            url, headers=headers, stream=True, timeout=(10, 60)
        ) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        # Atomic on the same filesystem: readers see either the old file or
        # the complete new one.
        os.replace(partial_path, local_file_path)
    finally:
        # After a successful replace the partial file no longer exists, so
        # this only cleans up after a failed download.
        if os.path.exists(partial_path):
            os.remove(partial_path)

list_files

list_files(remote_path)

List all the files at the given path on HuggingFace

Parameters:

Name Type Description Default
remote_path str

Path on HuggingFace you want to query

required

Returns:

Type Description
list[CloudFile]

A listing of all the files contained within the queried path on the remote HuggingFace service

Source code in openavmkit/cloud/huggingface.py
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def list_files(self, remote_path: str) -> list[CloudFile]:
    """List all the files at the given path on HuggingFace

    Folders are skipped; only file entries whose path starts with
    ``remote_path`` are returned. If ``remote_path`` does not exist in the
    repository, an empty list is returned.

    Parameters
    ----------
    remote_path : str
        Path on HuggingFace you want to query

    Returns
    -------
    list[CloudFile]
        A listing of all the files contained within the queried path on the remote HuggingFace service
    """
    infos = self.api.list_repo_tree(
        repo_id=self.repo_id,
        revision=self.revision,
        token=self.token,
        path_in_repo=remote_path,
        repo_type="dataset",
        recursive=True,
        expand=True,
    )

    # The listing is consumed lazily, so a missing path only surfaces as
    # EntryNotFoundError once iteration starts. Materialise it exactly once:
    # probing the iterator with a throw-away loop would consume (and lose)
    # the first entry.
    try:
        entries = list(infos)
    except EntryNotFoundError:
        return []

    files = []
    for info in entries:
        # Only files are reported; directory entries carry no size/commit
        # information of interest here.
        if isinstance(info, RepoFolder):
            continue
        if not info.rfilename.startswith(remote_path):
            continue

        # Normalise the last-commit timestamp to UTC.
        last_modified_date: datetime = info.last_commit.date
        files.append(
            CloudFile(
                name=info.rfilename,
                last_modified_utc=last_modified_date.astimezone(timezone.utc),
                size=info.size,
            )
        )

    return files

upload_file

upload_file(remote_file_path, local_file_path)

Upload a local file to the HuggingFace service

Parameters:

Name Type Description Default
remote_file_path str

The remote path on the HuggingFace service you want to upload your local file to

required
local_file_path str

The local path to the file on your local computer that you want to upload

required
Source code in openavmkit/cloud/huggingface.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def upload_file(self, remote_file_path: str, local_file_path: str):
    """Upload a local file to the HuggingFace service

    The file is committed to the revision this service was configured with
    (``self.revision``), i.e. the same revision that ``list_files`` and
    ``download_file`` read from.

    Parameters
    ----------
    remote_file_path : str
        The remote path on the HuggingFace service you want to upload your local file to
    local_file_path : str
        The local path to the file on your local computer that you want to upload
    """
    super().upload_file(remote_file_path, local_file_path)
    hf_upload_file(
        path_or_fileobj=local_file_path,
        path_in_repo=remote_file_path,
        repo_id=self.repo_id,
        token=self.token,
        repo_type="dataset",
        # Without an explicit revision the commit lands on the default
        # branch, which may differ from the revision being listed/downloaded.
        revision=self.revision,
        commit_message="Upload via OpenAVMKit",
    )

get_creds_from_env_huggingface

get_creds_from_env_huggingface()

Reads and returns HuggingFace credentials from the environment settings

Returns:

Type Description
HuggingFaceCredentials

The credentials for HuggingFace stored in environment settings

Source code in openavmkit/cloud/huggingface.py
183
184
185
186
187
188
189
190
191
192
def get_creds_from_env_huggingface() -> HuggingFaceCredentials:
    """Build HuggingFace credentials from the ``HF_TOKEN`` environment variable

    No validation is performed: if ``HF_TOKEN`` is not set, the returned
    credentials carry ``None`` as their token.

    Returns
    -------
    HuggingFaceCredentials
        Credentials wrapping the value of ``HF_TOKEN``
    """
    return HuggingFaceCredentials(os.getenv("HF_TOKEN"))

init_service_huggingface

init_service_huggingface(credentials, access)

Initializes the HuggingFace service

Parameters:

Name Type Description Default
credentials HuggingFaceCredentials

The credentials to your HuggingFace account

required
access CloudAccess

What kind of access/permission ("read_only", "read_write")

required
Source code in openavmkit/cloud/huggingface.py
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def init_service_huggingface(credentials: HuggingFaceCredentials, access: CloudAccess):
    """
    Create a HuggingFaceService configured from environment variables

    The repository is read from ``HF_REPO_ID`` (mandatory) and the revision
    from ``HF_REVISION`` (optional, falling back to "main").

    Parameters
    ----------
    credentials : HuggingFaceCredentials
        The credentials to your HuggingFace account
    access : CloudAccess
        What kind of access/permission ("read_only", "read_write")

    Returns
    -------
    HuggingFaceService
        The configured service object

    Raises
    ------
    ValueError
        If ``HF_REPO_ID`` is not set in the environment
    """
    env = os.environ
    repository = env.get("HF_REPO_ID")
    # Guard clause: a repository id is mandatory, the revision is not.
    if repository is None:
        raise ValueError("Missing 'HF_REPO_ID' in environment")
    return HuggingFaceService(
        credentials,
        repository,
        access,
        env.get("HF_REVISION", "main"),
    )