Determine the size of a Hugging Face Dataset
#| default_exp size
#|export
from fastcore.net import urljson, HTTPError
def hfsize(repo:str):
    """Report the total size of a Hugging Face dataset repo as a string in GB.

    Requests the `treesize/main` endpoint of the Hub datasets API for `repo`
    and formats the reported `size` value, divided by 1e9, with two decimals
    (for example '5.49 GB').

    If the request raises `HTTPError`, nothing is re-raised: a
    'Did not find repo: <url>' message is returned instead.
    """
    api_url = f'https://huggingface.co/api/datasets/{repo}'
    try:
        tree_info = urljson(f'{api_url}/treesize/main')
    except HTTPError:
        # Every HTTP failure is reported to the caller as a missing repo.
        return f'Did not find repo: {api_url}'
    else:
        size_gb = tree_info['size'] / 1e9
        return f'{size_gb:.2f} GB'
size
`hfsize` takes a Hugging Face Dataset repo as input and returns the total size of the data in GB.
For example, we can check the size of tglcourse/CelebA-faces-cropped-128 like so:
hfsize("tglcourse/CelebA-faces-cropped-128")
'5.49 GB'
#| hide
# Run nbdev's export step for this notebook (hidden from the rendered docs).
import nbdev; nbdev.nbdev_export()