Skip to content

cloud module

Cloud-native helpers for hyperspectral datasets.

open_cloud_dataset(path, chunks=None, **kwargs)

Open a cloud-friendly xarray dataset.

Parameters:

Name Type Description Default
path str

Dataset path or URL.

required
chunks Optional[Dict[str, int]]

Optional Dask chunk sizes keyed by dimension name. If None, no chunks argument is passed to xarray.open_dataset.

None
**kwargs Any

Additional xarray.open_dataset keyword arguments.

{}

Returns:

Type Description
xr.Dataset

Opened dataset.

Source code in hypercoast/cloud.py
def open_cloud_dataset(
    path: str,
    chunks: Optional[Dict[str, int]] = None,
    **kwargs: Any,
):
    """Open a dataset through ``xarray.open_dataset``, optionally chunked.

    Args:
        path: Dataset path or URL, handed to xarray unchanged.
        chunks: Chunk sizes keyed by dimension name. Forwarded as the
            ``chunks`` argument only when it is not None, so an empty
            mapping is still passed through.
        **kwargs: Extra keyword arguments for ``xarray.open_dataset``.

    Returns:
        xr.Dataset: Whatever ``xarray.open_dataset`` returns for ``path``.
    """
    # ``chunks`` is a named parameter, so it can never already be in kwargs;
    # merging the two mappings therefore cannot produce a duplicate keyword.
    chunk_option = {} if chunks is None else {"chunks": chunks}
    return xr.open_dataset(path, **kwargs, **chunk_option)

suggest_chunks(dataset, target_pixels=2000000)

Suggest Dask chunks for a hyperspectral dataset.

Parameters:

Name Type Description Default
dataset xr.Dataset | xr.DataArray

Input dataset or data array.

required
target_pixels int

Approximate target pixels per spatial chunk.

2000000

Returns:

Type Description
dict

Suggested chunk sizes by dimension.

Source code in hypercoast/cloud.py
def suggest_chunks(
    dataset: xr.Dataset | xr.DataArray,
    target_pixels: int = 2_000_000,
) -> Dict[str, int]:
    """Suggest Dask chunks for a hyperspectral dataset.

    Dimensions are classified by their lower-cased name:

    * ``wavelength`` / ``wavelengths`` / ``band`` are capped at 32.
    * ``x`` / ``y`` / ``latitude`` / ``longitude`` are capped at the integer
      square root of ``target_pixels`` (and never drop below 1).
    * Every other dimension keeps its full size.

    Args:
        dataset: Input dataset or data array; only its ``sizes`` mapping
            is read.
        target_pixels: Approximate target pixels per spatial chunk.

    Returns:
        dict: Suggested chunk sizes by dimension.

    Raises:
        ValueError: If ``target_pixels`` is negative.
    """
    if target_pixels < 0:
        # A negative value would make ``** 0.5`` produce a complex number and
        # surface later as an opaque TypeError from ``int()``.
        raise ValueError(
            f"target_pixels must be non-negative, got {target_pixels!r}"
        )

    spectral_names = ("wavelength", "wavelengths", "band")
    spatial_names = ("x", "y", "latitude", "longitude")
    # Loop-invariant: side length of a square chunk holding ~target_pixels.
    spatial_side = int(target_pixels**0.5)

    chunks: Dict[str, int] = {}
    for dim, size in dataset.sizes.items():
        # Dimension names are not guaranteed to be strings; anything else
        # simply falls through to the "keep full size" branch.
        lower = dim.lower() if isinstance(dim, str) else None
        if lower in spectral_names:
            chunks[dim] = min(size, 32)
        elif lower in spatial_names:
            chunks[dim] = max(1, min(size, spatial_side))
        else:
            chunks[dim] = size
    return chunks

to_cog(data, output, variable=None, **kwargs)

Write a data array to a Cloud Optimized GeoTIFF.

Parameters:

Name Type Description Default
data xr.Dataset | xr.DataArray

Input dataset or data array.

required
output str | Path

Output COG path.

required
variable Optional[str]

Name of the dataset variable to write. If None and data is a Dataset, the first data variable is used.

None
**kwargs Any

Additional rioxarray.to_raster keyword arguments.

{}

Returns:

Type Description
str

Output path.

Source code in hypercoast/cloud.py
def to_cog(
    data: xr.Dataset | xr.DataArray,
    output: str | Path,
    variable: Optional[str] = None,
    **kwargs: Any,
) -> str:
    """Write a data array to a Cloud Optimized GeoTIFF.

    Args:
        data: Input dataset or data array.
        output: Output COG path. Missing parent directories are created.
        variable: Name of the dataset variable to write. When ``data`` is a
            dataset and this is None, the first data variable is used. Ignored
            when ``data`` is already a data array.
        **kwargs: Additional ``rio.to_raster`` keyword arguments. ``driver``
            is always set to ``"COG"`` by this function.

    Returns:
        str: Output path.

    Raises:
        ValueError: If ``data`` is a dataset without any data variables and
            no ``variable`` was given.
    """
    # Resolve the array first so that a bad input does not leave freshly
    # created directories behind.
    if isinstance(data, xr.Dataset):
        if variable is None:
            names = list(data.data_vars)
            if not names:
                # Avoid leaking a bare StopIteration from next(iter(...)).
                raise ValueError(
                    "Cannot write COG: dataset has no data variables."
                )
            variable = names[0]
        arr = data[variable]
    else:
        arr = data

    output = Path(output)
    output.parent.mkdir(parents=True, exist_ok=True)
    # NOTE(review): the ``rio`` accessor presumably comes from rioxarray being
    # imported elsewhere in this module — confirm.
    arr.rio.to_raster(output, driver="COG", **kwargs)
    return str(output)

to_zarr(dataset, output, chunks=None, **kwargs)

Write a dataset to Zarr.

Parameters:

Name Type Description Default
dataset xr.Dataset | xr.DataArray

Input dataset or data array.

required
output str | Path

Output Zarr path.

required
chunks Optional[Dict[str, int]]

Optional chunks to apply before writing.

None
**kwargs Any

Additional xarray.Dataset.to_zarr keyword arguments.

{}

Returns:

Type Description
str

Output path.

Source code in hypercoast/cloud.py
def to_zarr(
    dataset: xr.Dataset | xr.DataArray,
    output: str | Path,
    chunks: Optional[Dict[str, int]] = None,
    **kwargs: Any,
) -> str:
    """Write a dataset to Zarr.

    Args:
        dataset: Input dataset or data array. A data array is first converted
            to a dataset, using its own name or ``"data"`` if it has none.
        output: Output Zarr path. For a local path, missing parent
            directories are created. A string containing ``"://"`` is treated
            as a store URL and handed to ``to_zarr`` unchanged.
        chunks: Optional chunks to apply before writing. None or an empty
            mapping leaves the existing chunking untouched.
        **kwargs: Additional ``xarray.Dataset.to_zarr`` keyword arguments.

    Returns:
        str: Output path (or the URL, unchanged, for a remote store).
    """
    store: str | Path
    if isinstance(output, str) and "://" in output:
        # Path() would collapse "scheme://" to "scheme:/" and mkdir would then
        # create a bogus local directory, so URLs bypass pathlib entirely.
        store = output
    else:
        store = Path(output)
        store.parent.mkdir(parents=True, exist_ok=True)

    if isinstance(dataset, xr.DataArray):
        obj = dataset.to_dataset(name=dataset.name or "data")
    else:
        obj = dataset
    if chunks:
        obj = obj.chunk(chunks)
    obj.to_zarr(store, **kwargs)
    return str(store)