cloud module¶
Cloud-native helpers for hyperspectral datasets.
open_cloud_dataset(path, chunks=None, **kwargs)
¶
Open a cloud-friendly xarray dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path |
str |
Dataset path or URL. |
required |
chunks |
Optional[Dict[str, int]] |
Optional Dask chunks. If None, chunks are inferred lazily by xarray. |
None |
**kwargs |
Any |
Additional `xarray.open_dataset` keyword arguments. |
{} |
Returns:
| Type | Description |
|---|---|
xr.Dataset |
Opened dataset. |
Source code in hypercoast/cloud.py
def open_cloud_dataset(
    path: str,
    chunks: Optional[Dict[str, int]] = None,
    **kwargs: Any,
):
    """Open a dataset with ``xarray.open_dataset``.

    Args:
        path: Dataset path or URL, passed to ``xarray.open_dataset`` unchanged.
        chunks: Optional Dask chunk sizes keyed by dimension name. When None,
            no ``chunks`` argument is forwarded, so xarray's own default
            applies.
        **kwargs: Additional ``xarray.open_dataset`` keyword arguments.

    Returns:
        xr.Dataset: Opened dataset.
    """
    # Forward ``chunks`` only when the caller supplied it.
    open_kwargs = kwargs if chunks is None else {**kwargs, "chunks": chunks}
    return xr.open_dataset(path, **open_kwargs)
suggest_chunks(dataset, target_pixels=2000000)
¶
Suggest Dask chunks for a hyperspectral dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dataset |
xr.Dataset | xr.DataArray |
Input dataset or data array. |
required |
target_pixels |
int |
Approximate target pixels per spatial chunk. |
2000000 |
Returns:
| Type | Description |
|---|---|
dict |
Suggested chunk sizes by dimension. |
Source code in hypercoast/cloud.py
def suggest_chunks(
    dataset: xr.Dataset | xr.DataArray,
    target_pixels: int = 2_000_000,
) -> Dict[str, int]:
    """Suggest Dask chunks for a hyperspectral dataset.

    Dimension names are matched case-insensitively:

    * ``wavelength``, ``wavelengths`` and ``band`` are capped at 32.
    * ``x``, ``y``, ``latitude`` and ``longitude`` are capped at
      ``floor(sqrt(target_pixels))``, with a minimum chunk size of 1.
    * Every other dimension is kept whole (chunk size equals its length).

    Args:
        dataset: Input dataset or data array. Only its ``sizes`` mapping is
            read.
        target_pixels: Approximate target pixels per spatial chunk. Values
            below zero are treated as zero, which yields spatial chunks of 1.

    Returns:
        dict: Suggested chunk sizes by dimension, keyed by the original
        dimension names in the order ``dataset.sizes`` reports them.
    """
    spectral_names = ("wavelength", "wavelengths", "band")
    spatial_names = ("x", "y", "latitude", "longitude")

    # Loop-invariant side length of a roughly square spatial chunk. Clamping
    # at zero keeps a negative target from producing a complex root, which
    # ``int()`` would reject with an unrelated-looking TypeError.
    side = int(max(target_pixels, 0) ** 0.5)

    chunks: Dict[str, int] = {}
    for dim, size in dataset.sizes.items():
        # xarray permits non-string (hashable) dimension names, so normalise
        # through ``str`` before lowering instead of assuming ``str`` methods.
        lower = str(dim).lower()
        if lower in spectral_names:
            chunks[dim] = min(size, 32)
        elif lower in spatial_names:
            chunks[dim] = max(1, min(size, side))
        else:
            chunks[dim] = size
    return chunks
to_cog(data, output, variable=None, **kwargs)
¶
Write a data array to a Cloud Optimized GeoTIFF.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
data |
xr.Dataset | xr.DataArray |
Input dataset or data array. |
required |
output |
str | Path |
Output COG path. |
required |
variable |
Optional[str] |
Optional dataset variable. |
None |
**kwargs |
Any |
Additional `rioxarray.to_raster` keyword arguments. |
{} |
Returns:
| Type | Description |
|---|---|
str |
Output path. |
Source code in hypercoast/cloud.py
def to_cog(
    data: xr.Dataset | xr.DataArray,
    output: str | Path,
    variable: Optional[str] = None,
    **kwargs: Any,
) -> str:
    """Write a data array to a Cloud Optimized GeoTIFF.

    Args:
        data: Input dataset or data array.
        output: Output COG path. Missing parent directories are created.
        variable: Name of the dataset variable to write. Only used when
            ``data`` is a dataset; if None, the first data variable is used.
        **kwargs: Additional ``rioxarray.to_raster`` keyword arguments.

    Returns:
        str: Output path.

    Raises:
        ValueError: If ``data`` is a dataset without data variables and no
            ``variable`` was given.
    """
    # Resolve the array before touching the filesystem so invalid input does
    # not leave freshly created directories behind.
    if isinstance(data, xr.Dataset):
        if variable is None:
            if not data.data_vars:
                # ``next(iter(...))`` on an empty mapping would otherwise leak
                # a bare StopIteration to the caller.
                raise ValueError(
                    "Dataset has no data variables to write to a COG."
                )
            variable = next(iter(data.data_vars))
        arr = data[variable]
    else:
        arr = data

    output = Path(output)
    output.parent.mkdir(parents=True, exist_ok=True)
    arr.rio.to_raster(output, driver="COG", **kwargs)
    return str(output)
to_zarr(dataset, output, chunks=None, **kwargs)
¶
Write a dataset to Zarr.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dataset |
xr.Dataset | xr.DataArray |
Input dataset or data array. |
required |
output |
str | Path |
Output Zarr path. |
required |
chunks |
Optional[Dict[str, int]] |
Optional chunks to apply before writing. |
None |
**kwargs |
Any |
Additional `to_zarr` keyword arguments. |
{} |
Returns:
| Type | Description |
|---|---|
str |
Output path. |
Source code in hypercoast/cloud.py
def to_zarr(
    dataset: xr.Dataset | xr.DataArray,
    output: str | Path,
    chunks: Optional[Dict[str, int]] = None,
    **kwargs: Any,
) -> str:
    """Persist a dataset or data array as a Zarr store.

    Args:
        dataset: Input dataset or data array. A data array is first converted
            to a single-variable dataset named after the array, or ``"data"``
            when the array has no name.
        output: Output Zarr path. Missing parent directories are created.
        chunks: Optional chunks to apply before writing. Skipped when None or
            empty.
        **kwargs: Additional ``to_zarr`` keyword arguments.

    Returns:
        str: Output path.
    """
    target = Path(output)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Promote a bare DataArray so the same Dataset write path handles both.
    store = (
        dataset.to_dataset(name=dataset.name or "data")
        if isinstance(dataset, xr.DataArray)
        else dataset
    )
    if chunks:
        store = store.chunk(chunks)

    store.to_zarr(target, **kwargs)
    return str(target)