tanager module¶
Reader and helpers for Planet Tanager hyperspectral HDF5 products.
Supports all four published product variants via :func:`read_tanager`:
basic_radiance_hdf5, ortho_radiance_hdf5, basic_sr_hdf5, and
ortho_sr_hdf5. The reader auto-detects the HDF5 layout and sources
wavelength metadata from inside the file when available, falling back to a
caller-supplied STAC URL only when necessary.
download_tanager(items, asset='ortho_radiance_hdf5', out_dir=None, quiet=True, overwrite=False, **kwargs)
¶
Download a Tanager asset from one or more STAC items.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
items |
dict, str, or list |
STAC item dictionary, STAC item URL, or a sequence of either. |
required |
asset |
str |
STAC asset key to download. Defaults to `"ortho_radiance_hdf5"`.
|
'ortho_radiance_hdf5' |
out_dir |
str |
Output directory. Defaults to the current directory. |
None |
quiet |
bool |
Suppress download output. Defaults to True. |
True |
overwrite |
bool |
Overwrite existing files. Defaults to False. |
False |
**kwargs |
Extra keyword arguments passed to :func:`download_file`. |
{} |
Returns:
| Type | Description |
|---|---|
list |
Local file paths for the downloaded assets. |
Source code in hypercoast/tanager.py
def download_tanager(
    items,
    asset: str = "ortho_radiance_hdf5",
    out_dir: Optional[str] = None,
    quiet: bool = True,
    overwrite: bool = False,
    **kwargs,
) -> List[str]:
    """Fetch one asset from each of the given Tanager STAC items.

    Args:
        items (dict, str, or list): STAC item dictionary, STAC item URL, or a
            sequence of either.
        asset (str, optional): Key of the STAC asset to download. Defaults to
            ``"ortho_radiance_hdf5"``.
        out_dir (str, optional): Directory to place the files in. When ``None``
            no explicit output path is given and :func:`download_file` picks
            the destination.
        quiet (bool, optional): Suppress download output. Defaults to True.
        overwrite (bool, optional): Overwrite existing files. Defaults to False.
        **kwargs: Extra keyword arguments passed to :func:`download_file`.

    Returns:
        list: One :func:`download_file` result per item, in input order.

    Raises:
        KeyError: If an item does not list ``asset`` among its assets.
        ValueError: If the selected asset entry has no ``href``.
    """
    downloaded = []
    for stac_item in _coerce_tanager_items(items):
        item_assets = stac_item.get("assets", {})
        if asset not in item_assets:
            available = sorted(item_assets.keys())
            raise KeyError(
                f"STAC item has no asset '{asset}'. Available assets: {available}"
            )

        href = item_assets[asset].get("href")
        if not href:
            raise ValueError(f"STAC item asset '{asset}' has no href.")

        # Keep the remote file name when the caller chose a directory.
        target = (
            None if out_dir is None else os.path.join(out_dir, os.path.basename(href))
        )
        local_path = download_file(
            href,
            output=target,
            quiet=quiet,
            overwrite=overwrite,
            unzip=False,
            **kwargs,
        )
        downloaded.append(local_path)
    return downloaded
extract_tanager(dataset, latitude, longitude, delta=0.01, return_plot=False, **kwargs)
¶
Extracts data from a Tanager dataset for a given latitude and longitude range and calculates the mean over these dimensions.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dataset |
Union[xr.Dataset, str] |
The Tanager dataset or path to the dataset file. |
required |
latitude |
Union[float, Tuple[float, float]] |
The latitude or range of latitudes to extract data for. |
required |
longitude |
Union[float, Tuple[float, float]] |
The longitude or range of longitudes to extract data for. |
required |
delta |
float |
The range to add/subtract to the latitude and longitude if they are not ranges. Defaults to 0.01. |
0.01 |
return_plot |
bool |
Whether to return a plot of the data. Defaults to False. |
False |
**kwargs |
Additional keyword arguments to pass to the plot function. |
{} |
Returns:
| Type | Description |
|---|---|
Union[xr.DataArray, plt.figure.Figure] |
The mean data over the latitude and longitude dimensions, or a plot of this data if return_plot is True. |
Source code in hypercoast/tanager.py
def extract_tanager(
    dataset: Union[xr.Dataset, str],
    latitude: Union[float, Tuple[float, float]],
    longitude: Union[float, Tuple[float, float]],
    delta: float = 0.01,
    return_plot: bool = False,
    **kwargs,
) -> Union[xr.DataArray, plt.Figure]:
    """
    Extracts data from a Tanager dataset for a given latitude and longitude
    range and calculates the mean over the ``y`` and ``x`` dimensions.

    Args:
        dataset (Union[xr.Dataset, str]): The Tanager dataset or path to the
            dataset file. A string is opened with :func:`read_tanager` using
            its default arguments.
        latitude (Union[float, Tuple[float, float]]): The latitude or range of
            latitudes to extract data for.
        longitude (Union[float, Tuple[float, float]]): The longitude or range of
            longitudes to extract data for.
        delta (float, optional): The range to add/subtract to the latitude and
            longitude if they are not ranges. Defaults to 0.01.
        return_plot (bool, optional): Whether to return a plot of the data.
            Defaults to False.
        **kwargs: Additional keyword arguments to pass to the plot function.
            Only used when ``return_plot`` is True.

    Returns:
        Union[xr.DataArray, plt.figure.Figure]: The mean data over the ``y``
            and ``x`` dimensions, or the result of ``DataArray.plot.line`` on
            that mean if ``return_plot`` is True.
    """
    # The signature advertises path input, so open it before filtering.
    if isinstance(dataset, str):
        dataset = read_tanager(dataset)

    # A scalar coordinate is widened into a (min, max) window of +/- delta.
    if not isinstance(latitude, (list, tuple)):
        latitude = (latitude - delta, latitude + delta)
    if not isinstance(longitude, (list, tuple)):
        longitude = (longitude - delta, longitude + delta)

    subset = filter_tanager(dataset, latitude, longitude, return_plot=False)
    spectrum = subset.mean(dim=["y", "x"])

    if return_plot:
        return spectrum.plot.line(**kwargs)
    return spectrum
filter_tanager(dataset, latitude, longitude, drop=True, return_plot=False, **kwargs)
¶
Filters a Tanager dataset based on latitude and longitude.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dataset |
xr.Dataset |
The Tanager dataset to filter. |
required |
latitude |
float or tuple |
The latitude to filter by. If a tuple or list, it represents a range. |
required |
longitude |
float or tuple |
The longitude to filter by. If a tuple or list, it represents a range. |
required |
drop |
bool |
Whether to drop the filtered out data. Defaults to True. |
True |
Returns:
| Type | Description |
|---|---|
xr.DataArray |
The filtered Tanager data. |
Source code in hypercoast/tanager.py
def filter_tanager(
    dataset, latitude, longitude, drop=True, return_plot=False, **kwargs
):
    """
    Filters a Tanager dataset based on latitude and longitude.

    Only the ``toa_radiance`` variable is filtered. Range bounds are
    exclusive on both ends; a scalar coordinate is matched by exact equality.

    Args:
        dataset (xr.Dataset): The Tanager dataset to filter. Must provide
            ``latitude``, ``longitude`` and ``toa_radiance``.
        latitude (float or tuple): The latitude to filter by. If a tuple or
            list, it represents a ``(min, max)`` range.
        longitude (float or tuple): The longitude to filter by. If a tuple or
            list, it represents a ``(min, max)`` range.
        drop (bool, optional): Whether to drop the filtered out data. Defaults
            to True.
        return_plot (bool, optional): If True, draw one spectrum line per
            remaining pixel and return the plot result instead of the data.
            Defaults to False.
        **kwargs: Additional keyword arguments forwarded to
            ``DataArray.where``.

    Returns:
        xr.DataArray: The filtered Tanager data with all-NaN rows and columns
            removed, or the result of ``DataArray.plot.line`` when
            ``return_plot`` is True.
    """
    lat = dataset["latitude"]
    lon = dataset["longitude"]

    if isinstance(latitude, (list, tuple)):
        lat_con = (lat > latitude[0]) & (lat < latitude[1])
    else:
        lat_con = lat == latitude

    if isinstance(longitude, (list, tuple)):
        lon_con = (lon > longitude[0]) & (lon < longitude[1])
    else:
        lon_con = lon == longitude

    da = dataset["toa_radiance"].where(lat_con & lon_con, drop=drop, **kwargs)
    # Trim rows/columns that contain no surviving pixel at all.
    da_filtered = da.dropna(dim="y", how="all").dropna(dim="x", how="all")

    if return_plot:
        rrs_stack = da_filtered.stack(
            {"pixel": ["y", "x"]},
            create_index=False,
        )
        # Hand the plot handles back; previously they were discarded and the
        # function returned None in this branch.
        return rrs_stack.plot.line(hue="pixel")
    return da_filtered
get_tanager_asset_url(stac_item, asset='ortho_visual')
¶
Return an asset URL from a Tanager STAC item.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
stac_item |
dict or str |
STAC item dictionary or item JSON URL. |
required |
asset |
str |
STAC asset key. Defaults to `"ortho_visual"`. |
'ortho_visual' |
Returns:
| Type | Description |
|---|---|
str |
The asset href. |
Source code in hypercoast/tanager.py
def get_tanager_asset_url(stac_item, asset: str = "ortho_visual") -> str:
    """Look up the href of one asset on a Tanager STAC item.

    Args:
        stac_item (dict or str): STAC item dictionary or item JSON URL.
        asset (str, optional): STAC asset key. Defaults to ``"ortho_visual"``.

    Returns:
        str: The asset href.

    Raises:
        KeyError: If the item has no asset named ``asset``.
        ValueError: If the asset entry has a missing or empty ``href``.
    """
    item_assets = _load_stac_item(stac_item).get("assets", {})

    if asset in item_assets:
        href = item_assets[asset].get("href")
        if href:
            return href
        raise ValueError(f"STAC item asset '{asset}' has no href.")

    available = sorted(item_assets.keys())
    raise KeyError(
        f"STAC item has no asset '{asset}'. Available assets: {available}"
    )
grid_tanager(dataset, bands=None, wavelengths=None, method='nearest', row_range=None, col_range=None, **kwargs)
¶
Grids a Tanager dataset based on latitude and longitude.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dataset |
xr.Dataset |
The Tanager dataset to grid. |
required |
bands |
list |
The band indices to select. Defaults to None. |
None |
wavelengths |
list |
The wavelength values to select. Takes priority over bands. Defaults to None. |
None |
method |
str |
The method to use for griddata interpolation. Defaults to "nearest". |
'nearest' |
row_range |
tuple |
Row range (start_row, end_row) to subset the data. Defaults to None. |
None |
col_range |
tuple |
Column range (start_col, end_col) to subset the data. Defaults to None. |
None |
**kwargs |
Additional keyword arguments to pass to the xr.Dataset constructor. |
{} |
Returns:
| Type | Description |
|---|---|
xr.Dataset |
The gridded Tanager data. |
Source code in hypercoast/tanager.py
def _tanager_selection_to_list(values):
    """Return a band/wavelength selection as a plain list (scalars are wrapped)."""
    if isinstance(values, list):
        return values
    if isinstance(values, (tuple, range)):
        return list(values)
    if isinstance(values, np.ndarray):
        return values.ravel().tolist()
    return [values]


def grid_tanager(
    dataset,
    bands=None,
    wavelengths=None,
    method="nearest",
    row_range=None,
    col_range=None,
    **kwargs,
):
    """
    Grids a Tanager dataset based on latitude and longitude.

    Two modes exist. When ``row_range`` or ``col_range`` is given, the
    requested pixel window is cut out with ``isel`` and returned on its
    original ``(y, x)`` grid without interpolation. Otherwise the
    ``toa_radiance`` samples are resampled with ``scipy.interpolate.griddata``
    onto a regular latitude/longitude grid that spans the valid (non-NaN,
    positive) pixels and has the same number of rows/columns as the input.

    Args:
        dataset (xr.Dataset): The Tanager dataset to grid. Must provide
            ``toa_radiance``, a ``wavelength`` coordinate and 2-D
            ``latitude``/``longitude``.
        bands (list, optional): The band indices to select. Integers (and
            floats below 500) are treated as indices into the wavelength
            coordinate; other values are treated as wavelengths. A scalar,
            tuple or array is accepted as well. Defaults to None.
        wavelengths (list, optional): The wavelength values to select. Takes
            priority over bands. A scalar, tuple or array is accepted as well.
            Defaults to None, in which case (with ``bands`` also None) every
            wavelength in the dataset is used.
        method (str, optional): The method to use for griddata interpolation.
            Defaults to "nearest".
        row_range (tuple, optional): Row range (start_row, end_row) to subset
            the data. Defaults to None.
        col_range (tuple, optional): Column range (start_col, end_col) to
            subset the data. Defaults to None.
        **kwargs: Additional keyword arguments to pass to the xr.Dataset
            constructor.

    Returns:
        xr.Dataset: Dataset holding the gridded ``toa_radiance``. Dimensions
            are ``(y, x, wavelength)`` in subset mode and
            ``(latitude, longitude, wavelength)`` otherwise.
    """
    from scipy.interpolate import griddata
    from scipy.spatial import ConvexHull

    # Priority: wavelengths > bands > every wavelength in the dataset
    if wavelengths is not None:
        selected_wavelengths = _tanager_selection_to_list(wavelengths)
    elif bands is not None:
        selected_wavelengths = []
        for band in _tanager_selection_to_list(bands):
            if isinstance(band, (int, np.integer)) or (
                isinstance(band, float) and band < 500
            ):
                # Treat as band index
                selected_wavelengths.append(
                    dataset.coords["wavelength"].values[int(band)]
                )
            else:
                # Treat as wavelength value
                selected_wavelengths.append(band)
    else:
        selected_wavelengths = dataset.coords["wavelength"].values

    # Subset mode: slice the pixel window and return it without interpolation
    if row_range is not None or col_range is not None:
        y_size, x_size = dataset.latitude.shape

        start_row = row_range[0] if row_range is not None else 0
        end_row = row_range[1] if row_range is not None else y_size
        start_col = col_range[0] if col_range is not None else 0
        end_col = col_range[1] if col_range is not None else x_size

        # Clamp to the array bounds and keep end >= start
        start_row = max(0, min(start_row, y_size))
        end_row = max(start_row, min(end_row, y_size))
        start_col = max(0, min(start_col, x_size))
        end_col = max(start_col, min(end_col, x_size))

        dataset_subset = dataset.isel(
            y=slice(start_row, end_row), x=slice(start_col, end_col)
        )

        # Returning the raw samples avoids interpolation artifacts on subsets
        selected_data_list = [
            dataset_subset.sel(wavelength=wl, method="nearest")[
                "toa_radiance"
            ].values
            for wl in selected_wavelengths
        ]

        # Stack wavelengths as the last dimension
        gridded_data_3d = np.stack(selected_data_list, axis=-1)

        y_coords = np.arange(gridded_data_3d.shape[0])
        x_coords = np.arange(gridded_data_3d.shape[1])

        dataset2 = xr.Dataset(
            {"toa_radiance": (("y", "x", "wavelength"), gridded_data_3d)},
            coords={
                "y": ("y", y_coords),
                "x": ("x", x_coords),
                "wavelength": ("wavelength", selected_wavelengths),
                "latitude": (("y", "x"), dataset_subset.latitude.values),
                "longitude": (("y", "x"), dataset_subset.longitude.values),
            },
            **kwargs,
        )
        dataset2["toa_radiance"].rio.write_crs("EPSG:4326", inplace=True)
        return dataset2

    lat = dataset.latitude
    lon = dataset.longitude

    # The first selected wavelength defines which pixels count as valid
    first_wavelength_data = dataset.sel(
        wavelength=selected_wavelengths[0], method="nearest"
    )["toa_radiance"]
    overall_valid_mask = ~np.isnan(first_wavelength_data.data) & (
        first_wavelength_data.data > 0
    )

    if not np.any(overall_valid_mask):
        # No valid data: return an all-NaN grid spanning the lat/lon bounds
        valid_lat_data = lat.data[~np.isnan(lat.data)]
        valid_lon_data = lon.data[~np.isnan(lon.data)]

        if len(valid_lat_data) == 0 or len(valid_lon_data) == 0:
            # Fallback to the original bounds if every coordinate is NaN
            grid_lat = np.linspace(lat.min().values, lat.max().values, lat.shape[0])
            grid_lon = np.linspace(lon.min().values, lon.max().values, lon.shape[1])
        else:
            grid_lat = np.linspace(
                valid_lat_data.min(), valid_lat_data.max(), lat.shape[0]
            )
            grid_lon = np.linspace(
                valid_lon_data.min(), valid_lon_data.max(), lon.shape[1]
            )

        grid_lon_2d, grid_lat_2d = np.meshgrid(grid_lon, grid_lat)
        gridded_data_dict = {
            wl: np.full_like(grid_lat_2d, np.nan) for wl in selected_wavelengths
        }
    else:
        valid_lat = lat.data[overall_valid_mask]
        valid_lon = lon.data[overall_valid_mask]

        # Target grid spans the valid data bounds at the input resolution
        grid_lat = np.linspace(valid_lat.min(), valid_lat.max(), lat.shape[0])
        grid_lon = np.linspace(valid_lon.min(), valid_lon.max(), lon.shape[1])
        grid_lon_2d, grid_lat_2d = np.meshgrid(grid_lon, grid_lat)

        # Mask grid cells outside the convex hull of the valid pixels so the
        # interpolation does not smear data past the scene footprint.
        try:
            hull = ConvexHull(np.column_stack([valid_lon, valid_lat]))
            from matplotlib.path import Path

            hull_path = Path(
                np.column_stack(
                    [valid_lon[hull.vertices], valid_lat[hull.vertices]]
                )
            )
            grid_points = np.column_stack(
                [grid_lon_2d.flatten(), grid_lat_2d.flatten()]
            )
            inside_hull = hull_path.contains_points(grid_points).reshape(
                grid_lat_2d.shape
            )
        except Exception:
            # Deliberate best-effort: any hull failure falls back to a
            # simple bounding box.
            inside_hull = (
                (grid_lat_2d >= valid_lat.min())
                & (grid_lat_2d <= valid_lat.max())
                & (grid_lon_2d >= valid_lon.min())
                & (grid_lon_2d <= valid_lon.max())
            )

        # Flattened coordinates are the same for every wavelength
        lat_flat = lat.data.flatten()
        lon_flat = lon.data.flatten()

        gridded_data_dict = {}
        for wl in selected_wavelengths:
            data = dataset.sel(wavelength=wl, method="nearest")["toa_radiance"]

            # Mask nodata values (both NaN and non-positive values)
            data_flat = data.data.flatten()
            valid_mask = ~np.isnan(data_flat) & (data_flat > 0)

            if not np.any(valid_mask):
                gridded_data = np.full_like(grid_lat_2d, np.nan)
            else:
                gridded_data = griddata(
                    (lat_flat[valid_mask], lon_flat[valid_mask]),
                    data_flat[valid_mask],
                    (grid_lat_2d, grid_lon_2d),
                    method=method,
                    fill_value=np.nan,
                )
                # Apply spatial mask to prevent edge interpolation
                gridded_data[~inside_hull] = np.nan

            gridded_data_dict[wl] = gridded_data

    selected_wavelengths = list(gridded_data_dict.keys())
    # Create a 3D array with dimensions latitude, longitude, and wavelength
    gridded_data_3d = np.dstack(list(gridded_data_dict.values()))

    dataset2 = xr.Dataset(
        {"toa_radiance": (("latitude", "longitude", "wavelength"), gridded_data_3d)},
        coords={
            "latitude": ("latitude", grid_lat),
            "longitude": ("longitude", grid_lon),
            "wavelength": ("wavelength", selected_wavelengths),
        },
        **kwargs,
    )
    dataset2["toa_radiance"].rio.write_crs("EPSG:4326", inplace=True)
    return dataset2
read_tanager(filepath, bands=None, stac_url=None, wavelengths=None, fwhm=None, product=None, **kwargs)
¶
Read Planet Tanager HDF5 hyperspectral data and return an xarray.Dataset.
Auto-detects the Tanager product variant from the file contents and sources
wavelength metadata from inside the file when available. Supports all four
Planet Tanager product variants: basic_radiance, ortho_radiance,
basic_sr, and ortho_sr. Surface reflectance products expose their
data as surface_reflectance with a toa_radiance alias retained for
backward compatibility with the rest of the HyperCoast Tanager helpers;
the alias may be removed in a future major release.
Wavelengths are sourced in this precedence: (1) the wavelengths kwarg,
(2) a wavelength dataset or attribute inside the HDF5 file, (3) the
stac_url kwarg parsed for eo:bands metadata, (4) a synthesized
integer index with a UserWarning. No hardcoded STAC URL is used.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
filepath |
str or os.PathLike |
Local file path or HTTPS URL to the
Tanager `.h5` file. |
required |
bands |
array-like |
Indices of spectral bands to keep. |
None |
stac_url |
str |
STAC item URL to source wavelength metadata from when the file does not contain it. |
None |
wavelengths |
array-like |
Wavelengths in nanometers to use
directly. Must have either the full cube band count or, when
`bands` is also supplied, the number of selected bands. |
None |
fwhm |
array-like |
Full width at half maximum in nanometers
to use directly. Same length rules as `wavelengths`. |
None |
product |
str |
Force a specific product variant. One of
`basic_radiance`, `ortho_radiance`, `basic_sr`, `ortho_sr`. |
None |
**kwargs |
Extra keyword arguments forwarded to `xr.Dataset`. |
{} |
Returns:
| Type | Description |
|---|---|
xr.Dataset |
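Dataset with dims `(wavelength, y, x)`, a canonical data variable (`toa_radiance` for radiance products, `surface_reflectance` for SR products, plus a `toa_radiance` alias), and `latitude` / `longitude` coordinates on `(y, x)`. |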
Exceptions:
| Type | Description |
|---|---|
ValueError |
If no 3-D hyperspectral cube can be located in the file. |
Source code in hypercoast/tanager.py
def read_tanager(
    filepath,
    bands=None,
    stac_url=None,
    wavelengths=None,
    fwhm=None,
    product=None,
    **kwargs,
):
    """Read Planet Tanager HDF5 hyperspectral data and return an xarray.Dataset.

    Auto-detects the Tanager product variant from the file contents and sources
    wavelength metadata from inside the file when available. Supports all four
    Planet Tanager product variants: ``basic_radiance``, ``ortho_radiance``,
    ``basic_sr``, and ``ortho_sr``. Surface reflectance products expose their
    data as ``surface_reflectance`` with a ``toa_radiance`` alias retained for
    backward compatibility with the rest of the HyperCoast Tanager helpers;
    the alias may be removed in a future major release.

    Wavelengths are sourced in this precedence: (1) the ``wavelengths`` kwarg,
    (2) a wavelength dataset or attribute inside the HDF5 file, (3) the
    ``stac_url`` kwarg parsed for ``eo:bands`` metadata, (4) a synthesized
    integer index with a ``UserWarning``. No hardcoded STAC URL is used.

    Args:
        filepath (str or os.PathLike): Local file path or HTTPS URL to the
            Tanager ``.h5`` file. Strings starting with ``https://`` are
            downloaded with :func:`download_file` first.
        bands (array-like, optional): Indices of spectral bands to keep. A
            single integer or a tuple is accepted and treated as a list.
        stac_url (str, optional): STAC item URL to source wavelength metadata
            from when the file does not contain it.
        wavelengths (array-like, optional): Wavelengths in nanometers to use
            directly. Must have either the full cube band count or, when
            ``bands`` is also supplied, the number of selected bands.
        fwhm (array-like, optional): Full width at half maximum in nanometers
            to use directly. Same length rules as ``wavelengths``.
        product (str, optional): Force a specific product variant. One of
            ``basic_radiance``, ``ortho_radiance``, ``basic_sr``, ``ortho_sr``.
        **kwargs: Extra keyword arguments forwarded to ``xr.Dataset``.

    Returns:
        xr.Dataset: Dataset with dims ``(wavelength, y, x)``, a canonical data
            variable (``toa_radiance`` for radiance products, ``surface_reflectance``
            for SR products, plus a ``toa_radiance`` alias), and ``latitude`` /
            ``longitude`` coordinates on ``(y, x)``.

    Raises:
        ValueError: If latitude/longitude cannot be located, if ``wavelengths``
            or ``fwhm`` do not match the number of bands being read, or if the
            STAC band count differs from the cube band count. Per the original
            documentation it is also raised when no 3-D hyperspectral cube can
            be located in the file (that check lives in the layout discovery
            helper).
    """
    if isinstance(filepath, str) and filepath.startswith("https://"):
        filepath = download_file(filepath)

    # Normalise the band selection so it always indexes along one axis and
    # keeps that axis: a bare int would drop the band dimension, and a tuple
    # would be read by NumPy as a multi-dimensional index on 1-D arrays.
    if isinstance(bands, (int, np.integer)):
        bands = [int(bands)]
    elif isinstance(bands, tuple):
        bands = list(bands)

    with h5py.File(filepath, "r") as f:
        layout = _discover_tanager_layout(f, product=product)
        cube = f[layout["data_path"]]
        cube_shape = cube.shape
        band_axis = layout["band_axis"]
        n_bands_total = cube_shape[band_axis]

        # Read only the requested bands from disk so large Tanager scenes do
        # not blow up memory. Fall back to a full read if h5py rejects the
        # index expression (for example, unsorted integer lists).
        if bands is not None:
            index = [slice(None)] * cube.ndim
            index[band_axis] = bands
            try:
                data = cube[tuple(index)]
            except (TypeError, ValueError):
                data = cube[()][tuple(index)]
        else:
            data = cube[()]

        # Bring the band axis to the front: (wavelength, y, x).
        if band_axis != 0:
            data = np.moveaxis(data, band_axis, 0)

        lat_path = layout["lat_path"]
        lon_path = layout["lon_path"]
        if lat_path is not None and lon_path is not None:
            lat = f[lat_path][()]
            lon = f[lon_path][()]
        else:
            lat, lon = _grid_latlon(f, layout)
        if lat is None or lon is None:
            raise ValueError(
                "Could not locate Latitude/Longitude datasets in the Tanager HDF5 file."
            )

        wl_nm_full, fwhm_nm_full = _read_wavelengths_from_hdf5(f, layout, n_bands_total)

    n_bands_selected = data.shape[0]

    if layout["fill_value"] is not None:
        data = np.where(data == layout["fill_value"], np.nan, data.astype(float))
    if layout["scale_factor"] != 1.0 or layout["add_offset"] != 0.0:
        data = data.astype(float) * layout["scale_factor"] + layout["add_offset"]

    def _apply_band_slice(values, expected):
        """Slice a full-length band-aligned array to the selected bands.

        Args:
            values (array-like or None): Values indexed along the band axis.
            expected (int): Expected length before slicing.

        Returns:
            numpy.ndarray or None: Sliced values, or ``None`` if ``values`` is
                ``None``. Arrays whose size differs from ``expected`` are
                returned unsliced.
        """
        if values is None:
            return None
        arr = np.asarray(values)
        if arr.size == expected and bands is not None:
            arr = arr[bands]
        return arr

    # Precedence 2: metadata found inside the file.
    wl_nm = _apply_band_slice(wl_nm_full, n_bands_total)
    fwhm_nm = _apply_band_slice(fwhm_nm_full, n_bands_total)

    # Precedence 1: explicit caller-supplied values override the file.
    if wavelengths is not None:
        wl_nm = np.asarray(wavelengths, dtype=float).ravel()
        if wl_nm.size == n_bands_total and bands is not None:
            wl_nm = wl_nm[bands]
        if wl_nm.size != n_bands_selected:
            raise ValueError(
                f"`wavelengths` has length {wl_nm.size} but {n_bands_selected} "
                f"bands are being read."
            )
    if fwhm is not None:
        fwhm_arr = np.asarray(fwhm, dtype=float).ravel()
        if fwhm_arr.size == n_bands_total and bands is not None:
            fwhm_arr = fwhm_arr[bands]
        if fwhm_arr.size != n_bands_selected:
            raise ValueError(
                f"`fwhm` has length {fwhm_arr.size} but {n_bands_selected} "
                f"bands are being read."
            )
        fwhm_nm = fwhm_arr

    # Precedence 3: STAC metadata, only when nothing else gave wavelengths.
    if wl_nm is None and stac_url is not None:
        wl_stac, fwhm_stac = _read_wavelengths_from_stac(
            stac_url, layout["stac_asset_key"]
        )
        if wl_stac.size != n_bands_total:
            raise ValueError(
                f"STAC item reports {wl_stac.size} bands but the data cube has "
                f"{n_bands_total} bands."
            )
        wl_nm = _apply_band_slice(wl_stac, n_bands_total)
        fwhm_nm = _apply_band_slice(fwhm_stac, n_bands_total)

    # Precedence 4: synthesize an integer index and tell the caller.
    if wl_nm is None:
        warnings.warn(
            "No wavelength metadata found in the Tanager HDF5 file and no "
            "`stac_url` or `wavelengths` supplied; falling back to integer "
            "band indices. Pass `wavelengths` or `stac_url` for physical nm "
            "values.",
            UserWarning,
            stacklevel=2,
        )
        wl_nm = np.arange(n_bands_selected, dtype=float)

    if fwhm_nm is None:
        fwhm_nm = np.full(n_bands_selected, np.nan)

    data_var_name = layout["data_var_name"]
    coords = {
        "wavelength": wl_nm,
        "fwhm": ("wavelength", fwhm_nm),
        "latitude": (("y", "x"), lat),
        "longitude": (("y", "x"), lon),
    }
    da = xr.DataArray(
        data, dims=("wavelength", "y", "x"), coords=coords, name=data_var_name
    )

    # Same coordinates as the DataArray, with the wavelength index taken
    # from it, as before.
    dataset_coords = dict(coords, wavelength=da.wavelength)
    ds = xr.Dataset(
        data_vars={data_var_name: da},
        coords=dataset_coords,
        attrs={
            "source": "Planet Tanager HDF5",
            "product": layout["product"],
            "stac_item": stac_url or "",
            "data_var": data_var_name,
        },
        **kwargs,
    )

    # Backward-compatible alias so helpers that read "toa_radiance" keep working.
    if data_var_name == "surface_reflectance" and "toa_radiance" not in ds.data_vars:
        ds["toa_radiance"] = ds["surface_reflectance"]

    return ds
read_tanager_stac(stac_item, asset='ortho_radiance_hdf5', out_dir=None, bands=None, wavelengths=None, fwhm=None, product=None, quiet=True, overwrite=False, **kwargs)
¶
Download and read a Tanager HDF5 asset from a STAC item.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
stac_item |
dict or str |
STAC item dictionary or item JSON URL. |
required |
asset |
str |
HDF5 STAC asset key. Defaults to
`"ortho_radiance_hdf5"`. |
'ortho_radiance_hdf5' |
out_dir |
str |
Directory for the downloaded HDF5 file. |
None |
bands |
array-like |
Spectral band indices to keep. |
None |
wavelengths |
array-like |
Explicit wavelength values in nm. |
None |
fwhm |
array-like |
Explicit FWHM values in nm. |
None |
product |
str |
Force a Tanager product variant. |
None |
quiet |
bool |
Suppress download output. Defaults to True. |
True |
overwrite |
bool |
Overwrite existing files. Defaults to False. |
False |
**kwargs |
Extra keyword arguments passed to :func:`read_tanager`. |
{} |
Returns:
| Type | Description |
|---|---|
xarray.Dataset |
Tanager dataset read from the selected STAC asset. |
Source code in hypercoast/tanager.py
def read_tanager_stac(
    stac_item,
    asset: str = "ortho_radiance_hdf5",
    out_dir: Optional[str] = None,
    bands=None,
    wavelengths=None,
    fwhm=None,
    product: Optional[str] = None,
    quiet: bool = True,
    overwrite: bool = False,
    **kwargs,
):
    """Download a Tanager HDF5 asset referenced by a STAC item and open it.

    Any of ``wavelengths`` / ``fwhm`` left as ``None`` is filled from the
    STAC item's metadata for ``asset`` before the file is read.

    Args:
        stac_item (dict or str): STAC item dictionary or item JSON URL.
        asset (str, optional): HDF5 STAC asset key. Defaults to
            ``"ortho_radiance_hdf5"``.
        out_dir (str, optional): Directory for the downloaded HDF5 file.
        bands (array-like, optional): Spectral band indices to keep.
        wavelengths (array-like, optional): Explicit wavelength values in nm.
        fwhm (array-like, optional): Explicit FWHM values in nm.
        product (str, optional): Force a Tanager product variant.
        quiet (bool, optional): Suppress download output. Defaults to True.
        overwrite (bool, optional): Overwrite existing files. Defaults to False.
        **kwargs: Extra keyword arguments passed to :func:`read_tanager`.

    Returns:
        xarray.Dataset: Tanager dataset read from the selected STAC asset, with
            ``stac_asset`` and ``stac_item`` attributes set.
    """
    item = _load_stac_item(stac_item)
    stac_url = _get_stac_item_url(item)

    local_paths = download_tanager(
        item,
        asset=asset,
        out_dir=out_dir,
        quiet=quiet,
        overwrite=overwrite,
    )

    # Only consult the STAC metadata when the caller left something unset.
    if wavelengths is None or fwhm is None:
        stac_wl, stac_fwhm = _read_wavelengths_from_stac_item(item, asset)
        wavelengths = stac_wl if wavelengths is None else wavelengths
        fwhm = stac_fwhm if fwhm is None else fwhm

    ds = read_tanager(
        local_paths[0],
        bands=bands,
        stac_url=stac_url,
        wavelengths=wavelengths,
        fwhm=fwhm,
        product=product,
        **kwargs,
    )

    # Without an explicit product, the asset key decides the product label.
    if product is None and asset in _STAC_ASSET_PRODUCT:
        ds.attrs["product"] = _STAC_ASSET_PRODUCT[asset]
    ds.attrs["stac_asset"] = asset
    ds.attrs["stac_item"] = stac_url or ds.attrs.get("stac_item", "")
    return ds
search_tanager(bbox=None, temporal=None, collections=None, count=-1, query=None, cloud_percent=None, catalog_url='https://www.planet.com/data/stac/tanager-core-imagery/catalog.json', output=None, crs='EPSG:4326', return_gdf=False, timeout=30, **kwargs)
¶
Search Planet Tanager STAC sample imagery.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
bbox |
list |
Bounding box `[xmin, ymin, xmax, ymax]` in EPSG:4326. |
None |
temporal |
str or tuple |
Date/time range as
`"start/end"`, `"start,end"`, or `(start, end)`. |
None |
collections |
str or list |
Tanager collection ids or titles,
such as `"coastal-water-bodies"` or `"GHG Plumes"`. |
None |
count |
int |
Maximum number of items to return. `-1` means all matching items. Defaults to `-1`. |
-1 |
query |
str |
Case-insensitive text search against item id, title, description, and location description. |
None |
cloud_percent |
float |
Maximum item `cloud_percent`. |
None |
catalog_url |
str |
Tanager STAC catalog URL. Planet browser URLs are accepted and normalized to raw JSON URLs. |
'https://www.planet.com/data/stac/tanager-core-imagery/catalog.json' |
output |
str |
File path to save a GeoDataFrame when
`return_gdf` is True. |
None |
crs |
str |
CRS for GeoDataFrame output. Defaults to
`"EPSG:4326"`. |
'EPSG:4326' |
return_gdf |
bool |
Return `(items, gdf)` instead of only the item list. Defaults to False. |
False |
timeout |
int |
HTTP request timeout in seconds. |
30 |
**kwargs |
Additional exact-match filters against STAC item properties,
for example `quality_category="test"`. |
{} |
Returns:
| Type | Description |
|---|---|
list or tuple |
STAC item dictionaries, or `(items, gdf)` when `return_gdf=True`. |
Source code in hypercoast/tanager.py
def search_tanager(
    bbox: Optional[List[float]] = None,
    temporal: Optional[Union[str, Tuple[str, str]]] = None,
    collections: Optional[Union[str, List[str]]] = None,
    count: int = -1,
    query: Optional[str] = None,
    cloud_percent: Optional[float] = None,
    catalog_url: str = TANAGER_STAC_CATALOG_URL,
    output: Optional[str] = None,
    crs: str = "EPSG:4326",
    return_gdf: bool = False,
    timeout: int = 30,
    **kwargs,
) -> Union[List[dict], tuple]:
    """Search Planet Tanager STAC sample imagery.

    Walks the static catalog: every ``child`` link is fetched as a collection
    and every ``item`` link of a selected collection is fetched and tested
    against the filters. Each returned item is a copy annotated with
    ``_stac_url``, ``_collection_url`` and ``_collection_title``.

    Args:
        bbox (list, optional): Bounding box ``[xmin, ymin, xmax, ymax]`` in
            EPSG:4326.
        temporal (str or tuple, optional): Date/time range as
            ``"start/end"``, ``"start,end"``, or ``(start, end)``.
        collections (str or list, optional): Tanager collection ids or titles,
            such as ``"coastal-water-bodies"`` or ``"GHG Plumes"``. Matching is
            case-insensitive.
        count (int, optional): Maximum number of items to return. ``-1`` means
            all matching items. Defaults to ``-1``. With ``0`` the catalog is
            not fetched at all.
        query (str, optional): Case-insensitive text search against item id,
            title, description, and location description.
        cloud_percent (float, optional): Maximum item ``cloud_percent``.
        catalog_url (str, optional): Tanager STAC catalog URL. Planet browser
            URLs are accepted and normalized to raw JSON URLs.
        output (str, optional): File path to save a GeoDataFrame when
            ``return_gdf`` is True.
        crs (str, optional): CRS for GeoDataFrame output. Defaults to
            ``"EPSG:4326"``.
        return_gdf (bool, optional): Return ``(items, gdf)`` instead of only
            the item list. Defaults to False.
        timeout (int, optional): HTTP request timeout in seconds.
        **kwargs: Additional exact-match filters against STAC item properties,
            for example ``quality_category="test"``.

    Returns:
        list or tuple: STAC item dictionaries, or ``(items, gdf)`` when
            ``return_gdf=True``.
    """
    catalog_url = _normalize_stac_url(catalog_url)
    bbox = None if bbox is None else list(bbox)

    wanted = _as_list(collections)
    if wanted is not None:
        wanted = {str(value).lower() for value in wanted}

    has_limit = count is not None and count > -1
    results = []

    def _limit_reached():
        return has_limit and len(results) >= count

    # A limit of zero short-circuits before any network request is made.
    if not (has_limit and count == 0):
        catalog = _fetch_json(catalog_url, timeout=timeout)
        for child_link in _stac_links(catalog, "child"):
            if _limit_reached():
                break

            collection_url = _normalize_stac_url(child_link["href"])
            collection = _fetch_json(collection_url, timeout=timeout)
            collection_id = str(collection.get("id", "")).lower()
            collection_title = str(collection.get("title", "")).lower()
            if (
                wanted is not None
                and collection_id not in wanted
                and collection_title not in wanted
            ):
                continue

            for item_link in _stac_links(collection, "item"):
                if _limit_reached():
                    break

                item_url = _normalize_stac_url(item_link["href"])
                item = dict(_fetch_json(item_url, timeout=timeout))
                item["_stac_url"] = item_url
                item["_collection_url"] = collection_url
                item["_collection_title"] = collection.get("title", "")

                matched = _item_matches_filters(
                    item,
                    bbox=bbox,
                    temporal=temporal,
                    query=query,
                    cloud_percent=cloud_percent,
                    **kwargs,
                )
                if matched:
                    results.append(item)

    if not return_gdf:
        return results

    gdf = _stac_items_to_gdf(results, crs=crs)
    if output is not None:
        gdf.to_file(output)
    return results, gdf
tanager_footprints(bbox=None, temporal=None, collections=None, count=-1, query=None, cloud_percent=None, catalog_url='https://www.planet.com/data/stac/tanager-core-imagery/catalog.json', output=None, crs='EPSG:4326', unique=True, return_items=False, timeout=30, **kwargs)
¶
Return Tanager STAC item footprints as a GeoDataFrame.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
bbox |
list |
Bounding box `[xmin, ymin, xmax, ymax]` in EPSG:4326. |
None |
temporal |
str or tuple |
Date/time range as
`"start/end"`, `"start,end"`, or `(start, end)`. |
None |
collections |
str or list |
Tanager collection ids or titles. Defaults to all collections in the Tanager STAC catalog. |
None |
count |
int |
Maximum number of matching STAC item records to
inspect. -1 means all. Defaults to -1. |
-1 |
query |
str |
Case-insensitive text search against item id, title, description, and location description. |
None |
cloud_percent |
float |
Maximum item cloud_percent. |
None |
catalog_url |
str |
Tanager STAC catalog URL. Planet browser URLs are accepted and normalized to raw JSON URLs. |
'https://www.planet.com/data/stac/tanager-core-imagery/catalog.json' |
output |
str |
File path to save the GeoDataFrame. |
None |
crs |
str |
CRS for GeoDataFrame output. Defaults to "EPSG:4326". |
'EPSG:4326' |
unique |
bool |
Deduplicate scenes that appear in more than one thematic collection. Defaults to True. |
True |
return_items |
bool |
Return (items, gdf) instead of only the GeoDataFrame. Defaults to False. |
False |
timeout |
int |
HTTP request timeout in seconds. |
30 |
**kwargs |
Additional exact-match filters against STAC item properties. |
{} |
Returns:
| Type | Description |
|---|---|
geopandas.GeoDataFrame or tuple |
Footprint GeoDataFrame, or (items, gdf) when return_items=True. |
Source code in hypercoast/tanager.py
def tanager_footprints(
    bbox: Optional[List[float]] = None,
    temporal: Optional[Union[str, Tuple[str, str]]] = None,
    collections: Optional[Union[str, List[str]]] = None,
    count: int = -1,
    query: Optional[str] = None,
    cloud_percent: Optional[float] = None,
    catalog_url: str = TANAGER_STAC_CATALOG_URL,
    output: Optional[str] = None,
    crs: str = "EPSG:4326",
    unique: bool = True,
    return_items: bool = False,
    timeout: int = 30,
    **kwargs,
):
    """Build a GeoDataFrame of footprints for matching Tanager STAC items.

    The search itself is delegated to :func:`search_tanager`; this function
    optionally de-duplicates the matched items, converts them to a
    GeoDataFrame, and optionally writes that GeoDataFrame to disk.

    Args:
        bbox (list, optional): Bounding box ``[xmin, ymin, xmax, ymax]`` in
            EPSG:4326.
        temporal (str or tuple, optional): Date/time range given as
            ``"start/end"``, ``"start,end"``, or ``(start, end)``.
        collections (str or list, optional): Tanager collection ids or
            titles. Defaults to all collections in the Tanager STAC catalog.
        count (int, optional): Maximum number of matching STAC item records
            to inspect. ``-1`` means all. Defaults to ``-1``. The limit is
            forwarded to the search, so it applies before de-duplication.
        query (str, optional): Case-insensitive text search against item id,
            title, description, and location description.
        cloud_percent (float, optional): Maximum item ``cloud_percent``.
        catalog_url (str, optional): Tanager STAC catalog URL. Planet browser
            URLs are accepted and normalized to raw JSON URLs.
        output (str, optional): File path to save the GeoDataFrame. Nothing
            is written when None.
        crs (str, optional): CRS for GeoDataFrame output. Defaults to
            ``"EPSG:4326"``.
        unique (bool, optional): Deduplicate scenes that appear in more than
            one thematic collection. Defaults to True.
        return_items (bool, optional): Return ``(items, gdf)`` instead of
            only the GeoDataFrame. Defaults to False.
        timeout (int, optional): HTTP request timeout in seconds.
        **kwargs: Additional exact-match filters against STAC item
            properties, forwarded to :func:`search_tanager`.

    Returns:
        geopandas.GeoDataFrame or tuple: Footprint GeoDataFrame, or
        ``(items, gdf)`` when ``return_items=True``.
    """
    # Always ask the search for raw items only; the GeoDataFrame is built
    # here so that de-duplication can happen first.
    search_options = dict(
        bbox=bbox,
        temporal=temporal,
        collections=collections,
        count=count,
        query=query,
        cloud_percent=cloud_percent,
        catalog_url=catalog_url,
        return_gdf=False,
        timeout=timeout,
    )
    matched = search_tanager(**search_options, **kwargs)

    if unique:
        matched = _dedupe_tanager_items(matched)

    footprints = _stac_items_to_gdf(matched, crs=crs)
    if output is not None:
        footprints.to_file(output)

    return (matched, footprints) if return_items else footprints
tanager_to_image(dataset, bands=None, wavelengths=None, method='nearest', row_range=None, col_range=None, output=None, **kwargs)
¶
Converts a Tanager dataset to an image.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dataset |
xarray.Dataset or str |
The dataset containing the Tanager data or the file path to the dataset. |
required |
bands |
array-like |
The specific band indices to select. Defaults to None. |
None |
wavelengths |
array-like |
The specific wavelength values to select. Takes priority over bands. Defaults to None. |
None |
method |
str |
The method to use for data interpolation. Defaults to "nearest". |
'nearest' |
row_range |
tuple |
Row range (start_row, end_row) to subset the data. Defaults to None. |
None |
col_range |
tuple |
Column range (start_col, end_col) to subset the data. Defaults to None. |
None |
output |
str |
The file path where the image will be saved. If None, the image will be returned as a PIL Image object. Defaults to None. |
None |
**kwargs |
Additional keyword arguments to be passed to leafmap.array_to_image. |
{} |
Returns:
| Type | Description |
|---|---|
rasterio.Dataset or None |
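The image converted from the dataset. If output is provided, the image will be saved to the specified file and the function will return None. |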
Source code in hypercoast/tanager.py
def tanager_to_image(
    dataset,
    bands=None,
    wavelengths=None,
    method="nearest",
    row_range=None,
    col_range=None,
    output=None,
    **kwargs,
):
    """
    Converts a Tanager dataset to an image.

    The dataset is gridded with :func:`grid_tanager`, its ``toa_radiance``
    variable is tagged with the EPSG:4326 CRS, and the result is handed to
    ``leafmap.array_to_image``.

    Args:
        dataset (xarray.Dataset or str): The dataset containing the Tanager
            data, or the file path to the dataset. A string is opened with
            :func:`read_tanager`.
        bands (array-like, optional): The specific band indices to select.
            Defaults to None.
        wavelengths (array-like, optional): The specific wavelength values to
            select. Takes priority over bands. Defaults to None.
        method (str, optional): The method to use for data interpolation.
            Defaults to "nearest".
        row_range (tuple, optional): Row range (start_row, end_row) to subset
            the data. Defaults to None.
        col_range (tuple, optional): Column range (start_col, end_col) to
            subset the data. Defaults to None.
        output (str, optional): The file path where the image will be saved.
            Passed through to `leafmap.array_to_image`. Defaults to None.
        **kwargs: Additional keyword arguments to be passed to
            `leafmap.array_to_image`.

    Returns:
        Whatever `leafmap.array_to_image` returns for the gridded
        ``toa_radiance`` array.
        NOTE(review): earlier documentation described this as a
        rasterio dataset, or None when `output` is provided; confirm against
        the installed leafmap version.
    """
    from leafmap import array_to_image

    # A string input is treated as a file path and read into a dataset first.
    source = (
        read_tanager(dataset, bands=bands) if isinstance(dataset, str) else dataset
    )

    radiance = grid_tanager(
        source,
        bands=bands,
        wavelengths=wavelengths,
        method=method,
        row_range=row_range,
        col_range=col_range,
    )["toa_radiance"]

    # Tag the array with a geographic CRS in place before export.
    radiance.rio.write_crs("EPSG:4326", inplace=True)
    return array_to_image(radiance, transpose=False, output=output, **kwargs)