tanager module¶

Reader and helpers for Planet Tanager hyperspectral HDF5 products.

Supports all four published product variants via `read_tanager`: basic_radiance_hdf5, ortho_radiance_hdf5, basic_sr_hdf5, and ortho_sr_hdf5. The reader auto-detects the HDF5 layout and sources wavelength metadata from inside the file when available, falling back to a caller-supplied STAC URL only when necessary.

`download_tanager(items, asset='ortho_radiance_hdf5', out_dir=None, quiet=True, overwrite=False, **kwargs)` ¶

Download a Tanager asset from one or more STAC items.

Parameters:

Name	Type	Description	Default
`items`	`dict, str, or list`	STAC item dictionary, STAC item URL, or a sequence of either.	required
`asset`	`str`	STAC asset key to download. Defaults to `"ortho_radiance_hdf5"`.	`'ortho_radiance_hdf5'`
`out_dir`	`str`	Output directory. Defaults to the current directory.	`None`
`quiet`	`bool`	Suppress download output. Defaults to True.	`True`
`overwrite`	`bool`	Overwrite existing files. Defaults to False.	`False`
`**kwargs`		Extra keyword arguments passed to :func:`download_file`.	`{}`

Returns:

Type	Description
`list`	Local file paths for the downloaded assets.

Source code in hypercoast/tanager.py

def download_tanager(
    items,
    asset: str = "ortho_radiance_hdf5",
    out_dir: Optional[str] = None,
    quiet: bool = True,
    overwrite: bool = False,
    **kwargs,
) -> List[str]:
    """Fetch one asset from each of the given Tanager STAC items.

    Args:
        items (dict, str, or list): STAC item dictionary, STAC item URL, or a
            sequence of either. Normalised through ``_coerce_tanager_items``.
        asset (str, optional): Key of the STAC asset to fetch. Defaults to
            ``"ortho_radiance_hdf5"``.
        out_dir (str, optional): Directory to place the files in. When given,
            each file is named after the basename of the asset href. When
            ``None``, ``output=None`` is handed to :func:`download_file`,
            which then chooses the destination.
        quiet (bool, optional): Suppress download output. Defaults to True.
        overwrite (bool, optional): Overwrite existing files. Defaults to False.
        **kwargs: Extra keyword arguments passed to :func:`download_file`.

    Returns:
        list: One entry per item, holding whatever :func:`download_file`
        returned for that item's asset (the local file path).

    Raises:
        KeyError: If an item has no asset named ``asset``.
        ValueError: If the selected asset has a missing or empty ``href``.
    """
    downloaded = []
    for stac_item in _coerce_tanager_items(items):
        assets = stac_item.get("assets", {})

        # Fail early, listing the keys that do exist, so a typo is easy to spot.
        if asset not in assets:
            available = sorted(assets.keys())
            raise KeyError(
                f"STAC item has no asset '{asset}'. Available assets: {available}"
            )

        link = assets[asset].get("href")
        if not link:
            raise ValueError(f"STAC item asset '{asset}' has no href.")

        # Only build an explicit destination when the caller chose a directory.
        target = (
            os.path.join(out_dir, os.path.basename(link))
            if out_dir is not None
            else None
        )

        local_path = download_file(
            link,
            output=target,
            quiet=quiet,
            overwrite=overwrite,
            unzip=False,
            **kwargs,
        )
        downloaded.append(local_path)

    return downloaded

`extract_tanager(dataset, latitude, longitude, delta=0.01, return_plot=False, **kwargs)` ¶

Extracts data from a Tanager dataset for a given latitude and longitude range and calculates the mean over the spatial (y, x) dimensions.

Parameters:

Name	Type	Description	Default
`dataset`	`Union[xr.Dataset, str]`	The Tanager dataset or path to the dataset file.	required
`latitude`	`Union[float, Tuple[float, float]]`	The latitude or range of latitudes to extract data for.	required
`longitude`	`Union[float, Tuple[float, float]]`	The longitude or range of longitudes to extract data for.	required
`delta`	`float`	The range to add/subtract to the latitude and longitude if they are not ranges. Defaults to 0.01.	`0.01`
`return_plot`	`bool`	Whether to return a plot of the data. Defaults to False.	`False`
`**kwargs`		Additional keyword arguments to pass to the plot function.	`{}`

Returns:

Type	Description
`Union[xr.DataArray, plt.figure.Figure]`	The mean data over the latitude and longitude dimensions, or a plot of this data if return_plot is True.

Source code in hypercoast/tanager.py

def extract_tanager(
    dataset: Union[xr.Dataset, str],
    latitude: Union[float, Tuple[float, float]],
    longitude: Union[float, Tuple[float, float]],
    delta: float = 0.01,
    return_plot: bool = False,
    **kwargs,
) -> Union[xr.DataArray, plt.Figure]:
    """
    Extracts data from a Tanager dataset for a given latitude and longitude
        range and calculates the mean over the spatial (``y``, ``x``) dimensions.

    Args:
        dataset (Union[xr.Dataset, str]): The Tanager dataset, or a path to a
            Tanager file. A path is opened with :func:`read_tanager` using
            its default arguments.
        latitude (Union[float, Tuple[float, float]]): The latitude or range of
            latitudes to extract data for.
        longitude (Union[float, Tuple[float, float]]): The longitude or range of
            longitudes to extract data for.
        delta (float, optional): The range to add/subtract to the latitude and
            longitude if they are not ranges. Defaults to 0.01.
        return_plot (bool, optional): Whether to return a plot of the data. Defaults to False.
        **kwargs: Additional keyword arguments to pass to the plot function
            (only used when ``return_plot`` is True).

    Returns:
        Union[xr.DataArray, plt.figure.Figure]: The mean data over the ``y``
            and ``x`` dimensions, or, if return_plot is True, the object
            returned by ``DataArray.plot.line`` for that mean.
    """
    # The signature advertises path inputs, but filter_tanager indexes the
    # dataset directly; load the file first so a path actually works.
    if isinstance(dataset, (str, os.PathLike)):
        dataset = read_tanager(dataset)

    # A scalar coordinate is widened into a (value - delta, value + delta)
    # window; list/tuple inputs are taken as an explicit range.
    if not isinstance(latitude, (list, tuple)):
        latitude = (latitude - delta, latitude + delta)

    if not isinstance(longitude, (list, tuple)):
        longitude = (longitude - delta, longitude + delta)

    subset = filter_tanager(dataset, latitude, longitude, return_plot=False)
    mean_spectrum = subset.mean(dim=["y", "x"])

    if return_plot:
        return mean_spectrum.plot.line(**kwargs)
    return mean_spectrum

`filter_tanager(dataset, latitude, longitude, drop=True, return_plot=False, **kwargs)` ¶

Filters a Tanager dataset based on latitude and longitude.

Parameters:

Name	Type	Description	Default
`dataset`	`xr.Dataset`	The Tanager dataset to filter.	required
`latitude`	`float or tuple`	The latitude to filter by. If a tuple or list, it represents a range.	required
`longitude`	`float or tuple`	The longitude to filter by. If a tuple or list, it represents a range.	required
`drop`	`bool`	Whether to drop the filtered out data. Defaults to True.	`True`

Returns:

Type	Description
`xr.DataArray`	The filtered Tanager data.

Source code in hypercoast/tanager.py

def filter_tanager(
    dataset, latitude, longitude, drop=True, return_plot=False, **kwargs
):
    """
    Filters a Tanager dataset based on latitude and longitude.

    Args:
        dataset (xr.Dataset): The Tanager dataset to filter. It must expose
            ``latitude`` and ``longitude`` variables and a ``toa_radiance``
            data variable with ``y`` and ``x`` dimensions.
        latitude (float or tuple): The latitude to filter by. If a tuple or list,
            it represents an exclusive ``(min, max)`` range; a scalar is
            matched by exact equality.
        longitude (float or tuple): The longitude to filter by. If a tuple or list,
            it represents an exclusive ``(min, max)`` range; a scalar is
            matched by exact equality.
        drop (bool, optional): Whether to drop the filtered out data. Defaults to True.
        return_plot (bool, optional): If True, plot one line per remaining
            pixel and return the result of that plot call instead of the
            data. Defaults to False.
        **kwargs: Additional keyword arguments forwarded to
            ``DataArray.where``.

    Returns:
        xr.DataArray: The filtered Tanager data, or the object returned by
            ``DataArray.plot.line`` when ``return_plot`` is True.
    """
    if isinstance(latitude, (list, tuple)):
        lat_con = (dataset["latitude"] > latitude[0]) & (
            dataset["latitude"] < latitude[1]
        )
    else:
        lat_con = dataset["latitude"] == latitude

    if isinstance(longitude, (list, tuple)):
        lon_con = (dataset["longitude"] > longitude[0]) & (
            dataset["longitude"] < longitude[1]
        )
    else:
        lon_con = dataset["longitude"] == longitude

    da = dataset["toa_radiance"].where(lat_con & lon_con, drop=drop, **kwargs)

    # Remove rows and columns that are entirely NaN after masking.
    da_filtered = da.dropna(dim="y", how="all").dropna(dim="x", how="all")

    if return_plot:
        # Collapse (y, x) into a single "pixel" dimension so each pixel is
        # drawn as its own line.
        pixel_stack = da_filtered.stack(
            {"pixel": ["y", "x"]},
            create_index=False,
        )
        # Previously the plot result was discarded and None was returned.
        return pixel_stack.plot.line(hue="pixel")

    return da_filtered

`get_tanager_asset_url(stac_item, asset='ortho_visual')` ¶

Return an asset URL from a Tanager STAC item.

Parameters:

Name	Type	Description	Default
`stac_item`	`dict or str`	STAC item dictionary or item JSON URL.	required
`asset`	`str`	STAC asset key. Defaults to `"ortho_visual"`.	`'ortho_visual'`

Returns:

Type	Description
`str`	The asset href.

Source code in hypercoast/tanager.py

def get_tanager_asset_url(stac_item, asset: str = "ortho_visual") -> str:
    """Look up the href of a named asset on a Tanager STAC item.

    Args:
        stac_item (dict or str): STAC item dictionary or item JSON URL;
            resolved through ``_load_stac_item``.
        asset (str, optional): STAC asset key. Defaults to ``"ortho_visual"``.

    Returns:
        str: The asset href.

    Raises:
        KeyError: If the item has no asset named ``asset``.
        ValueError: If the asset exists but its ``href`` is missing or empty.
    """
    asset_map = _load_stac_item(stac_item).get("assets", {})

    if asset in asset_map:
        link = asset_map[asset].get("href")
        if link:
            return link
        raise ValueError(f"STAC item asset '{asset}' has no href.")

    # Unknown key: report what is available to help the caller pick one.
    available = sorted(asset_map.keys())
    raise KeyError(
        f"STAC item has no asset '{asset}'. Available assets: {available}"
    )

`grid_tanager(dataset, bands=None, wavelengths=None, method='nearest', row_range=None, col_range=None, **kwargs)` ¶

Grids a Tanager dataset based on latitude and longitude.

Parameters:

Name	Type	Description	Default
`dataset`	`xr.Dataset`	The Tanager dataset to grid.	required
`bands`	`list`	The band indices to select. Defaults to None.	`None`
`wavelengths`	`list`	The wavelength values to select. Takes priority over bands. Defaults to None.	`None`
`method`	`str`	The method to use for griddata interpolation. Defaults to "nearest".	`'nearest'`
`row_range`	`tuple`	Row range (start_row, end_row) to subset the data. Defaults to None.	`None`
`col_range`	`tuple`	Column range (start_col, end_col) to subset the data. Defaults to None.	`None`
`**kwargs`		Additional keyword arguments to pass to the xr.Dataset constructor.	`{}`

Returns:

Type	Description
`xr.Dataset`	The gridded Tanager data.

Source code in hypercoast/tanager.py

def grid_tanager(
    dataset,
    bands=None,
    wavelengths=None,
    method="nearest",
    row_range=None,
    col_range=None,
    **kwargs,
):
    """
    Grids a Tanager dataset based on latitude and longitude.

    Two modes are supported:

    * If ``row_range`` or ``col_range`` is given, the dataset is sliced by
      pixel index and returned on its native ``(y, x)`` grid without any
      interpolation; 2-D ``latitude``/``longitude`` are kept as coordinates.
    * Otherwise the swath is interpolated with ``scipy.interpolate.griddata``
      onto a regular latitude/longitude grid with the same number of rows and
      columns as the input, and cells outside the convex hull of the valid
      pixels are set to NaN.

    Args:
        dataset (xr.Dataset): The Tanager dataset to grid. It must provide a
            ``toa_radiance`` variable, a ``wavelength`` coordinate and 2-D
            ``latitude``/``longitude`` on ``(y, x)``.
        bands (list, optional): The band indices to select. Integers (and
            floats below 500) are treated as indices into the ``wavelength``
            coordinate; other values are treated as wavelengths. A scalar,
            list, tuple or NumPy array is accepted. Defaults to None.
        wavelengths (list, optional): The wavelength values to select. Takes
            priority over bands. A scalar, list, tuple or NumPy array is
            accepted. Defaults to None, in which case (if ``bands`` is also
            None) every wavelength in the dataset is gridded.
        method (str, optional): The method to use for griddata interpolation.
            Defaults to "nearest".
        row_range (tuple, optional): Row range (start_row, end_row) to subset the data. Defaults to None.
        col_range (tuple, optional): Column range (start_col, end_col) to subset the data. Defaults to None.
        **kwargs: Additional keyword arguments to pass to the xr.Dataset constructor.

    Returns:
        xr.Dataset: Dataset holding the gridded ``toa_radiance`` variable.
    """
    from scipy.interpolate import griddata
    from scipy.spatial import ConvexHull

    def _as_list(value):
        """Return ``value`` as a list, wrapping scalars in a one-item list."""
        if isinstance(value, np.ndarray):
            # ravel() also turns a 0-d array into a single-element sequence.
            return list(value.ravel())
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    # Priority: wavelengths > bands > default
    if wavelengths is not None:
        # Use wavelengths directly. Tuples and arrays are expanded rather
        # than being wrapped as a single (invalid) selection value.
        selected_wavelengths = _as_list(wavelengths)
    elif bands is not None:
        # Convert bands to wavelengths
        selected_wavelengths = []
        for band in _as_list(bands):
            if isinstance(band, (int, np.integer)) or (
                isinstance(band, float) and band < 500
            ):
                # Treat as band index
                selected_wavelengths.append(
                    dataset.coords["wavelength"].values[int(band)]
                )
            else:
                # Treat as wavelength value
                selected_wavelengths.append(band)
    else:
        # Default to every wavelength in the dataset
        selected_wavelengths = dataset.coords["wavelength"].values

    # Apply spatial subset filtering if ranges are provided
    if row_range is not None or col_range is not None:
        # Get original array dimensions
        y_size, x_size = dataset.latitude.shape

        # Determine row and column indices
        start_row = row_range[0] if row_range is not None else 0
        end_row = row_range[1] if row_range is not None else y_size
        start_col = col_range[0] if col_range is not None else 0
        end_col = col_range[1] if col_range is not None else x_size

        # Ensure indices are within bounds
        start_row = max(0, min(start_row, y_size))
        end_row = max(start_row, min(end_row, y_size))
        start_col = max(0, min(start_col, x_size))
        end_col = max(start_col, min(end_col, x_size))

        # Subset the dataset using isel for y and x dimensions
        dataset_subset = dataset.isel(
            y=slice(start_row, end_row), x=slice(start_col, end_col)
        )

        # For subsets, return the data directly without interpolation to avoid artifacts
        selected_data_list = []
        for wl in selected_wavelengths:
            data = dataset_subset.sel(wavelength=wl, method="nearest")["toa_radiance"]
            selected_data_list.append(data.values)

        # Stack wavelengths as the last dimension
        gridded_data_3d = np.stack(selected_data_list, axis=-1)

        # Create output dataset with proper coordinates
        lat_subset = dataset_subset.latitude
        lon_subset = dataset_subset.longitude

        # Create coordinate arrays for the subset
        y_coords = np.arange(gridded_data_3d.shape[0])
        x_coords = np.arange(gridded_data_3d.shape[1])

        dataset2 = xr.Dataset(
            {"toa_radiance": (("y", "x", "wavelength"), gridded_data_3d)},
            coords={
                "y": ("y", y_coords),
                "x": ("x", x_coords),
                "wavelength": ("wavelength", selected_wavelengths),
                "latitude": (("y", "x"), lat_subset.values),
                "longitude": (("y", "x"), lon_subset.values),
            },
            **kwargs,
        )

        dataset2["toa_radiance"].rio.write_crs("EPSG:4326", inplace=True)
        return dataset2

    # From here on no subset was requested (the subset path returned above),
    # so the full swath is interpolated onto a regular lat/lon grid.
    lat = dataset.latitude
    lon = dataset.longitude

    # Use the first selected wavelength to define the spatial validity mask
    first_wavelength_data = dataset.sel(
        wavelength=selected_wavelengths[0], method="nearest"
    )["toa_radiance"]
    overall_valid_mask = ~np.isnan(first_wavelength_data.data) & (
        first_wavelength_data.data > 0
    )

    if not np.any(overall_valid_mask):
        # No valid data, return empty grid using valid lat/lon bounds
        valid_lat_data = lat.data[~np.isnan(lat.data)]
        valid_lon_data = lon.data[~np.isnan(lon.data)]

        if len(valid_lat_data) == 0 or len(valid_lon_data) == 0:
            # Fallback to original bounds if no finite coordinates exist
            grid_lat = np.linspace(lat.min().values, lat.max().values, lat.shape[0])
            grid_lon = np.linspace(lon.min().values, lon.max().values, lon.shape[1])
        else:
            grid_lat = np.linspace(
                valid_lat_data.min(), valid_lat_data.max(), lat.shape[0]
            )
            grid_lon = np.linspace(
                valid_lon_data.min(), valid_lon_data.max(), lon.shape[1]
            )

        grid_lon_2d, grid_lat_2d = np.meshgrid(grid_lon, grid_lat)
        gridded_data_dict = {
            wl: np.full_like(grid_lat_2d, np.nan) for wl in selected_wavelengths
        }
    else:
        # Get valid coordinates for spatial masking
        valid_lat = lat.data[overall_valid_mask]
        valid_lon = lon.data[overall_valid_mask]

        # Create grid based on valid data bounds
        grid_lat = np.linspace(valid_lat.min(), valid_lat.max(), lat.shape[0])
        grid_lon = np.linspace(valid_lon.min(), valid_lon.max(), lon.shape[1])
        grid_lon_2d, grid_lat_2d = np.meshgrid(grid_lon, grid_lat)

        # Mask grid cells outside the convex hull of the valid pixels so
        # interpolation does not bleed past the swath edges.
        try:
            hull = ConvexHull(np.column_stack([valid_lon, valid_lat]))
            from matplotlib.path import Path

            hull_path = Path(
                np.column_stack(
                    [valid_lon[hull.vertices], valid_lat[hull.vertices]]
                )
            )
            grid_points = np.column_stack(
                [grid_lon_2d.flatten(), grid_lat_2d.flatten()]
            )
            inside_hull = hull_path.contains_points(grid_points).reshape(
                grid_lat_2d.shape
            )
        except Exception:
            # Deliberate best-effort fallback (hull construction or the
            # matplotlib import failed): use a simple bounding box.
            inside_hull = (
                (grid_lat_2d >= valid_lat.min())
                & (grid_lat_2d <= valid_lat.max())
                & (grid_lon_2d >= valid_lon.min())
                & (grid_lon_2d <= valid_lon.max())
            )

        gridded_data_dict = {}
        for wl in selected_wavelengths:
            data = dataset.sel(wavelength=wl, method="nearest")["toa_radiance"]

            # Mask nodata values (both NaN and non-positive values)
            data_flat = data.data.flatten()
            valid_mask = ~np.isnan(data_flat) & (data_flat > 0)

            if not np.any(valid_mask):
                gridded_data = np.full_like(grid_lat_2d, np.nan)
            else:
                gridded_data = griddata(
                    (lat.data.flatten()[valid_mask], lon.data.flatten()[valid_mask]),
                    data_flat[valid_mask],
                    (grid_lat_2d, grid_lon_2d),
                    method=method,
                    fill_value=np.nan,
                )
                # Apply spatial mask to prevent edge interpolation
                gridded_data[~inside_hull] = np.nan
            gridded_data_dict[wl] = gridded_data

    selected_wavelengths = list(gridded_data_dict.keys())
    # Create a 3D array with dimensions latitude, longitude, and wavelength
    gridded_data_3d = np.dstack(list(gridded_data_dict.values()))

    dataset2 = xr.Dataset(
        {"toa_radiance": (("latitude", "longitude", "wavelength"), gridded_data_3d)},
        coords={
            "latitude": ("latitude", grid_lat),
            "longitude": ("longitude", grid_lon),
            "wavelength": ("wavelength", selected_wavelengths),
        },
        **kwargs,
    )

    dataset2["toa_radiance"].rio.write_crs("EPSG:4326", inplace=True)

    return dataset2

`read_tanager(filepath, bands=None, stac_url=None, wavelengths=None, fwhm=None, product=None, **kwargs)` ¶

Read Planet Tanager HDF5 hyperspectral data and return an xarray.Dataset.

Auto-detects the Tanager product variant from the file contents and sources wavelength metadata from inside the file when available. Supports all four Planet Tanager product variants: basic_radiance, ortho_radiance, basic_sr, and ortho_sr. Surface reflectance products expose their data as surface_reflectance with a toa_radiance alias retained for backward compatibility with the rest of the HyperCoast Tanager helpers; the alias may be removed in a future major release.

Wavelengths are sourced in this precedence: (1) the wavelengths kwarg, (2) a wavelength dataset or attribute inside the HDF5 file, (3) the stac_url kwarg parsed for eo:bands metadata, (4) a synthesized integer index with a UserWarning. No hardcoded STAC URL is used.

Parameters:

Name	Type	Description	Default
`filepath`	`str or os.PathLike`	Local file path or HTTPS URL to the Tanager `.h5` file.	required
`bands`	`array-like`	Indices of spectral bands to keep.	`None`
`stac_url`	`str`	STAC item URL to source wavelength metadata from when the file does not contain it.	`None`
`wavelengths`	`array-like`	Wavelengths in nanometers to use directly. Must have either the full cube band count or, when `bands` is also supplied, the number of selected bands.	`None`
`fwhm`	`array-like`	Full width at half maximum in nanometers to use directly. Same length rules as `wavelengths`.	`None`
`product`	`str`	Force a specific product variant. One of `basic_radiance`, `ortho_radiance`, `basic_sr`, `ortho_sr`.	`None`
`**kwargs`		Extra keyword arguments forwarded to `xr.Dataset`.	`{}`

Returns:

Type	Description
`xr.Dataset`	Dataset with dims `(wavelength, y, x)`, a canonical data variable (`toa_radiance` for radiance products, `surface_reflectance` for SR products, plus a `toa_radiance` alias), and `latitude` / `longitude` coordinates on `(y, x)`.

Exceptions:

Type	Description
`ValueError`	If no 3-D hyperspectral cube can be located in the file.

Source code in hypercoast/tanager.py

def read_tanager(
    filepath,
    bands=None,
    stac_url=None,
    wavelengths=None,
    fwhm=None,
    product=None,
    **kwargs,
):
    """Read Planet Tanager HDF5 hyperspectral data and return an xarray.Dataset.

    Auto-detects the Tanager product variant from the file contents and sources
    wavelength metadata from inside the file when available. Supports all four
    Planet Tanager product variants: ``basic_radiance``, ``ortho_radiance``,
    ``basic_sr``, and ``ortho_sr``. Surface reflectance products expose their
    data as ``surface_reflectance`` with a ``toa_radiance`` alias retained for
    backward compatibility with the rest of the HyperCoast Tanager helpers;
    the alias may be removed in a future major release.

    Wavelengths are sourced in this precedence: (1) the ``wavelengths`` kwarg,
    (2) a wavelength dataset or attribute inside the HDF5 file, (3) the
    ``stac_url`` kwarg parsed for ``eo:bands`` metadata, (4) a synthesized
    integer index with a ``UserWarning``. No hardcoded STAC URL is used.

    FWHM values follow the same idea: an explicit ``fwhm`` kwarg always wins;
    otherwise values read from the file are used, and STAC-provided values
    replace them only when the STAC fallback in (3) is taken and actually
    supplies FWHM. Missing FWHM is filled with NaN.

    Args:
        filepath (str or os.PathLike): Local file path or HTTPS URL to the
            Tanager ``.h5`` file. ``https://`` URLs are downloaded first via
            ``download_file``.
        bands (array-like, optional): Indices of spectral bands to keep. A
            single integer is treated as a one-element selection so the band
            axis is preserved.
        stac_url (str, optional): STAC item URL to source wavelength metadata
            from when the file does not contain it.
        wavelengths (array-like, optional): Wavelengths in nanometers to use
            directly. Must have either the full cube band count or, when
            ``bands`` is also supplied, the number of selected bands.
        fwhm (array-like, optional): Full width at half maximum in nanometers
            to use directly. Same length rules as ``wavelengths``.
        product (str, optional): Force a specific product variant. One of
            ``basic_radiance``, ``ortho_radiance``, ``basic_sr``, ``ortho_sr``.
        **kwargs: Extra keyword arguments forwarded to ``xr.Dataset``.

    Returns:
        xr.Dataset: Dataset with dims ``(wavelength, y, x)``, a canonical data
        variable (``toa_radiance`` for radiance products, ``surface_reflectance``
        for SR products, plus a ``toa_radiance`` alias), and ``latitude`` /
        ``longitude`` coordinates on ``(y, x)``.

    Raises:
        ValueError: If latitude/longitude cannot be located, if ``wavelengths``
            or ``fwhm`` have an incompatible length, or if the STAC band count
            disagrees with the data cube.
    """
    if isinstance(filepath, str) and filepath.startswith("https://"):
        filepath = download_file(filepath)

    # A bare integer index would make h5py/NumPy drop the band axis, which
    # breaks the (wavelength, y, x) layout assumed below; keep it as a
    # one-element selection instead.
    if isinstance(bands, (int, np.integer)):
        bands = [int(bands)]

    with h5py.File(filepath, "r") as f:
        layout = _discover_tanager_layout(f, product=product)

        cube = f[layout["data_path"]]
        cube_shape = cube.shape
        band_axis = layout["band_axis"]
        n_bands_total = cube_shape[band_axis]

        # Read only the requested bands from disk so large Tanager scenes do
        # not blow up memory. Fall back to a full read if h5py rejects the
        # index expression (for example, unsorted integer lists).
        if bands is not None:
            index = [slice(None)] * cube.ndim
            index[band_axis] = bands
            try:
                data = cube[tuple(index)]
            except (TypeError, ValueError):
                data = cube[()]
                slicer = [slice(None)] * cube.ndim
                slicer[band_axis] = bands
                data = data[tuple(slicer)]
        else:
            data = cube[()]

        # Normalise to band-first order regardless of the on-disk layout.
        if band_axis != 0:
            data = np.moveaxis(data, band_axis, 0)

        lat_path = layout["lat_path"]
        lon_path = layout["lon_path"]
        if lat_path is not None and lon_path is not None:
            lat = f[lat_path][()]
            lon = f[lon_path][()]
        else:
            lat, lon = _grid_latlon(f, layout)
        if lat is None or lon is None:
            raise ValueError(
                "Could not locate Latitude/Longitude datasets in the Tanager HDF5 file."
            )

        wl_nm_full, fwhm_nm_full = _read_wavelengths_from_hdf5(f, layout, n_bands_total)

    n_bands_selected = data.shape[0]

    # Replace the fill value with NaN, then apply scale/offset if non-trivial.
    if layout["fill_value"] is not None:
        data = np.where(data == layout["fill_value"], np.nan, data.astype(float))
    if layout["scale_factor"] != 1.0 or layout["add_offset"] != 0.0:
        data = data.astype(float) * layout["scale_factor"] + layout["add_offset"]

    def _apply_band_slice(values, expected):
        """Slice a full-length band-aligned array to the selected bands.

        Args:
            values (array-like or None): Values indexed along the band axis.
            expected (int): Expected length before slicing.

        Returns:
            numpy.ndarray or None: Sliced values, or ``None`` if ``values`` is
            ``None``.
        """
        if values is None:
            return None
        arr = np.asarray(values)
        if arr.size == expected and bands is not None:
            arr = arr[bands]
        return arr

    wl_nm = _apply_band_slice(wl_nm_full, n_bands_total)
    fwhm_nm = _apply_band_slice(fwhm_nm_full, n_bands_total)

    if wavelengths is not None:
        wl_nm = np.asarray(wavelengths, dtype=float).ravel()
        if wl_nm.size == n_bands_total and bands is not None:
            wl_nm = wl_nm[bands]
        if wl_nm.size != n_bands_selected:
            raise ValueError(
                f"`wavelengths` has length {wl_nm.size} but {n_bands_selected} "
                f"bands are being read."
            )

    if fwhm is not None:
        fwhm_arr = np.asarray(fwhm, dtype=float).ravel()
        if fwhm_arr.size == n_bands_total and bands is not None:
            fwhm_arr = fwhm_arr[bands]
        if fwhm_arr.size != n_bands_selected:
            raise ValueError(
                f"`fwhm` has length {fwhm_arr.size} but {n_bands_selected} "
                f"bands are being read."
            )
        fwhm_nm = fwhm_arr

    if wl_nm is None and stac_url is not None:
        wl_stac, fwhm_stac = _read_wavelengths_from_stac(
            stac_url, layout["stac_asset_key"]
        )
        if wl_stac.size != n_bands_total:
            raise ValueError(
                f"STAC item reports {wl_stac.size} bands but the data cube has "
                f"{n_bands_total} bands."
            )
        wl_nm = _apply_band_slice(wl_stac, n_bands_total)
        # Do not clobber an explicit ``fwhm`` argument, and do not discard
        # FWHM already read from the file when STAC has none to offer.
        stac_fwhm_nm = _apply_band_slice(fwhm_stac, n_bands_total)
        if fwhm is None and stac_fwhm_nm is not None:
            fwhm_nm = stac_fwhm_nm

    if wl_nm is None:
        warnings.warn(
            "No wavelength metadata found in the Tanager HDF5 file and no "
            "`stac_url` or `wavelengths` supplied; falling back to integer "
            "band indices. Pass `wavelengths` or `stac_url` for physical nm "
            "values.",
            UserWarning,
            stacklevel=2,
        )
        wl_nm = np.arange(n_bands_selected, dtype=float)

    if fwhm_nm is None:
        fwhm_nm = np.full(n_bands_selected, np.nan)

    data_var_name = layout["data_var_name"]

    coords = {
        "wavelength": wl_nm,
        "fwhm": ("wavelength", fwhm_nm),
        "latitude": (("y", "x"), lat),
        "longitude": (("y", "x"), lon),
    }

    da = xr.DataArray(
        data, dims=("wavelength", "y", "x"), coords=coords, name=data_var_name
    )

    ds = xr.Dataset(
        data_vars={data_var_name: da},
        coords={
            "wavelength": da.wavelength,
            "fwhm": ("wavelength", fwhm_nm),
            "latitude": (("y", "x"), lat),
            "longitude": (("y", "x"), lon),
        },
        attrs={
            "source": "Planet Tanager HDF5",
            "product": layout["product"],
            "stac_item": stac_url or "",
            "data_var": data_var_name,
        },
        **kwargs,
    )

    # Keep the legacy variable name available for SR products so helpers
    # that read ``toa_radiance`` continue to work.
    if data_var_name == "surface_reflectance" and "toa_radiance" not in ds.data_vars:
        ds["toa_radiance"] = ds["surface_reflectance"]

    return ds

`read_tanager_stac(stac_item, asset='ortho_radiance_hdf5', out_dir=None, bands=None, wavelengths=None, fwhm=None, product=None, quiet=True, overwrite=False, **kwargs)` ¶

Download and read a Tanager HDF5 asset from a STAC item.

Parameters:

Name	Type	Description	Default
`stac_item`	`dict or str`	STAC item dictionary or item JSON URL.	required
`asset`	`str`	HDF5 STAC asset key. Defaults to `"ortho_radiance_hdf5"`.	`'ortho_radiance_hdf5'`
`out_dir`	`str`	Directory for the downloaded HDF5 file.	`None`
`bands`	`array-like`	Spectral band indices to keep.	`None`
`wavelengths`	`array-like`	Explicit wavelength values in nm.	`None`
`fwhm`	`array-like`	Explicit FWHM values in nm.	`None`
`product`	`str`	Force a Tanager product variant.	`None`
`quiet`	`bool`	Suppress download output. Defaults to True.	`True`
`overwrite`	`bool`	Overwrite existing files. Defaults to False.	`False`
`**kwargs`		Extra keyword arguments passed to :func:`read_tanager`.	`{}`

Returns:

Type	Description
`xarray.Dataset`	Tanager dataset read from the selected STAC asset.

Source code in hypercoast/tanager.py

def read_tanager_stac(
    stac_item,
    asset: str = "ortho_radiance_hdf5",
    out_dir: Optional[str] = None,
    bands=None,
    wavelengths=None,
    fwhm=None,
    product: Optional[str] = None,
    quiet: bool = True,
    overwrite: bool = False,
    **kwargs,
):
    """Fetch a Tanager HDF5 asset referenced by a STAC item and open it.

    The asset is downloaded with :func:`download_tanager` and opened with
    :func:`read_tanager`. Whenever ``wavelengths`` or ``fwhm`` is not given,
    the missing value(s) are taken from the STAC item's band metadata before
    reading.

    Args:
        stac_item (dict or str): STAC item dictionary or item JSON URL.
        asset (str, optional): HDF5 STAC asset key. Defaults to
            ``"ortho_radiance_hdf5"``.
        out_dir (str, optional): Directory for the downloaded HDF5 file.
        bands (array-like, optional): Spectral band indices to keep.
        wavelengths (array-like, optional): Explicit wavelength values in nm.
        fwhm (array-like, optional): Explicit FWHM values in nm.
        product (str, optional): Force a Tanager product variant.
        quiet (bool, optional): Suppress download output. Defaults to True.
        overwrite (bool, optional): Overwrite existing files. Defaults to False.
        **kwargs: Extra keyword arguments passed to :func:`read_tanager`.

    Returns:
        xarray.Dataset: Tanager dataset read from the selected STAC asset,
        with ``stac_asset`` and ``stac_item`` attributes set (and ``product``
        overridden from the asset key when ``product`` was not forced).
    """
    resolved_item = _load_stac_item(stac_item)
    item_url = _get_stac_item_url(resolved_item)

    local_files = download_tanager(
        resolved_item,
        asset=asset,
        out_dir=out_dir,
        quiet=quiet,
        overwrite=overwrite,
    )

    # Only consult the STAC band metadata when the caller left a gap.
    if wavelengths is None or fwhm is None:
        meta_wl, meta_fwhm = _read_wavelengths_from_stac_item(resolved_item, asset)
        wavelengths = meta_wl if wavelengths is None else wavelengths
        fwhm = meta_fwhm if fwhm is None else fwhm

    dataset = read_tanager(
        local_files[0],
        bands=bands,
        stac_url=item_url,
        wavelengths=wavelengths,
        fwhm=fwhm,
        product=product,
        **kwargs,
    )

    # The asset key determines the product label unless one was forced.
    if product is None and asset in _STAC_ASSET_PRODUCT:
        dataset.attrs["product"] = _STAC_ASSET_PRODUCT[asset]
    dataset.attrs["stac_asset"] = asset
    dataset.attrs["stac_item"] = item_url or dataset.attrs.get("stac_item", "")
    return dataset

`search_tanager(bbox=None, temporal=None, collections=None, count=-1, query=None, cloud_percent=None, catalog_url='https://www.planet.com/data/stac/tanager-core-imagery/catalog.json', output=None, crs='EPSG:4326', return_gdf=False, timeout=30, **kwargs)` ¶

Search Planet Tanager STAC sample imagery.

Parameters:

Name	Type	Description	Default
`bbox`	`list`	Bounding box `[xmin, ymin, xmax, ymax]` in EPSG:4326.	`None`
`temporal`	`str or tuple`	Date/time range as `"start/end"`, `"start,end"`, or `(start, end)`.	`None`
`collections`	`str or list`	Tanager collection ids or titles, such as `"coastal-water-bodies"` or `"GHG Plumes"`.	`None`
`count`	`int`	Maximum number of items to return. `-1` means all matching items. Defaults to `-1`.	`-1`
`query`	`str`	Case-insensitive text search against item id, title, description, and location description.	`None`
`cloud_percent`	`float`	Maximum item `cloud_percent`.	`None`
`catalog_url`	`str`	Tanager STAC catalog URL. Planet browser URLs are accepted and normalized to raw JSON URLs.	`'https://www.planet.com/data/stac/tanager-core-imagery/catalog.json'`
`output`	`str`	File path to save a GeoDataFrame when `return_gdf` is True.	`None`
`crs`	`str`	CRS for GeoDataFrame output. Defaults to `"EPSG:4326"`.	`'EPSG:4326'`
`return_gdf`	`bool`	Return `(items, gdf)` instead of only the item list. Defaults to False.	`False`
`timeout`	`int`	HTTP request timeout in seconds.	`30`
`**kwargs`		Additional exact-match filters against STAC item properties, for example `quality_category="test"`.	`{}`

Returns:

Type	Description
`list or tuple`	STAC item dictionaries, or `(items, gdf)` when `return_gdf=True`.

Source code in hypercoast/tanager.py

def search_tanager(
    bbox: Optional[List[float]] = None,
    temporal: Optional[Union[str, Tuple[str, str]]] = None,
    collections: Optional[Union[str, List[str]]] = None,
    count: int = -1,
    query: Optional[str] = None,
    cloud_percent: Optional[float] = None,
    catalog_url: str = TANAGER_STAC_CATALOG_URL,
    output: Optional[str] = None,
    crs: str = "EPSG:4326",
    return_gdf: bool = False,
    timeout: int = 30,
    **kwargs,
) -> Union[List[dict], tuple]:
    """Walk the Tanager STAC catalog and collect the items that pass the filters.

    The catalog is traversed link by link: every ``child`` link of the catalog
    is fetched as a collection, and every ``item`` link of a selected
    collection is fetched and tested against the filters. Each item that is
    kept is a copy of the fetched JSON with three extra keys added:
    ``_stac_url``, ``_collection_url`` and ``_collection_title``.

    Args:
        bbox (list, optional): Bounding box ``[xmin, ymin, xmax, ymax]`` in
            EPSG:4326.
        temporal (str or tuple, optional): Date/time range given as
            ``"start/end"``, ``"start,end"``, or ``(start, end)``.
        collections (str or list, optional): Collection ids or titles to
            restrict the search to, e.g. ``"coastal-water-bodies"`` or
            ``"GHG Plumes"``. Compared case-insensitively. ``None`` searches
            every collection.
        count (int, optional): Maximum number of items to return. ``-1`` (or
            ``None``) means all matching items. With ``0`` the catalog is not
            fetched at all. Defaults to ``-1``.
        query (str, optional): Case-insensitive text search against item id,
            title, description, and location description.
        cloud_percent (float, optional): Maximum item ``cloud_percent``.
        catalog_url (str, optional): Tanager STAC catalog URL. Planet browser
            URLs are accepted and normalized to raw JSON URLs.
        output (str, optional): File path the GeoDataFrame is written to.
            Only used when ``return_gdf`` is True.
        crs (str, optional): CRS for GeoDataFrame output. Defaults to
            ``"EPSG:4326"``.
        return_gdf (bool, optional): Return ``(items, gdf)`` instead of only
            the item list. Defaults to False.
        timeout (int, optional): HTTP request timeout in seconds.
        **kwargs: Additional exact-match filters against STAC item properties,
            for example ``quality_category="test"``.

    Returns:
        list or tuple: STAC item dictionaries, or ``(items, gdf)`` when
            ``return_gdf=True``.
    """
    catalog_url = _normalize_stac_url(catalog_url)
    if bbox is not None:
        bbox = list(bbox)
    wanted = _as_list(collections)
    if wanted is not None:
        # Lower-cased set so ids and titles can be matched case-insensitively.
        wanted = {str(name).lower() for name in wanted}

    limited = count is not None and count > -1
    matches = []

    def _limit_reached():
        # True once a non-negative ``count`` has been satisfied.
        return limited and len(matches) >= count

    # A limit of zero is already satisfied by the empty list, so the whole
    # catalog traversal (and every request it implies) is skipped.
    if not _limit_reached():
        catalog = _fetch_json(catalog_url, timeout=timeout)

        for child in _stac_links(catalog, "child"):
            if _limit_reached():
                break
            collection_url = _normalize_stac_url(child["href"])
            collection = _fetch_json(collection_url, timeout=timeout)
            # The collection document itself is needed to learn its id and
            # title, so the selection test can only happen after the fetch.
            ident = str(collection.get("id", "")).lower()
            title = str(collection.get("title", "")).lower()
            if wanted is not None and ident not in wanted and title not in wanted:
                continue

            for entry in _stac_links(collection, "item"):
                if _limit_reached():
                    break
                item_url = _normalize_stac_url(entry["href"])
                item = dict(_fetch_json(item_url, timeout=timeout))
                # Provenance keys recorded on the copy before filtering.
                item.update(
                    _stac_url=item_url,
                    _collection_url=collection_url,
                    _collection_title=collection.get("title", ""),
                )
                keep = _item_matches_filters(
                    item,
                    bbox=bbox,
                    temporal=temporal,
                    query=query,
                    cloud_percent=cloud_percent,
                    **kwargs,
                )
                if keep:
                    matches.append(item)

    if not return_gdf:
        return matches

    frame = _stac_items_to_gdf(matches, crs=crs)
    if output is not None:
        frame.to_file(output)
    return matches, frame

`tanager_footprints(bbox=None, temporal=None, collections=None, count=-1, query=None, cloud_percent=None, catalog_url='https://www.planet.com/data/stac/tanager-core-imagery/catalog.json', output=None, crs='EPSG:4326', unique=True, return_items=False, timeout=30, **kwargs)` ¶

Return Tanager STAC item footprints as a GeoDataFrame.

Parameters:

Name	Type	Description	Default
`bbox`	`list`	Bounding box `[xmin, ymin, xmax, ymax]` in EPSG:4326.	`None`
`temporal`	`str or tuple`	Date/time range as `"start/end"`, `"start,end"`, or `(start, end)`.	`None`
`collections`	`str or list`	Tanager collection ids or titles. Defaults to all collections in the Tanager STAC catalog.	`None`
`count`	`int`	Maximum number of matching STAC item records to inspect. `-1` means all. Defaults to `-1`.	`-1`
`query`	`str`	Case-insensitive text search against item id, title, description, and location description.	`None`
`cloud_percent`	`float`	Maximum item `cloud_percent`.	`None`
`catalog_url`	`str`	Tanager STAC catalog URL. Planet browser URLs are accepted and normalized to raw JSON URLs.	`'https://www.planet.com/data/stac/tanager-core-imagery/catalog.json'`
`output`	`str`	File path to save the GeoDataFrame.	`None`
`crs`	`str`	CRS for GeoDataFrame output. Defaults to `"EPSG:4326"`.	`'EPSG:4326'`
`unique`	`bool`	Deduplicate scenes that appear in more than one thematic collection. Defaults to True.	`True`
`return_items`	`bool`	Return `(items, gdf)` instead of only the GeoDataFrame. Defaults to False.	`False`
`timeout`	`int`	HTTP request timeout in seconds.	`30`
`**kwargs`		Additional exact-match filters against STAC item properties.	`{}`

Returns:

Type	Description
`geopandas.GeoDataFrame or tuple`	Footprint GeoDataFrame, or `(items, gdf)` when `return_items=True`.

Source code in hypercoast/tanager.py

def tanager_footprints(
    bbox: Optional[List[float]] = None,
    temporal: Optional[Union[str, Tuple[str, str]]] = None,
    collections: Optional[Union[str, List[str]]] = None,
    count: int = -1,
    query: Optional[str] = None,
    cloud_percent: Optional[float] = None,
    catalog_url: str = TANAGER_STAC_CATALOG_URL,
    output: Optional[str] = None,
    crs: str = "EPSG:4326",
    unique: bool = True,
    return_items: bool = False,
    timeout: int = 30,
    **kwargs,
):
    """Build a footprint GeoDataFrame from a Tanager STAC search.

    Runs :func:`search_tanager` with the given filters, optionally removes
    duplicate scenes, and converts the remaining items to a GeoDataFrame.

    Args:
        bbox (list, optional): Bounding box ``[xmin, ymin, xmax, ymax]`` in
            EPSG:4326.
        temporal (str or tuple, optional): Date/time range given as
            ``"start/end"``, ``"start,end"``, or ``(start, end)``.
        collections (str or list, optional): Tanager collection ids or titles.
            Defaults to all collections in the Tanager STAC catalog.
        count (int, optional): Maximum number of matching STAC item records to
            inspect. ``-1`` means all. The limit is applied by the search,
            i.e. before deduplication, so with ``unique=True`` the result may
            hold fewer than ``count`` rows. Defaults to ``-1``.
        query (str, optional): Case-insensitive text search against item id,
            title, description, and location description.
        cloud_percent (float, optional): Maximum item ``cloud_percent``.
        catalog_url (str, optional): Tanager STAC catalog URL. Planet browser
            URLs are accepted and normalized to raw JSON URLs.
        output (str, optional): File path to save the GeoDataFrame.
        crs (str, optional): CRS for GeoDataFrame output. Defaults to
            ``"EPSG:4326"``.
        unique (bool, optional): Deduplicate scenes that appear in more than
            one thematic collection. Defaults to True.
        return_items (bool, optional): Return ``(items, gdf)`` instead of only
            the GeoDataFrame. Defaults to False.
        timeout (int, optional): HTTP request timeout in seconds.
        **kwargs: Additional exact-match filters against STAC item properties.

    Returns:
        geopandas.GeoDataFrame or tuple: Footprint GeoDataFrame, or
            ``(items, gdf)`` when ``return_items=True``.
    """
    # The GeoDataFrame is always built here (after optional deduplication),
    # so the search is asked for the plain item list only.
    found = search_tanager(
        bbox=bbox,
        temporal=temporal,
        collections=collections,
        count=count,
        query=query,
        cloud_percent=cloud_percent,
        catalog_url=catalog_url,
        return_gdf=False,
        timeout=timeout,
        **kwargs,
    )
    scenes = _dedupe_tanager_items(found) if unique else found

    footprints = _stac_items_to_gdf(scenes, crs=crs)
    if output is not None:
        footprints.to_file(output)

    return (scenes, footprints) if return_items else footprints

`tanager_to_image(dataset, bands=None, wavelengths=None, method='nearest', row_range=None, col_range=None, output=None, **kwargs)` ¶

Converts a Tanager dataset to an image.

Parameters:

Name	Type	Description	Default
`dataset`	`xarray.Dataset or str`	The dataset containing the Tanager data or the file path to the dataset.	required
`bands`	`array-like`	The specific band indices to select. Defaults to None.	`None`
`wavelengths`	`array-like`	The specific wavelength values to select. Takes priority over bands. Defaults to None.	`None`
`method`	`str`	The method to use for data interpolation. Defaults to "nearest".	`'nearest'`
`row_range`	`tuple`	Row range (start_row, end_row) to subset the data. Defaults to None.	`None`
`col_range`	`tuple`	Column range (start_col, end_col) to subset the data. Defaults to None.	`None`
`output`	`str`	The file path where the image will be saved. If None, the image will be returned as a PIL Image object. Defaults to None.	`None`
`**kwargs`		Additional keyword arguments to be passed to `leafmap.array_to_image`.	`{}`

Returns:

Type	Description
`rasterio.Dataset or None`	The image converted from the dataset. If `output` is provided, the image will be saved to the specified file and the function will return None.

Source code in hypercoast/tanager.py

def tanager_to_image(
    dataset,
    bands=None,
    wavelengths=None,
    method="nearest",
    row_range=None,
    col_range=None,
    output=None,
    **kwargs,
):
    """
    Converts a Tanager dataset to an image.

    The dataset is gridded with :func:`grid_tanager`, its ``"toa_radiance"``
    variable is tagged with the EPSG:4326 CRS, and the result is handed to
    ``leafmap.array_to_image``.

    Args:
        dataset (xarray.Dataset or str): The dataset containing the Tanager data, or the file path to
            such a dataset (a string is opened with :func:`read_tanager`).
        bands (array-like, optional): The specific band indices to select. Defaults to None.
        wavelengths (array-like, optional): The specific wavelength values to select. Takes priority over bands. Defaults to None.
        method (str, optional): The method to use for data interpolation. Defaults to "nearest".
        row_range (tuple, optional): Row range (start_row, end_row) to subset the data. Defaults to None.
        col_range (tuple, optional): Column range (start_col, end_col) to subset the data. Defaults to None.
        output (str, optional): The file path where the image will be saved. Forwarded to
            `leafmap.array_to_image`. Defaults to None.
        **kwargs: Additional keyword arguments to be passed to `leafmap.array_to_image`.

    Returns:
        The value returned by `leafmap.array_to_image` for the gridded data; its exact type
            depends on leafmap and on whether `output` is given.
    """
    from leafmap import array_to_image

    # Only plain strings are treated as file paths; anything else is assumed
    # to be an already-opened dataset.
    source = read_tanager(dataset, bands=bands) if isinstance(dataset, str) else dataset

    gridded = grid_tanager(
        source,
        bands=bands,
        wavelengths=wavelengths,
        method=method,
        row_range=row_range,
        col_range=col_range,
    )

    # NOTE(review): only the "toa_radiance" variable is exported; confirm
    # whether surface-reflectance products expose a variable under this name.
    radiance = gridded["toa_radiance"]
    radiance.rio.write_crs("EPSG:4326", inplace=True)

    return array_to_image(radiance, transpose=False, output=output, **kwargs)

tanager module¶

download_tanager(items, asset='ortho_radiance_hdf5', out_dir=None, quiet=True, overwrite=False, **kwargs) ¶

extract_tanager(dataset, latitude, longitude, delta=0.01, return_plot=False, **kwargs) ¶

filter_tanager(dataset, latitude, longitude, drop=True, return_plot=False, **kwargs) ¶

get_tanager_asset_url(stac_item, asset='ortho_visual') ¶

grid_tanager(dataset, bands=None, wavelengths=None, method='nearest', row_range=None, col_range=None, **kwargs) ¶

read_tanager(filepath, bands=None, stac_url=None, wavelengths=None, fwhm=None, product=None, **kwargs) ¶

read_tanager_stac(stac_item, asset='ortho_radiance_hdf5', out_dir=None, bands=None, wavelengths=None, fwhm=None, product=None, quiet=True, overwrite=False, **kwargs) ¶

search_tanager(bbox=None, temporal=None, collections=None, count=-1, query=None, cloud_percent=None, catalog_url='https://www.planet.com/data/stac/tanager-core-imagery/catalog.json', output=None, crs='EPSG:4326', return_gdf=False, timeout=30, **kwargs) ¶

tanager_footprints(bbox=None, temporal=None, collections=None, count=-1, query=None, cloud_percent=None, catalog_url='https://www.planet.com/data/stac/tanager-core-imagery/catalog.json', output=None, crs='EPSG:4326', unique=True, return_items=False, timeout=30, **kwargs) ¶

tanager_to_image(dataset, bands=None, wavelengths=None, method='nearest', row_range=None, col_range=None, output=None, **kwargs) ¶

`download_tanager(items, asset='ortho_radiance_hdf5', out_dir=None, quiet=True, overwrite=False, **kwargs)` ¶

`extract_tanager(dataset, latitude, longitude, delta=0.01, return_plot=False, **kwargs)` ¶

`filter_tanager(dataset, latitude, longitude, drop=True, return_plot=False, **kwargs)` ¶

`get_tanager_asset_url(stac_item, asset='ortho_visual')` ¶

`grid_tanager(dataset, bands=None, wavelengths=None, method='nearest', row_range=None, col_range=None, **kwargs)` ¶

`read_tanager(filepath, bands=None, stac_url=None, wavelengths=None, fwhm=None, product=None, **kwargs)` ¶

`read_tanager_stac(stac_item, asset='ortho_radiance_hdf5', out_dir=None, bands=None, wavelengths=None, fwhm=None, product=None, quiet=True, overwrite=False, **kwargs)` ¶

`search_tanager(bbox=None, temporal=None, collections=None, count=-1, query=None, cloud_percent=None, catalog_url='https://www.planet.com/data/stac/tanager-core-imagery/catalog.json', output=None, crs='EPSG:4326', return_gdf=False, timeout=30, **kwargs)` ¶

`tanager_footprints(bbox=None, temporal=None, collections=None, count=-1, query=None, cloud_percent=None, catalog_url='https://www.planet.com/data/stac/tanager-core-imagery/catalog.json', output=None, crs='EPSG:4326', unique=True, return_items=False, timeout=30, **kwargs)` ¶

`tanager_to_image(dataset, bands=None, wavelengths=None, method='nearest', row_range=None, col_range=None, output=None, **kwargs)` ¶