Summary module¶
Dataset summary and batch extraction helpers.
DatasetSummary
dataclass
¶
Describe a local hyperspectral dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path |
str |
Dataset path. |
required |
exists |
bool |
Whether the path exists. |
required |
sensor |
Optional[str] |
Sensor name used to read the dataset. |
None |
variables |
list[str] |
Data variable names. |
<factory> |
selected_variable |
Optional[str] |
Selected data variable. |
None |
crs |
Optional[str] |
Dataset CRS when discoverable. |
None |
bounds |
Optional[tuple[float, float, float, float]] |
Dataset bounds when discoverable. |
None |
wavelength_count |
int |
Number of spectral coordinates. |
0 |
wavelength_min |
Optional[float] |
Minimum wavelength or band coordinate. |
None |
wavelength_max |
Optional[float] |
Maximum wavelength or band coordinate. |
None |
dimensions |
dict[str, int] |
Dataset dimensions. |
<factory> |
default_rgb |
list[float] |
Default RGB wavelengths from the sensor registry. |
<factory> |
warnings |
list[str] |
Non-fatal summary warnings. |
<factory> |
Source code in hypercoast/summary.py
@dataclass
class DatasetSummary:
    """Metadata record describing a local hyperspectral dataset.

    Only ``path`` and ``exists`` are required; every other field has a
    default so a summary can be created first and filled in afterwards.

    Attributes:
        path: Dataset path.
        exists: Whether the path exists.
        sensor: Sensor name used to read the dataset.
        variables: Names of the data variables.
        selected_variable: The data variable that was selected.
        crs: Dataset CRS, when it could be discovered.
        bounds: Dataset bounds, when they could be discovered.
        wavelength_count: Number of spectral coordinates.
        wavelength_min: Smallest wavelength or band coordinate.
        wavelength_max: Largest wavelength or band coordinate.
        dimensions: Mapping of dimension name to size.
        default_rgb: Default RGB wavelengths from the sensor registry.
        warnings: Non-fatal warnings collected while summarizing.
    """

    # Required identification fields.
    path: str
    exists: bool

    # Optional metadata; mutable defaults use factories so that instances
    # never share the same list or dict object.
    sensor: Optional[str] = None
    variables: list[str] = field(default_factory=list)
    selected_variable: Optional[str] = None
    crs: Optional[str] = None
    bounds: Optional[tuple[float, float, float, float]] = None
    wavelength_count: int = 0
    wavelength_min: Optional[float] = None
    wavelength_max: Optional[float] = None
    dimensions: dict[str, int] = field(default_factory=dict)
    default_rgb: list[float] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)

    def as_dict(self) -> dict[str, Any]:
        """Convert the summary into a plain dictionary.

        The conversion is delegated to ``dataclasses.asdict``; the result is
        intended to be JSON-serializable.

        Returns:
            dict: Field names mapped to their values.
        """
        summary_fields = asdict(self)
        return summary_fields
as_dict(self)
¶
Return a JSON-serializable summary dictionary.
Returns:
| Type | Description |
|---|---|
dict |
Summary fields. |
Source code in hypercoast/summary.py
def as_dict(self) -> dict[str, Any]:
    """Convert the summary into a plain dictionary.

    The conversion is delegated to ``dataclasses.asdict``; the result is
    intended to be JSON-serializable.

    Returns:
        dict: Field names mapped to their values.
    """
    summary_fields = asdict(self)
    return summary_fields
extract_spectra_to_csv(sensor, path, points_csv, output, x_column='x', y_column='y', crs='EPSG:4326')
¶
Extract spectra for CSV point coordinates.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
sensor |
str |
Sensor name or alias. |
required |
path |
str | Path |
Input dataset path. |
required |
points_csv |
str | Path |
CSV file containing point coordinates. |
required |
output |
str | Path |
Output long-form CSV path. |
required |
x_column |
str |
X or longitude column. |
'x' |
y_column |
str |
Y or latitude column. |
'y' |
crs |
str |
Coordinate reference system for point coordinates. |
'EPSG:4326' |
Returns:
| Type | Description |
|---|---|
str |
Output CSV path. |
Source code in hypercoast/summary.py
def extract_spectra_to_csv(
sensor: str,
path: str | Path,
points_csv: str | Path,
output: str | Path,
x_column: str = "x",
y_column: str = "y",
crs: str = "EPSG:4326",
) -> str:
"""Extract spectra for CSV point coordinates.
Args:
sensor: Sensor name or alias.
path: Input dataset path.
points_csv: CSV file containing point coordinates.
output: Output long-form CSV path.
x_column: X or longitude column.
y_column: Y or latitude column.
crs: Coordinate reference system for point coordinates.
Returns:
str: Output CSV path.
"""
from .registry import extract_sensor
points = pd.read_csv(points_csv)
if x_column not in points.columns or y_column not in points.columns:
raise ValueError(f"Point CSV must contain '{x_column}' and '{y_column}'.")
dataset = read_sensor(sensor, path)
rows: list[dict[str, Any]] = []
try:
for feature_id, row in points.iterrows():
lon, lat = _to_lon_lat(float(row[x_column]), float(row[y_column]), crs)
spectrum = extract_sensor(sensor, dataset, lat=lat, lon=lon)
wavelengths, values = _spectrum_values(spectrum)
for wavelength, value in zip(wavelengths, values):
rows.append(
{
"feature_id": feature_id,
"x": row[x_column],
"y": row[y_column],
"crs": crs,
"wavelength": wavelength,
"value": value,
"layer": str(path),
"variable": getattr(spectrum, "name", None),
}
)
finally:
close = getattr(dataset, "close", None)
if callable(close):
close()
output_path = Path(output)
output_path.parent.mkdir(parents=True, exist_ok=True)
pd.DataFrame(rows).to_csv(output_path, index=False)
return str(output_path)
subset_dataset(path, output, bbox, sensor=None, variable=None)
¶
Subset a rectilinear local dataset by bounding box and write NetCDF.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path |
str | Path |
Input dataset path. |
required |
output |
str | Path |
Output NetCDF path. |
required |
bbox |
tuple[float, float, float, float] |
Bounding box as (xmin, ymin, xmax, ymax). |
required |
sensor |
Optional[str] |
Optional sensor name or alias. |
None |
variable |
Optional[str] |
Optional data variable to keep. |
None |
Returns:
| Type | Description |
|---|---|
str |
Output path. |
Source code in hypercoast/summary.py
def subset_dataset(
    path: str | Path,
    output: str | Path,
    bbox: tuple[float, float, float, float],
    sensor: Optional[str] = None,
    variable: Optional[str] = None,
) -> str:
    """Subset a rectilinear local dataset by bounding box and write NetCDF.

    Args:
        path: Input dataset path.
        output: Output NetCDF path. Missing parent directories are created.
        bbox: Bounding box as ``(xmin, ymin, xmax, ymax)``.
        sensor: Optional sensor name or alias. When omitted the dataset is
            opened with ``xr.open_dataset``.
        variable: Optional data variable to keep.

    Returns:
        str: Output path.

    Raises:
        ValueError: If ``variable`` is given but is not the variable that
            was selected from the dataset.
    """
    # Keep the opened dataset under its own name: the subset objects derived
    # below are different objects and may not carry the file's close hook, so
    # cleanup must target the dataset that was actually opened.
    source = read_sensor(sensor, path) if sensor else xr.open_dataset(path)
    try:
        selected = _select_variable(source, variable)
        if variable and selected != variable:
            raise ValueError(
                f"Variable {variable!r} not found. "
                f"Available: {list(source.data_vars)}"
            )

        # Past the check above, a truthy ``variable`` equals ``selected``.
        subset = source[[selected]] if variable else source

        xmin, ymin, xmax, ymax = bbox
        subset = _subset_by_bbox(subset, xmin, ymin, xmax, ymax)

        output_path = Path(output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        subset.to_netcdf(output_path)
        return str(output_path)
    finally:
        close = getattr(source, "close", None)
        if callable(close):
            close()
summarize_dataset(path, sensor=None, variable=None)
¶
Summarize a local dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path |
str | Path |
Local dataset path. |
required |
sensor |
Optional[str] |
Optional sensor name or alias. |
None |
variable |
Optional[str] |
Optional selected data variable. |
None |
Returns:
| Type | Description |
|---|---|
DatasetSummary |
Dataset metadata and warnings. |
Source code in hypercoast/summary.py
def summarize_dataset(
    path: str | Path,
    sensor: Optional[str] = None,
    variable: Optional[str] = None,
) -> DatasetSummary:
    """Build a summary of a dataset stored on the local filesystem.

    A missing file or a dataset that cannot be opened is reported through
    ``DatasetSummary.warnings`` rather than raised, and a partially filled
    summary is returned in those cases.

    Args:
        path: Local dataset path.
        sensor: Optional sensor name or alias.
        variable: Optional selected data variable.

    Returns:
        DatasetSummary: Dataset metadata and warnings.
    """
    dataset_path = Path(path)
    report = DatasetSummary(path=str(dataset_path), exists=dataset_path.exists())
    if not report.exists:
        report.warnings.append(f"File not found: {dataset_path}")
        return report

    # Sensor metadata comes from the registry before the file is opened.
    if sensor:
        sensor_handler = get_sensor(sensor)
        report.sensor = sensor_handler.name
        report.default_rgb = [float(band) for band in sensor_handler.default_rgb]

    try:
        if sensor:
            ds = read_sensor(sensor, dataset_path)
        else:
            ds = xr.open_dataset(dataset_path)
    except Exception as exc:
        # An unreadable dataset is a warning, not a failure.
        report.warnings.append(f"Could not open dataset: {exc}")
        return report

    try:
        report.variables = list(ds.data_vars)
        report.dimensions = {
            dim_name: int(dim_size) for dim_name, dim_size in ds.sizes.items()
        }
        report.selected_variable = _select_variable(ds, variable)
        report.crs = _dataset_crs(ds)
        report.bounds = _dataset_bounds(ds)

        wavelengths = _wavelength_values(ds, report.selected_variable)
        has_wavelengths = wavelengths is not None and wavelengths.size > 0
        if has_wavelengths:
            finite = wavelengths[np.isfinite(wavelengths)]
            # The count covers every coordinate; min/max use finite ones only.
            report.wavelength_count = int(wavelengths.size)
            if finite.size > 0:
                report.wavelength_min = float(np.nanmin(finite))
                report.wavelength_max = float(np.nanmax(finite))
    finally:
        closer = getattr(ds, "close", None)
        if callable(closer):
            closer()

    if variable and report.selected_variable != variable:
        report.warnings.append(f"Variable not found or not selectable: {variable}")
    return report