Source code for xcube.core.timeseries

# Copyright (c) 2018-2024 by xcube team and contributors
# Permissions are hereby granted under the terms of the MIT License:
# https://opensource.org/licenses/MIT.

import warnings
from typing import Union, Sequence, Optional, AbstractSet, Set

import numpy as np
import pyproj
import shapely.geometry
import shapely.ops
import shapely.wkt
import xarray as xr

from xcube.core.geom import GeometryLike
from xcube.core.geom import normalize_geometry
from xcube.core.geom import get_dataset_geometry
from xcube.core.geom import mask_dataset_by_geometry
from xcube.core.gridmapping import GridMapping
from xcube.core.select import select_variables_subset
from xcube.util.timeindex import ensure_time_index_compatible
from xcube.util.assertions import assert_instance
from xcube.constants import CRS_CRS84

# A date given either as a numpy datetime64 value or as a string.
Date = Union[np.datetime64, str]

# Names of the supported aggregation methods. get_time_series() resolves
# each name with getattr() on the dataset (or on its time group-by object),
# so every name must match a reduction method available there.
AGG_MEAN = "mean"
AGG_MEDIAN = "median"
AGG_STD = "std"
AGG_MIN = "min"
AGG_MAX = "max"
AGG_COUNT = "count"

# Flags used as values of AGG_METHODS: MUST_LOAD marks a method for which
# get_time_series() calls dataset.load() before aggregating; CAN_COMPUTE
# marks a method that does not force that load on its own.
MUST_LOAD = True
CAN_COMPUTE = False

# Maps each valid aggregation method name to its load flag. The key set is
# also what normalize_agg_methods() validates requested names against.
AGG_METHODS = {
    AGG_MEAN: CAN_COMPUTE,
    AGG_MEDIAN: MUST_LOAD,
    AGG_STD: CAN_COMPUTE,
    AGG_MIN: CAN_COMPUTE,
    AGG_MAX: CAN_COMPUTE,
    AGG_COUNT: CAN_COMPUTE,
}


def get_time_series(
    cube: xr.Dataset,
    grid_mapping: Optional[GridMapping] = None,
    geometry: Optional[GeometryLike] = None,
    var_names: Optional[Sequence[str]] = None,
    start_date: Optional[Date] = None,
    end_date: Optional[Date] = None,
    agg_methods: Union[str, Sequence[str], AbstractSet[str]] = AGG_MEAN,
    use_groupby: bool = False,
    cube_asserted: Optional[bool] = None,
) -> Optional[xr.Dataset]:
    """Get a time series dataset from a data *cube*.

    *geometry* may be provided as a (shapely) geometry object, a valid
    GeoJSON object, a valid WKT string, a sequence of box coordinates
    (x1, y1, x2, y2), or point coordinates (x, y). If the grid mapping's
    CRS is not geographic, *geometry* is transformed from CRS84 into the
    grid mapping's CRS before it is used.

    If *geometry* is a point, the values of the nearest grid cell are
    selected and the variables keep their original names. Otherwise,
    the variables are aggregated over the two spatial dimensions using
    each of the given *agg_methods*, and every resulting variable is
    named ``<variable>_<agg_method>``.

    *start_date* and *end_date* may be provided as a numpy.datetime64
    or an ISO datetime string.

    Returns a time-series dataset whose data variables have a time
    dimension but no longer have spatial dimensions, hence the
    resulting dataset's variables will only have N-2 dimensions.
    A global attribute ``max_number_of_observations`` will be set to
    the maximum number of observations that could have been made in
    each time step.

    If the given *geometry* does not overlap the cube's boundaries,
    or if no output variables remain, the function returns ``None``.

    Args:
        cube: The xcube dataset
        grid_mapping: Grid mapping of *cube*. If not given, it is
            derived from *cube*.
        geometry: Optional geometry
        var_names: Optional sequence of names of variables
            to be included.
        start_date: Optional start date.
        end_date: Optional end date.
        agg_methods: Aggregation methods. May be single string or
            sequence of strings. Possible values are
            'mean', 'median', 'min', 'max', 'std', 'count'.
            Defaults to 'mean'. Ignored if geometry is a point.
        use_groupby: Use group-by operation. May increase or
            decrease runtime performance and/or memory consumption.
        cube_asserted: Deprecated and ignored since xcube 0.11.0.
            No replacement.

    Returns:
        The time-series dataset, or ``None`` if no variables are
        selected or *geometry* does not overlap the cube.
    """
    if cube_asserted is not None:
        warnings.warn(
            "cube_asserted has been deprecated" " and will be removed soon.",
            DeprecationWarning,
            # Attribute the warning to the caller's line, not to this module.
            stacklevel=2,
        )

    assert_instance(cube, xr.Dataset)
    if grid_mapping is not None:
        assert_instance(grid_mapping, GridMapping)
    else:
        grid_mapping = GridMapping.from_dataset(cube)

    geometry = normalize_geometry(geometry)
    if geometry is not None and not grid_mapping.crs.is_geographic:
        # The geometry is taken to be in CRS84 coordinates; bring it into
        # the CRS of the cube's grid before comparing it with the cube.
        project = pyproj.Transformer.from_crs(
            CRS_CRS84, grid_mapping.crs, always_xy=True
        ).transform
        geometry = shapely.ops.transform(project, geometry)

    dataset = select_variables_subset(cube, var_names)
    if len(dataset.data_vars) == 0:
        return None

    if start_date is not None or end_date is not None:
        date_slice = slice(start_date, end_date)
        safe_slice = ensure_time_index_compatible(dataset, date_slice)
        dataset = dataset.sel(time=safe_slice)

    x_name, y_name = grid_mapping.xy_dim_names

    if isinstance(geometry, shapely.geometry.Point):
        # Point geometry: no aggregation, just pick the nearest grid cell.
        bounds = get_dataset_geometry(dataset)
        if not bounds.contains(geometry):
            return None
        indexers = {x_name: geometry.x, y_name: geometry.y}
        # "nearest" (lowercase) is the spelling documented by xarray.
        dataset = dataset.sel(**indexers, method="nearest")
        return dataset.assign_attrs(max_number_of_observations=1)

    # Sort the validated method names so that the order of the output
    # variables does not depend on set iteration order.
    agg_methods = sorted(normalize_agg_methods(agg_methods))

    if geometry is not None:
        dataset = mask_dataset_by_geometry(
            dataset, geometry, save_geometry_mask="__mask__"
        )
        if dataset is None:
            return None
        # The temporary mask variable is only needed for counting the
        # cells covered by the geometry; it must not be aggregated.
        mask = dataset["__mask__"]
        max_number_of_observations = np.count_nonzero(mask)
        dataset = dataset.drop_vars(["__mask__"])
    else:
        max_number_of_observations = dataset[y_name].size * dataset[x_name].size

    # Load the data once up front if several aggregations will read it,
    # or if any requested method is flagged as MUST_LOAD.
    must_load = len(agg_methods) > 1 or any(
        AGG_METHODS[agg_method] == MUST_LOAD for agg_method in agg_methods
    )
    if must_load:
        dataset.load()

    if use_groupby:
        # Within each time group, reduce over all remaining dimensions.
        agg_source = dataset.groupby("time")
        agg_dim = xr.ALL_DIMS
    else:
        agg_source = dataset
        agg_dim = (y_name, x_name)

    agg_datasets = []
    for agg_method in agg_methods:
        method = getattr(agg_source, agg_method)
        if agg_method == "count":
            # count() is the only method called without "skipna".
            agg_dataset = method(dim=agg_dim)
        else:
            agg_dataset = method(dim=agg_dim, skipna=True)
        # Suffix variable names with the method so that the results of
        # different methods can be merged into one dataset.
        agg_datasets.append(
            agg_dataset.rename(
                name_dict={v: f"{v}_{agg_method}" for v in agg_dataset.data_vars}
            )
        )

    ts_dataset = xr.merge(agg_datasets)
    ts_dataset = ts_dataset.assign_attrs(
        max_number_of_observations=max_number_of_observations
    )
    return ts_dataset


def normalize_agg_methods(
    agg_methods: Union[str, Sequence[str]], exception_type=ValueError
) -> Set[str]:
    """Normalize and validate aggregation method names.

    Args:
        agg_methods: A single method name or a collection of method
            names. A falsy value (e.g. ``None`` or an empty sequence)
            is replaced by the mean method.
        exception_type: Type of the exception raised for invalid
            method names. Defaults to ``ValueError``.

    Returns:
        The set of requested aggregation method names.

    Raises:
        exception_type: If any name is not a key of ``AGG_METHODS``.
    """
    if not agg_methods:
        requested = {AGG_MEAN}
    elif isinstance(agg_methods, str):
        requested = {agg_methods}
    else:
        requested = set(agg_methods)

    # Sorted so the error message lists offending names in a stable order.
    unknown = sorted(name for name in requested if name not in AGG_METHODS)
    if not unknown:
        return requested

    plural = "s" if len(unknown) > 1 else ""
    listing = ", ".join(unknown)
    raise exception_type(f"invalid aggregation method{plural}: {listing}")