Source code for pywasp.wasp.binned_wind_climate

"""Binned Wind Climate Module

Extends windkit Binned Wind Climate module to add a function for creating a weibull
distribution out of the binned wind climate using the WAsP method. It contains a much
faster fortran based binning routine that can be used for example in combination with
mesoscale model output, where speed is crucial.

It also contains functions for calculating stability information efficiently using
a binning algorithm which computes the conditional mean stability based on the wind
histogram in a single pass without storing the whole time series of wind speed.
"""

__all__ = [
    "bwc_from_tswc",
    "calc_temp_scale",
    "stability_histogram",
    "create_histogram_z0",
    "mean_z0",
    "baroclinicity_histogram",
    "bwc_resample_like",
    "bwc_resample_sectors",
    "bwc_resample_wsbins_like",
]

import logging
import numpy as np
import xarray as xr

from windkit.wind_climate.binned_wind_climate import (
    _create_time_attributes,
    _validate_bwc_wrapper_factory,
    _bwc_from_counts,
)
from windkit.wind_climate.time_series_wind_climate import (
    _validate_tswc_wrapper_factory,
    WS,
    WD,
    DIM_TIME,
)
from windkit import create_sector_coords
from windkit import create_wsbin_coords
from windkit.spatial import (
    _spatial_stack,
    _spatial_unstack,
    to_point,
    get_spatial_struct,
    set_crs,
    interp_unstructured_like,
    are_spatially_equal,
    _to_spatial_struct,
)
from windkit.xarray_structures.metadata import (
    _BWC_ATTRS,
    _HIS_ATTRS,
    _HIS_STAB_ATTRS,
    _TS_STAB_ATTRS,
    _MEAN_STAB_ATTRS,
    _HIS_BARO_ATTRS,
    _MEAN_BARO_ATTRS,
    _update_var_attrs,
    _ALL_VARS_META,
    _update_history,
)

from pywasp.core import Rvea0288, Rvea0326  # pylint:disable=no-name-in-module

logger = logging.getLogger(__name__)


# fortran calls
_CREATE_HIST = Rvea0288.histograms.add_histogram_points
_CALC_TSCALE = Rvea0326.calc_points.tstar_points
_NORM_HIST = Rvea0288.histograms.normalize_histogram_points
_COND_HIST = Rvea0288.histograms.conditional_hist
_MEAN_HIST = Rvea0288.histograms.mean_from_hist
_STD_HIST = Rvea0288.histograms.std_from_hist
_RESAMPLE_HIST = Rvea0288.histograms.histogram_resample_sectors
_RESAMPLE_HIST_WSBINS = Rvea0288.histograms.histogram_resample_wsbins
_RESAMPLE_HIST_WSBINS_AK = Rvea0288.histograms.histogram_resample_wsbins_ak
_DGDZ = Rvea0288.baroclin.compute_dgdz
_DGDZ_TIME = Rvea0288.baroclin.compute_dgdz_t

# constants
RTD = 180 / np.pi



[docs]
@_validate_tswc_wrapper_factory(run_extra_checks=False)
def bwc_from_tswc(
    ds,
    hist=None,
    normalize=True,
    revert_to_original=True,
    wsbin_width=1,
    n_wsbins=30,
    n_sectors=12,
):
    """Add timeseries to histogram

    Converts a time series with wind speed & direction to derive
    summed histogram of the wind vector.

    .. warning::
        This function is experimental and its signature may change.

    Parameters
    ----------
    ds: xr.Dataset
        PyWAsP Timeseries dataset. Can have any of the pywasp
        spatial data structures
    hist: xr.Dataset, optional
        Histogram with dimensions point, wsbin, sector
        containing a count, Default is None, creating a
        histogram using wsbin_width, n_wsbins, and n_sectors
    normalize: boolean
        Normalize histogram for each sector, storing the frequency per sector in "wdfreq"
        variable, Default True
    revert_to_original: boolean
        Return the histogram input in the same spatial struction as ds? If false, keeps
        the data is kept in stacked point, which is more efficient when doing large
        calculations for numerical wind atlases.
    wsbin_width: float
        width of wind speed bins
    n_wsbins: int
        Number of wind speed bins
    n_sectors: int
        Number of sectors (wind direction bins)

    Returns
    -------
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector
        with the values from ds-timeseries added.
    """
    logger.debug(
        f"Convert time series ({ds[WS].sizes[DIM_TIME]} samples) into histogram"
    )

    # 1. Determine bins
    if hist is not None:
        wd_bins = hist.sector.values
        ws_bins = hist.wsbin.values
    else:
        wd_bins = create_sector_coords(n_sectors)
        ws_bins = create_wsbin_coords(n_wsbins, width=wsbin_width)

    # 2. Stack and Prepare Time Series Data
    struct_in = get_spatial_struct(ds)
    ds = to_point(ds).transpose(..., DIM_TIME, "point")

    # 3. Prepare Output Arrays
    if hist is None:
        # Create (wsbin,sector,point) F-contiguous
        hist_arr = np.zeros(
            (ws_bins.size, wd_bins.size, ds.point.size), dtype=np.float32, order="F"
        )
        ierr_arr = np.zeros((ds.point.size,), dtype=np.int32, order="F")
    else:
        # Ensure F-contiguous (wsbin, sector, point)
        hist = to_point(hist)
        tdims = (..., "wsbin", "sector", "point")
        hist_arr = (
            hist["wv_count"]
            .transpose(*tdims)
            .astype(np.float32, order="F", copy=False)
            .data
        )
        ierr_arr = (
            hist["count_bins_exceeded"].astype(np.int32, order="F", copy=False).data
        )

    # 4. Call Fortran
    _CREATE_HIST(
        ds[WS].data,
        ds[WD].data,
        0,
        ws_bins,
        wd_bins,
        np.full(ds[WS].shape, 1.0, order="F", dtype=np.float32),
        hist_arr,
        ierr_arr,
    )

    nr_fails = ierr_arr[ierr_arr > 0].shape[0]
    if nr_fails > 0:
        logger.warning(
            f"""The wind speeds you provided
        exceeded the range used for binning ({np.max(ws_bins.values)})
        in {nr_fails} points"""
        )

    # 5. Wrap result
    hist = _wv_count_ds(hist_arr, ierr_arr, ws_bins, wd_bins, ds[WS].point)

    dic_attrs = _create_time_attributes(ds, hist=hist)
    hist = hist.assign_attrs(dic_attrs).drop_vars("point", errors="ignore")

    if normalize:
        hist = _normalize_hist(hist)

    if revert_to_original:
        hist = _to_spatial_struct(hist, struct_in)

    hist = hist.assign_coords({"sector": wd_bins, "wsbin": ws_bins})
    attrs = _BWC_ATTRS if normalize else _HIS_ATTRS
    hist = _update_var_attrs(hist, attrs)

    return _update_history(hist)



def _wv_count_ds(hist, ierr, ws_bins_coord, wd_bins_coord, point_coords):
    """
    Build a WAsP-style histogram xarray Dataset from raw numpy arrays.

    Parameters
    ----------
    hist : ndarray
        3D numpy array with histogram counts shaped (wsbin, sector, point).
    ierr : ndarray
        1D integer array with per-point counts of samples outside bins.
    ws_bins_coord : ndarray or DataArray
        Coordinates (centers) for wind speed bins.
    wd_bins_coord : ndarray or DataArray
        Coordinates (centers) for wind direction sectors.
    point_coords : ndarray or DataArray
        Coordinates for spatial points.

    Returns
    -------
    xr.Dataset
        Dataset containing:
        - wv_count: DataArray of shape (wsbin, sector, point) with the histogram counts.
        - count_bins_exceeded: 1D DataArray per point with the number of samples outside bins.
        The Dataset will have coords 'wsbin', 'sector', and 'point'.
    """
    hist = (
        xr.DataArray(
            hist,
            dims=("wsbin", "sector", "point"),
            coords={
                "wsbin": ws_bins_coord,
                "sector": wd_bins_coord,
                "point": point_coords,
            },
        )
    ).to_dataset(name="wv_count")
    hist = hist.assign_coords(point=point_coords)
    hist["count_bins_exceeded"] = xr.DataArray(
        ierr, dims=("point"), coords={"point": point_coords}
    )

    return hist


def _create_hist(wspd, wdir, isuv, wsbin, wdsec, add, wv_count, count_bins_exceeded):
    """creating histogram from wind speed and wind dir

    Wrapper around fortran call that loops over all points
    and generates 2d histograms from wind speed and direction.
    If the wind speed or wind direction is outside of the range
    used for binning it adds 1 to `count_bins_exceeded` for each point

    Parameters
    ----------
    wspd: array of float
        Array with wind speeds dimensions (`time`, `point`)
    wdir: array of float
        Array with meteorological wind direction dimensions (`time`, `point`)
    isuv: int {0,1}
        Are the input values wind speed / wind direction (0)
        or u / v (1)?
    wsbin: array of float
        center of wind speed bins used for creating histogram
    wdsec: array of float
        center of wind direction bins used for creating histogram
    add: array of float
        Array with values that will be added to histogram at each point
    wv_count: array of float
        Array with histogram with dimensions #wsbin, #wdsec, #point
    count_bins_exceeded: array of int
        Array with dimension point

    Returns
    -------
    wv_count: array of float
        Array with histogram with dimensions #wsbin, #wdsec, #point
    count_bins_exceeded: array of int
        Array with dimension point
    """
    _CREATE_HIST(wspd, wdir, isuv, wsbin, wdsec, add, wv_count, count_bins_exceeded)
    return (wv_count, count_bins_exceeded)



[docs]
def calc_temp_scale(ds, revert_to_original=True, enhmix=1.0, use_humidity=False):
    """Calculate temperature scale

    This calculates a surface layer temperature scale that can
    be used to model the wind profile. Using the enhmix can be used
    to control mixing over land, because ERA5 has a large bias
    in $u_*$ over land. If you supply enhmix you will have to
    add a landmask because it is only applied over land. If use_humidity
    is true the humidity is used for calculating the air density.

    .. warning::
        This function is experimental and its signature may change.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset containing variables
        ["PSFC", "T2", "UST", "HFX", "LH", "PBLH"]
        Can have any pywasp spatial structure
    revert_to_original: boolean
        Return the histogram input in original format from the
        input :class:`xarray.Dataset`? If false, keeps the data in stacked point
        format which is more efficient when doing large
        calculations for numerical wind atlases.
    enhmix : float
        Decrease mixing over land with this factor. If different from
        1.0, requires a land mask variable "LANDMASK" since it is only
        applied over land.
    use_humidity: bool
        If True, use the humdity for calculating the air density. If
        true, requires variable "elevation" (for the terrain elevation)
        and "Q2" to be present on the dataset. By default False

    Returns
    -------
    hist : xr.DataArray
        DataArray 'temp_scale' with same dimensions as input
    """
    dss = _spatial_stack(ds, copy=revert_to_original).transpose(..., DIM_TIME, "point")

    req_vars = ["PSFC", "T2", "UST", "HFX", "LH", "PBLH"]
    if enhmix != 1.0:
        req_vars = req_vars + ["LANDMASK"]
    if use_humidity:
        req_vars = req_vars + ["elevation", "Q2"]

    if "LANDMASK" not in req_vars:
        logger.debug("Adding dummy variable LANDMASK")
        # land mask not required in dataset, so we just create a dummy array
        dss["LANDMASK"] = xr.full_like(dss["point"], 0)

    if "Q2" not in req_vars:
        # humidity not required in dataset, so we just create a dummy array
        logger.debug("Adding dummy variables Q2 and elevation")
        dss["elevation"] = xr.full_like(dss["point"], 0)
        dss["Q2"] = xr.full_like(dss["UST"], 0)

    try:
        logger.debug("Calculating temperature scale")
        tstar = _CALC_TSCALE(
            dss["PSFC"],
            dss["T2"],
            dss["UST"],
            dss["HFX"],
            dss["LH"],
            dss["PBLH"],
            dss["elevation"],
            dss["Q2"],
            dss["LANDMASK"],
            enhmix,
            int(use_humidity),
        )
    except KeyError as e:
        raise KeyError(f"Required variable {e} not found in dataset!")

    dss["temp_scale"] = (dss.UST.dims, tstar)
    dss = dss[["temp_scale"]]
    _update_var_attrs(dss, _TS_STAB_ATTRS)

    if revert_to_original:
        res = _spatial_unstack(dss)["temp_scale"]
    else:
        res = dss["temp_scale"]
    return _update_history(res)




[docs]
def stability_histogram(
    ds,
    hist=None,
    finalize=True,
    revert_to_original=True,
    wsbin_width=1,
    n_wsbins=40,
    n_sectors=12,
    percentile=0.5,
    wv_count=None,
    landmask=None,
):
    """Add timeseries to existing histogram

    Uses the stability parameters "temp_scale" and "ustar_over_pblh".
    See the paper "Using observed and modelled heat fluxes for
    improved extrapolation of wind distributions" for the definitions
    of these variables. The temp_scale variable is defined as $T_*$
    in that paper, whereas the "ustar_over_pblh" should be defined as
    $u_*/pblh$ as input for this function. This is done to give priority
    to the smallest pblh that are most important for the wind profile.
    The output is reversed ($**-1$) to give the input format as required
    by WAsP ($pblh/u_*$).
    The time dimension is copied so that one can see what was the
    period that was used to generate the histogram.
    This method is optimized for large datasets, where storing
    all values in memory is not possible. It uses a binning so that
    the stability parameters are calculated as a conditional mean (on the
    wind speed and direction) in a single pass through the data.
    The histogram is created if not provided, but it can also be
    provided to add more data to an existing histogram. The idea
    is than that you sum to the existing histogram and then
    finalize at the end using finalize=True. When using finalize=True,
    the histogram is converted into a mean and standard deviation
    of the stability parameters per wind speed and direction bin. To
    calculate both sea and land stability, you will have to provide
    a landmask so that the finalize step can separate between sea
    and land points.

    .. warning::
        This function is experimental and its signature may change.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset containing variables ['wind_speed', 'wind_direction',
        'temp_scale', 'ustar_over_pblh']
        Can have any pywasp spatial structure
    hist : xarray.Dataset
        Histogram with dimensions point, wsbin, sector
    finalize : boolean
        Convert the stabilility histogram into a mean and
        standard deviation?
    revert_to_original : boolean
        Return the histogram input in original format from the
        input ``ds``? If false, keeps the data in stacked point
        format which is more efficient when doing large
        calculations for numerical wind atlases.
    wsbin_width : float
        width of wind speed bins
    n_wsbins : int
        Number of wind speed bins
    n_sectors : int
        Number of sectors (wind direction bins)
    percentile : float
        Percentile of the wind speed distribution to use for
        calculating the stability parameters. Default is 0.5,
        which is the median.
    wv_count : xarray.Dataset, optional
        Histogram with dimensions point, wsbin, sector
        containg a count. If provided, this histogram is used
        for the finalization step. If None, a new histogram
        is created from the input ``ds``. Default is None.
    landmask: xr.DataArray
        Landmask with values 0 (sea) and 1 (land)

    Returns
    -------
    hist : xarray.Dataset
        Histogram with dimensions point, wsbin, sector
        with the values from ds-timeseries added.

    Raises
    ------
    ValueError
        If landmask is provided, but does not have the same
        spatial structure as ds.
    ValueError
        If hist is provided, but does not have the same
        spatial structure as ds.
    ValueError
        If landmask is provided, finalize=True and
        there are only land or sea points in ds.
    """
    if landmask is not None:
        if not are_spatially_equal(ds, landmask):
            raise ValueError(
                "Input ds and landmask must have the same spatial structure"
            )

    if hist is not None:
        if are_spatially_equal(ds, hist) is not False:
            raise ValueError(
                "Source histogram provided with argument hist must have the same spatial structure"
            )

    if hist is None:
        hist = _init_histogram_stab(ds, wsbin_width, n_wsbins, n_sectors)

    ds_sub = ds[[WS, WD, "temp_scale", "ustar_over_pblh"]]
    dss = _spatial_stack(ds_sub, copy=revert_to_original).transpose(
        ..., DIM_TIME, "point"
    )
    hist = _spatial_stack(hist, copy=revert_to_original).transpose(
        ..., "wsbin", "sector", "point"
    )

    # error arrays are inout in fortran, so we must create copies
    # to not add the error counter 3 times
    ierr_sum_squared_temp_scale = hist["count_bins_exceeded"].copy().values
    ierr_sum_pblh = hist["count_bins_exceeded"].copy().values

    sum_temp_scale, count_bins_exceeded = _create_hist(
        dss[WS],
        dss[WD],
        0,
        hist.wsbin,
        hist.sector,
        dss["temp_scale"],
        hist["sum_temp_scale"].values,
        hist["count_bins_exceeded"].values,
    )
    sum_squared_temp_scale, count_bins_exceeded = _create_hist(
        dss[WS],
        dss[WD],
        0,
        hist.wsbin,
        hist.sector,
        dss["temp_scale"] ** 2,
        hist["sum_squared_temp_scale"].values,
        ierr_sum_squared_temp_scale,
    )
    sum_pblh, count_bins_exceeded = _create_hist(
        dss[WS],
        dss[WD],
        0,
        hist.wsbin,
        hist.sector,
        dss["ustar_over_pblh"],
        hist["sum_pblh"].values,
        ierr_sum_pblh,
    )

    var_names = (
        "sum_temp_scale",
        "sum_squared_temp_scale",
        "sum_pblh",
        "count_bins_exceeded",
    )

    for name in var_names:
        hist[name] = (hist[name].dims, eval(name))
        _update_var_attrs(hist[name], _HIS_STAB_ATTRS)
        hist[name].attrs["_pwio_data_is_2d"] = True

    dic_attrs = _create_time_attributes(ds_sub, hist=hist)
    hist = hist.assign_attrs(dic_attrs)

    if finalize:
        if wv_count is None:
            wv_count = bwc_from_tswc(
                dss,
                normalize=False,
                revert_to_original=False,
                wsbin_width=wsbin_width,
                n_wsbins=n_wsbins,
                n_sectors=n_sectors,
            )
        else:
            wv_count = bwc_from_tswc(
                dss, hist=wv_count, normalize=False, revert_to_original=False
            )

        res = _finalize_meso(
            hist, wv_count["wv_count"], landmask, percentile=percentile
        )
        return _update_history(res)

    res = _spatial_unstack(hist) if revert_to_original else hist
    return _update_history(res)



def _init_histogram_stab(ds, wsbin_width=1, nws=40, nwd=12):
    """Initialize histogram

    Initialize histogram with the variables needed for
    a stability histogram. The dimensions of the spatial
    structure are the same as the input ds and extended with
    dimensions nws, nwd.

    Parameters
    ----------
    ds: xr.Dataset
        Can have any of the pywasp spatial data structures
    var_dic: dict
        Dictonairy contain the variables that will be added to
        the xarray container, including meta-data according to
        the CF conventions
    wsbin: float
        width of wind speed bins
    wdsec: float
        width of wind direction bins
    nws: int
        Number of wind speed bins
    nwd: int
        Number of wind direction bins

    Returns
    -------
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector
    """
    hist = _spatial_stack(ds, copy=False)[["point"]]
    var_names = ("sum_temp_scale", "sum_squared_temp_scale", "sum_pblh")
    for var in var_names:
        hist[var] = _init_histogram(ds, var, nwd=nwd, nws=nws, wsbin_width=wsbin_width)
    hist["count_bins_exceeded"] = xr.zeros_like(hist["point"], dtype=np.int32)

    return hist


def _init_histogram(ds, name, nwd=12, nws=None, wsbin_width=None):
    """Initialize generic histogram

    Initialize histogram with the variables needed for
    a wind speed and wind direction dependent histogram. The dimensions
    of the spatial structure are the same as the input ds and extended
    with dimensions nws, nwd.

    Parameters
    ----------
    ds: `xarray.Dataset`
        Can have any of the pywasp spatial data structures
    name: str
        Name of the `xarray.DataArray` that is created
    nwd: int
        Number of wind direction bins
    nws: int
        Number of wind speed bins
    wsbin_width: float
        width of wind speed bins

    Returns
    -------
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector
    """
    all_dims = (_spatial_stack(ds, copy=False)["point"],)
    all_dims = all_dims + (create_sector_coords(nwd),)
    if wsbin_width is not None and nws is not None:
        all_dims = all_dims + (create_wsbin_coords(nws, width=wsbin_width),)
    template = xr.broadcast(*all_dims)
    # combined point, ws_bin, wd_bin array filled with zeroes
    hist = xr.zeros_like(template[0], dtype="float32")
    # make sure we copy auxilary coordinates from sector and wind speed bins
    for i in template[1:]:
        hist = hist.assign_coords(i.coords)
    hist.name = name
    try:
        _update_var_attrs(hist, {**_ALL_VARS_META})
    except KeyError:
        logging.warning("No metadata could be added to the variable!")

    return hist


def _init_histogram_wind(ds, wsbin_width, nws, nwd):
    """Initialize histogram

    Initialize histogram with the variables needed for
    a wind vector count histogram. The dimensions of the spatial
    structure are the same as the input ds and extended with
    dimensions nws, nwd.

    Parameters
    ----------
    ds: xr.Dataset
        Can have any of the pywasp spatial data structures
    wsbin_width: float
        width of wind speed bins
    nws: int
        Number of wind speed bins
    nwd: int
        Number of wind direction bins

    Returns
    -------
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector
    """
    hist = _spatial_stack(ds, copy=False)[["point"]]

    hist["wv_count"] = _init_histogram(
        ds, "wv_count", nwd=nwd, nws=nws, wsbin_width=wsbin_width
    )
    hist["count_bins_exceeded"] = xr.zeros_like(hist["point"], dtype=np.int32)

    return hist


def _stab_mean_std(hist, wv_count, percentile=0.5):
    """Compute mean or standard deviation from histogram

    Compute mean or standard deviation from histogram. The
    variables are binned by sector and wind speed bin and
    a mean or std. dev. is calulcated based on the highest
    percentile of wind speeds for each sector.

    Parameters
    ----------
    hist: xr.Dataset
        Dataset with variables `sum_temp_scale`, `sum_squared_temp_scale`
        and `sum_pblh` and dimensions `wsbin`, `sector`

    Returns
    -------
    clim: xr.Dataset
        Dataset with mean mesoclimate variables per sector
    """
    hist["wv_count"] = wv_count
    hist = _spatial_stack(hist, copy=False)
    cond_hist, freq = _COND_HIST(hist.sum_pblh, hist.wv_count, percentile)
    pblhmean = _MEAN_HIST(cond_hist, freq)

    cond_hist, freq = _COND_HIST(hist.sum_temp_scale, hist.wv_count, percentile)
    cond_hist2, freq = _COND_HIST(
        hist.sum_squared_temp_scale, hist.wv_count, percentile
    )
    tstarstd = _STD_HIST(cond_hist, cond_hist2, freq)
    tstarmean = _MEAN_HIST(cond_hist, freq)

    clim = hist[["point", "sector"]]
    clim["mean_pblh_scale"] = (("sector", "point"), 1 / pblhmean)
    clim["rms_temp_scale"] = (("sector", "point"), tstarstd)
    clim["mean_temp_scale"] = (("sector", "point"), tstarmean)

    return _spatial_unstack(clim)


def create_histogram_z0(
    ds,
    hist=None,
    finalize=True,
    revert_to_original=True,
    wsbin_width=1,
    n_wsbins=40,
    n_sectors=12,
    percentile=0.5,
    wv_count=None,
    landmask=None,
):
    """Add timeseries to existing histogram

    Uses the stability parameters to derive
    summed histogram of the surface layer temperature scale,
    squared surface layer temperature scale, boundary
    layer height and logarithm of roughness length. The time
    dimension is copied so that one can see what was the
    period that was used to generate the histogram.

    .. warning::
        This function is experimental and its signature may change.

    Parameters
    ----------
    ds: xr.Dataset
        Dataset containing variables
        ['wind_speed', 'wind_direction', 'LANDMASK', 'ZNT']
        Can have any pywasp spatial structure
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector
    finalize: boolean
        Convert the stabilility histogram into a mean and
        standard deviation?
    revert_to_original: boolean
        Return the histogram input in original format from the
        input `ds`? If false, keeps the data in stacked point
        format which is more efficient when doing large
        calculations for numerical wind atlases.
    wsbin_width: float
        width of wind speed bins
    n_wsbins: int
        Number of wind speed bins
    n_sectors: int
        Number of sectors (wind direction bins)
    kwargs:
        Other keyword arguments passed on to _finalize_meso

    Returns
    -------
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector
        with the values from ds-timeseries added.
    """
    if hist is None:
        hist = _spatial_stack(ds, copy=False)[["point"]]
        hist["z0sum"] = _init_histogram(
            ds[["ZNT"]], "z0sum", n_sectors, n_wsbins, wsbin_width
        )
        hist["count_bins_exceeded"] = xr.zeros_like(hist["point"], dtype=np.int32)

    ds_sub = ds[[WS, WD, "ZNT"]]
    dss = _spatial_stack(ds_sub, copy=False).transpose(..., DIM_TIME, "point")
    hist = _spatial_stack(hist, copy=False).transpose(..., "wsbin", "sector", "point")

    z0sum, count_bins_exceeded = _create_hist(
        dss[WS],
        dss[WD],
        0,
        hist.wsbin,
        hist.sector,
        np.log(dss["ZNT"]),
        hist["z0sum"].values,
        hist["count_bins_exceeded"].values,
    )
    hist["z0sum"] = (("wsbin", "sector", "point"), z0sum)
    hist["count_bins_exceeded"] = (("point"), count_bins_exceeded)
    dic_attrs = _create_time_attributes(ds_sub, hist=hist)
    hist = hist.assign_attrs(dic_attrs).drop_vars("point", errors="ignore")

    if finalize:
        if wv_count is None:
            wv_count = bwc_from_tswc(
                dss,
                normalize=False,
                revert_to_original=False,
                wsbin_width=wsbin_width,
                n_wsbins=n_wsbins,
                n_sectors=n_sectors,
            )
        else:
            wv_count = bwc_from_tswc(
                dss, hist=wv_count, normalize=False, revert_to_original=False
            )
        res = _z0_mean(hist, wv_count["wv_count"], landmask, percentile=percentile)
        return _update_history(res)

    res = _spatial_unstack(hist) if revert_to_original else hist
    return _update_history(res)


def _z0_mean(hist, wv_count, landmask, percentile=0.5):
    """Compute mean or standard deviation from histogram

    Compute mean or standard deviation from histogram. The
    variables are binned by sector and wind speed bin and
    a mean or std. dev. is calulcated based on the highest
    percentile of wind speeds for each sector.

    Parameters
    ----------
    hist: xr.Dataset
        Dataset with variables `sum_temp_scale`, `sum_squared_temp_scale`
        and `sum_pblh` and dimensions `wsbin`, `sector`

    Returns
    -------
    clim: xr.Dataset
        Dataset with mean mesoclimate variables per sector
    """
    hist["wv_count"] = wv_count
    hist["landmask"] = landmask
    hist = _spatial_stack(hist, copy=False)
    cond_hist, wd_count = _COND_HIST(hist["z0sum"], hist["wv_count"], percentile)
    z0mean = _MEAN_HIST(cond_hist, wd_count)
    hist["logz0sec"] = (("sector", "point"), z0mean)
    hist["wd_count"] = (("sector", "point"), wd_count)
    hist["mean_z0"] = mean_z0(hist)
    hist["mean_z0"] = xr.where(hist["landmask"] == 0, 0.0, hist["mean_z0"])
    return _spatial_unstack(hist)


def mean_z0(hist):
    """Compute mean roughness

    Compute mean roughness from log(z0) per wsbin and
    sector

    .. warning::
        This function is experimental and its signature may change.

    Parameters
    ----------
    hist: xr.Dataset
        Dataset with variables `logz0sec` and `wd_count`
        and dimensions `wsbin`, `sector`

    Returns
    -------
    ds: xr.DataArray
        Dataarray with mean z0 per sector
    """
    mean_z0 = hist["logz0sec"] * hist["wd_count"] / hist["wd_count"].sum(dim=["sector"])
    return np.exp(mean_z0.sum(dim=["sector"]))


def _normalize_hist(hist_point):
    """Convert frequency to probability-density histogram

    Convert frequency to probability density histogram in the
    WAsP core structure, i.e. with an additional variables
    containing wind speed sector frequencies.

    Parameters
    ----------
    hist: xr.Dataset
        Dataset with the frequency of occurence in each bin

    Returns
    -------
    hist: xr.Dataset
        Binned wind climate with probability density in each bin
    """
    wsfreq, wdfreq = _NORM_HIST(hist_point["wv_count"])

    var_names = ("wsfreq", "wdfreq")
    var_dims = (("wsbin", "sector", "point"), ("sector", "point"))
    for name, dims in zip(var_names, var_dims):
        hist_point[name] = (dims, eval(name))

    hist = hist_point.drop_vars(["wv_count", "count_bins_exceeded"], errors="ignore")

    return hist


def _finalize_meso(total_stab_histogram, wv_count, landmask=None, percentile=0.5):
    """Finalize cumulative mesoscale climate

    Computes conditional mean mesoscale climate from cumulative
    histogram with mesoscale climate variables. The mean
    and standard deviation of heat flux are calculated based on
    the values occuring at the 50 percent highest wind speeds
    (percentile=0.5). These fields are calculated seperately
    over land and sea.

    Parameters
    ----------
    total_stab_histogram: xr.Dataset
        Dataset with the sum of mesoscale variables
    wrf: xr.Dataset
        Dataset (e.g. `wrfout`) with the WRF variables
        `LANDMASK`, `HGT`, `LU_INDEX`, `COSALPHA`,
        `SINALPHA`

    Returns
    -------
    ds: xr.Dataset
        Dataset with mean or standard deviation of the
        histograms
    """
    hist = _stab_mean_std(total_stab_histogram, wv_count, percentile=percentile)
    if landmask is not None:
        hist = _nearest_landsea(hist, landmask)

    _update_var_attrs(hist, _MEAN_STAB_ATTRS)

    return hist


def _nearest_landsea(ds, landmask):
    """Calculate mean values over sea and land

    To use stability setting over land and sea seperately
    we calculate two fields from one: the land and sea fields
    are generated using windkit nearest neighbour interpolation

    Parameters
    ----------
    ds: xr.Dataset
        Dataset with the variables given in `ori_vars`

    landmask: xr.DataArray
        Landmask with values 0 (sea) and 1 (land)

    Returns
    -------
    ds: xr.Dataset
        Dataset with the variables given in `new_vars`
    """
    ori_vars = [
        "mean_temp_scale",
        "rms_temp_scale",
        "mean_pblh_scale",
        "mean_temp_scale",
        "rms_temp_scale",
        "mean_pblh_scale",
    ]
    new_vars = [
        "mean_temp_scale_land",
        "rms_temp_scale_land",
        "mean_pblh_scale_land",
        "mean_temp_scale_sea",
        "rms_temp_scale_sea",
        "mean_pblh_scale_sea",
    ]

    for var, ori_var in zip(new_vars, ori_vars):
        # era5 has a fraction for the landmask, if the ratio
        # land sea is larger than 0.5 we consider it land
        if "_land" in var:
            landflag = 1
            surface_type = "land"
        else:
            landflag = 0
            surface_type = "water"

        tarr = ds[ori_var].where(np.round(landmask) == landflag)
        tarr_point = to_point(tarr)
        tarr_no_nans = tarr_point.dropna("point")
        if tarr_no_nans.sizes["point"] == 0:
            raise ValueError(f"""
                The landmask you passed in does not contain any 
                pixels with {landflag} ({surface_type}). Please include such points 
                to obtain a valid estimation of the 
                stability conditions over {surface_type}
            """)
        tarr = interp_unstructured_like(tarr_no_nans, tarr, method="nearest")

        ds[var] = tarr

    return ds[new_vars]


def _init_histogram_baro(ds, wsbin_width=1, nws=40, nwd=12):
    """Initialize histogram

    Initialize histogram with the variables needed for
    a stability histogram. The dimensions of the spatial
    structure are the same as the input ds and extended with
    dimensions nws, nwd.

    Parameters
    ----------
    ds: xr.Dataset
        Can have any of the pywasp spatial data structures
    var_dic: dict
        Dictonairy contain the variables that will be added to
        the xarray container, including meta-data according to
        the CF conventions
    wsbin: float
        width of wind speed bins
    wdsec: float
        width of wind direction bins
    nws: int
        Number of wind speed bins
    nwd: int
        Number of wind direction bins

    Returns
    -------
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector
    """
    bin_centers_ws = create_wsbin_coords(bins=nws, width=wsbin_width)
    bin_centers_wd = create_sector_coords(nwd)

    hist = _spatial_stack(ds, copy=False)[["point"]]
    # stabdim = hist.sizes["point"]

    # sum_dugdz = np.zeros((nws, nwd, stabdim), order="F", dtype=np.float32)
    # sum_dvgdz = np.zeros((nws, nwd, stabdim), order="F", dtype=np.float32)
    # count_bins_exceeded = np.zeros(stabdim, order="F", dtype=np.int32)

    hist = hist.assign_coords({**bin_centers_wd.coords, **bin_centers_ws.coords})

    var_names = (
        "sum_dugdz",
        "sum_dvgdz",
        "count_bins_exceeded",
    )
    var_dims = (("wsbin", "sector", "point"),) * 2 + ("point",)
    for name, dims in zip(var_names, var_dims):
        hist[name] = (dims, eval(name), _HIS_BARO_ATTRS[name])

    return hist


def _finalize_baro(hist, percentile=0.5):
    """Compute mean or standard deviation from histogram

    Compute mean or standard deviation from histogram. The
    variables are binned by sector and wind speed bin and
    a mean or std. dev. is calulcated based on the highest
    percentile of wind speeds for each sector.

    Parameters
    ----------
    hist: xr.Dataset
        Dataset with variables `tstarsum`, `tstar2sum`
        and `pblhsum` and dimensions `wsbin`, `sector`

    Returns
    -------
    clim: xr.Dataset
        Dataset with mean mesoclimate variables per sector
    """
    hist = _spatial_stack(hist, copy=False)
    cond_hist, freq = _COND_HIST(hist["sum_dugdz"], hist["wv_count"], percentile)
    mean_dugdz = _MEAN_HIST(cond_hist, freq)
    cond_hist, freq = _COND_HIST(hist["sum_dvgdz"], hist["wv_count"], percentile)
    mean_dvgdz = _MEAN_HIST(cond_hist, freq)

    clim = hist[["point", "sector"]]
    clim["mean_dgdz_dir_north"] = (
        ("sector", "point"),
        np.arctan2(-mean_dvgdz, -mean_dugdz) * RTD,
    )
    clim["mean_dgdz_dir_south"] = (
        ("sector", "point"),
        np.arctan2(mean_dvgdz, mean_dugdz) * RTD,
    )
    clim["mean_dgdz_dir"] = xr.where(
        clim["south_north"] < 0,
        clim["mean_dgdz_dir_south"],
        clim["mean_dgdz_dir_north"],
    )
    clim["mean_dgdz"] = (
        ("sector", "point"),
        np.sqrt(mean_dugdz**2 + mean_dvgdz**2),
    )
    out = _spatial_unstack(
        clim.drop_vars(["mean_dgdz_dir_north", "mean_dgdz_dir_south"])
    )
    _update_var_attrs(out, _MEAN_BARO_ATTRS)

    return out


def baroclinicity_histogram(
    ds, hist=None, finalize=True, revert_to_original=True, **kw
):
    """Add timeseries to existing histogram

    Uses the baroclinicity parameters to derive
    summed histogram of the geostrophic shear components.

    .. warning::
        This function is experimental and its signature may change.

    Parameters
    ----------
    ds: xr.Dataset
        Dataset containing variables
        ['wind_speed', 'wind_direction', 'PSFC', 'T2', 'UST',
                 'HFX', 'LH', 'PBLH', 'LANDMASK', 'ZNT']
        Can have any pywasp spatial structure
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector
    finalize: boolean
        Convert the stabilility histogram into a mean and
        standard deviation?
    revert_to_original: boolean
        Return the histogram input in original format from the
        input `ds`? If false, keeps the data in stacked point
        format which is more efficient when doing large
        calculations for numerical wind atlases.
    **kwargs
        Arbitrary keyword arguments passed on to
        initialization of histogram

    Returns
    -------
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector
        with the values from ds-timeseries added.
    """
    if hist is None:
        hist = _init_histogram_baro(ds, **kw)

    ds_sub = ds[[WS, WD, "dugdz", "dvgdz"]]
    dss = _spatial_stack(ds_sub, copy=False).transpose(..., DIM_TIME, "point")
    # add = np.full(dss[WS].shape, 1.0, order="F", dtype=np.float32)
    hist = _spatial_stack(hist, copy=False).transpose(..., "wsbin", "sector", "point")

    # error arrays are inout in fortran, so we must create copies
    # to not add the error counter 2 times
    ierr_sum_dvgdz = hist["count_bins_exceeded"].copy().values

    sum_dugdz, count_bins_exceeded = _create_hist(
        dss[WS],
        dss[WD],
        0,
        hist.wsbin,
        hist.sector,
        dss["dugdz"],
        hist["sum_dugdz"].values,
        hist["count_bins_exceeded"].values,
    )
    sum_dvgdz, count_bins_exceeded = _create_hist(
        dss[WS],
        dss[WD],
        0,
        hist.wsbin,
        hist.sector,
        dss["dvgdz"],
        hist["sum_dvgdz"].values,
        ierr_sum_dvgdz,
    )

    var_names = (
        "sum_dugdz",
        "sum_dvgdz",
        "count_bins_exceeded",
    )
    for name in var_names:
        hist[name] = (hist[name].dims, eval(name))
        _update_var_attrs(hist[name], _HIS_BARO_ATTRS)
        hist[name].attrs["_pwio_data_is_2d"] = False

    dic_attrs = _create_time_attributes(ds_sub, hist=hist)
    hist = hist.assign_attrs(dic_attrs)

    if finalize:
        return _finalize_baro(hist)

    return _spatial_unstack(hist) if revert_to_original else hist



[docs]
@_validate_bwc_wrapper_factory(run_extra_checks=False)
def bwc_resample_like(source, target):
    """
    Resamples a histogram to different sector or wind speed bin structure.

    Parameters
    ----------
    source: xarray.Dataset
        Binned wind climate xr.Dataset object that we want to resample
    target: xarray.Dataset
        Binned wind climate xr.Dataset object structure that we want to resample to

    Returns
    -------
    xarray.Dataset
        Binned Wind Climate
    """
    result = bwc_resample_sectors(source, target.sizes["sector"])
    result = bwc_resample_wsbins_like(result, target)
    return result




[docs]
@_validate_bwc_wrapper_factory(run_extra_checks=False)
def bwc_resample_sectors(source, n_sectors=12, offset=None):
    """
    Resamples a histogram to different sector structure.

    Parameters
    ----------
    source: xarray.Dataset
        Binned wind climate xr.Dataset object that we want to resample
    target: xarray.Dataset
        Binned wind climate xr.Dataset object structure that we want to resample to
    offset: xarray.DataArray or float
        Offset in degrees with which the histogram are rotated (positive
        for clockwise rotation). If it is a DataArray, needs to have the same spatial shape as
        the binned wind climate.

    Returns
    -------
    xarray.Dataset
        Binned Wind Climate
    """
    if source.sizes["sector"] == n_sectors and offset is None:
        return source

    source = _spatial_stack(source)
    # copy of old attributes so we can unstack to the original structure
    oldattrs = {key: val for key, val in source.attrs.items() if "_pwio" in key}

    if offset is None:
        offset = xr.full_like(source.point, 0.0)
    elif isinstance(offset, float | int):
        offset = xr.full_like(source.point, offset)
    else:
        offset = to_point(offset)

    def _resample(wv_freq, doff, n_sectors):
        return _RESAMPLE_HIST(wv_freq, doff, n_sectors)

    wv_freq = xr.apply_ufunc(
        _resample,
        (source.wsfreq * source.wdfreq).transpose(..., "wsbin", "sector"),
        offset,
        input_core_dims=[["wsbin", "sector"], []],
        output_core_dims=[["wsbin", "sector"]],
        exclude_dims={"sector"},
        dask="allowed",
        vectorize=True,
        kwargs={"n_sectors": n_sectors},
        keep_attrs=True,
    )
    wdcenters = create_sector_coords(n_sectors)
    wv_freq = wv_freq.assign_coords({**wdcenters.coords})
    source = _bwc_from_counts(wv_freq)

    # Update metadata
    source.attrs["title"] = "Binned wind climate"
    # apply old attributes so that we can unstack
    source = source.assign_attrs(oldattrs)
    res = _spatial_unstack(source)
    return _update_history(res)




[docs]
@_validate_bwc_wrapper_factory(run_extra_checks=False)
def bwc_resample_wsbins_like(source, target, fit_weibull=True, errors="raise"):
    """
    Resamples a histogram to different wind speed bin structure.

    Parameters
    ----------
    source: xarray.Dataset
        Binned wind climate xr.Dataset object that we want to resample
    target: xarray.Dataset
        Binned wind climate xr.Dataset object structure that we want to resample to
    fit_weibull: boolean, optional
        Default True, when resampling the histogram find the sectorwise A and k and
        use these to resample the histogram. If false, use the A and k provided in
        the dataset instead.
    errors: {'raise', 'ignore'}, optional
        If 'raise' (default), raises a ValueError error if probability mass
        is transfered beyond the highest wind speed bin.
        If "ignore", all points including those that lose probability mass
        are returned.

    Returns
    -------
    xarray.Dataset
        Binned Wind Climate
    """

    # we reset coordinates with drop=True to make sure there is no user added coordinates that cause the dataarrays to be different
    if (
        not source["sector"]
        .reset_coords(drop=True)
        .equals(target["sector"].reset_coords(drop=True))
    ):
        raise ValueError(
            "Input and target histogram must have the same sector structure!"
        )

    # source and target wsbin structure are the same, so no resampling needed
    same_wsbins = (
        source["wsbin"]
        .reset_coords(drop=True)
        .equals(target["wsbin"].reset_coords(drop=True))
    )
    if same_wsbins and fit_weibull:
        return source

    source = _spatial_stack(source).transpose(..., "wsbin", "sector")
    target = _spatial_stack(target).transpose(..., "wsbin", "sector")

    if not fit_weibull:
        if not all(v in target.data_vars for v in ["A", "k"]):
            raise ValueError(
                "Variables A and k are needed for resampling when fit_weibull is set to False"
            )

    if fit_weibull:
        wsfreq, ierror = xr.apply_ufunc(
            _RESAMPLE_HIST_WSBINS,
            source.wsfreq,
            source.wsceil,
            target.wsceil,
            target.wsfloor,
            input_core_dims=[["wsbin", "sector"], *[["wsbin"]] * 3],
            output_core_dims=[["wsbin", "sector"], ["sector"]],
            exclude_dims={"wsbin"},
            dask="allowed",
            vectorize=True,
            keep_attrs=True,
        )
    else:
        wsfreq, ierror = xr.apply_ufunc(
            _RESAMPLE_HIST_WSBINS_AK,
            source.wsfreq,
            source.wsceil,
            target.A,
            target.k,
            target.wsceil,
            target.wsfloor,
            input_core_dims=[
                ["wsbin", "sector"],
                ["wsbin"],
                *[["sector"]] * 2,
                *[["wsbin"]] * 2,
            ],
            output_core_dims=[["wsbin", "sector"], ["sector"]],
            exclude_dims={"wsbin"},
            dask="allowed",
            vectorize=True,
            keep_attrs=True,
        )

    if ierror.sum() > 0 and errors == "raise":
        i_points = [i for i, v in enumerate(ierror) if v > 0]
        str_points = ",".join(map(str, i_points))
        raise ValueError(
            f"Resampling of the wsbins in {len(i_points)} out of {source.sizes['point']} points (i={str_points}) exceeded the number of target bins!"
        )

    result, _ = xr.broadcast(source[["point", "sector"]], target[["wsbin"]])
    result = result.drop_vars("point", errors="ignore")
    result["wsfreq"] = wsfreq
    result["wdfreq"] = source["wdfreq"]
    result.coords["wsceil"] = target["wsceil"].reset_coords(drop=True)
    result.coords["wsfloor"] = target["wsfloor"].reset_coords(drop=True)
    res = _spatial_unstack(result)

    return _update_history(res)



def _get_dgdz(z, sp, levs, lats):
    """
    Calculate the gradients of the zonal and meridional geostrophic wind components with respect to height.

    Parameters
    ----------
    z : array_like
        Geopotential height array.
    sp : array_like
        Surface pressure array.
    levs : array_like
        Pressure levels array.
    lats : array_like
        Latitudes array.

    Returns
    -------
    tuple of array_like
        Gradients of the zonal (dugdz) and meridional (dvgdz) geostrophic wind components with respect to height.
    """
    dugdz, dvgdz = _DGDZ(z, sp, levs, lats)
    return (dugdz, dvgdz)


def _get_dgdz_t(z, sp, levs, lats):
    """
    Calculate the time-dependent gradients of the zonal and meridional geostrophic wind components with respect to height.

    Parameters
    ----------
    z : array_like
        Geopotential height array.
    sp : array_like
        Surface pressure array.
    levs : array_like
        Pressure levels array.
    lats : array_like
        Latitudes array.

    Returns
    -------
    tuple of array_like
        Time-dependent gradients of the zonal (dugdz) and meridional (dvgdz) geostrophic wind components with respect to height.
    """
    dugdz, dvgdz = _DGDZ_TIME(z, sp, levs, lats)
    return (dugdz, dvgdz)


def _calc_dgdz(sfc):
    """
    Calculate the vertical gradients of the zonal and meridional geostrophic wind components with respect to height for a given surface dataset.

    Parameters
    ----------
    sfc : xarray.Dataset
        Surface dataset containing geopotential height, surface pressure, wind speed, and wind direction.

    Returns
    -------
    xarray.Dataset
        Dataset with added gradients of the zonal (dugdz) and meridional (dvgdz) geostrophic wind components with respect to height.
    """
    dugdz, dvgdz = xr.apply_ufunc(
        _get_dgdz,
        sfc["z"]
        .transpose("west_east", "south_north", "isobaricInhPa", "time")
        .astype("float32"),
        (sfc["sp"].transpose("west_east", "south_north", "time") / 100).astype(
            "float32"
        ),
        sfc["isobaricInhPa"],
        sfc["south_north"],
        input_core_dims=[
            ["west_east", "south_north", "isobaricInhPa"],
            ["west_east", "south_north"],
            ["isobaricInhPa"],
            ["south_north"],
        ],
        output_core_dims=[["west_east", "south_north"], ["west_east", "south_north"]],
        dask="allowed",
        vectorize=True,
        keep_attrs=True,
    )
    out = xr.Dataset(
        {"dugdz": dugdz, "dvgdz": dvgdz},
    )
    out = set_crs(out, 4326)
    return out.transpose("time", "south_north", "west_east")


def _calc_dgdz_t(sfc):
    """
    Calculate vertical gradients of the zonal and meridional geostrophic wind components with respect to height for a given dataset.

    Parameters
    ----------
    sfc : xarray.Dataset
        Dataset containing geopotential height, surface pressure, wind speed, and wind direction.

    Returns
    -------
    xarray.Dataset
        Dataset with added time-dependent gradients of the zonal (dugdz) and meridional (dvgdz) wind components with respect to height.
    """
    dugdz, dvgdz = _get_dgdz_t(
        sfc["z"].transpose("west_east", "south_north", "isobaricInhPa", "time"),
        sfc["sp"].transpose("west_east", "south_north", "time") / 100,
        sfc["isobaricInhPa"],
        sfc["south_north"],
    )
    out = xr.Dataset(
        {"dugdz": dugdz, "dvgdz": dvgdz},
    )

    out = set_crs(out, 4326)
    return out.transpose("time", "south_north", "west_east")