Source code for windkit.wind_climate.binned_wind_climate

# (c) 2022 DTU Wind Energy
"""Binned wind climate module

When measuring the wind speed and wind direction over a time period, one can
create a histogram by counting the frequency of occurence for each
wind speed and direction bin.

Because there can be large differences in the wind climate when the wind is
coming from different wind directions, a binned wind distribution is usually
specified per wind direction sector.

A valid Weibull wind climate therefore has a dimension ``sector`` and the variables
``wsbin`` and ``wdfreq``. Also it must have a valid spatial structure. This module contains
functions that operate on and create binned wind climates.
This includes the ability to create bwc datasets both from files and from
existing data, the ability to calculate common parameters from the bwc object,
and the ability to write them to the legacy *.tab* format.
"""

__all__ = [
    "validate_bwc",
    "is_bwc",
    "create_bwc",
    "read_bwc",
    "bwc_from_tswc",
    "bwc_to_file",
    "combine_bwcs",
    "weibull_fit",
]

import logging
import warnings
from functools import wraps
from pathlib import Path

import numpy as np
import pandas as pd
import scipy.stats
import xarray as xr
from lxml import etree

from windkit import weibull
from windkit.xarray_structures._validate import (
    _create_is_obj_function,
    _create_validation_wrapper_factory,
    _create_validator,
)
from windkit.xarray_structures.empty import (
    _copy_chunks,
    _define_std_arrays,
    _empty_unstack,
)
from windkit.xarray_structures.metadata import (
    _BWC_ATTRS,
    _HIS_ATTRS,
    _WEIB_ATTRS,
    _WSBIN_COORD_ATTRS,
    _update_history,
    _update_var_attrs,
)
from windkit.xarray_structures.sector import (
    create_sector_coords,
)
from windkit.xarray_structures.wsbin import (
    create_wsbin_coords,
)
from windkit.spatial import (
    create_dataset,
    get_crs,
    is_point,
    _spatial_stack,
    _spatial_unstack,
    to_point,
)
from windkit.utils import _infer_file_format
from windkit.wind import wd_to_sector
from windkit.wind_climate.time_series_wind_climate import _validate_tswc_wrapper_factory

logger = logging.getLogger(__name__)

SUPPORTED_BWC_FILE_FORMATS_READ = ["tab", "owc", "omwc", "nc"]
SUPPORTED_BWC_FILE_FORMATS_WRITE = ["tab"]

WS = "wind_speed"
WD = "wind_direction"
VAR_WS_FREQ = "ws_freq"
VAR_WD_FREQ = "wdfreq"
VAR_WS_FREQ_BY_SECTOR = "wsfreq"
DIM_TIME = "time"
DIM_WS = "wsbin"
DIM_WD = "sector"
WV_COUNT = "wv_count"
DATA_VAR_DICT_BWC = {"wsfreq": ["wsbin", "sector"], "wdfreq": ["sector"]}
REQ_DIMS_BWC = ["wsbin", "sector"]
REQ_COORDS_BWC = [
    "sector",
]


def _validate_freqs_greater_than_zero(wwc):
    """Helper function to validate wdfreq and wsfreq are positive"""
    response_list = []

    if "wsfreq" in wwc.data_vars and any(
        xr.where(wwc.wsfreq.values < 0.0, True, False).flatten()
    ):
        response_list.append("'wsfreq' has negative values")

    if "wdfreq" in wwc.data_vars and any(
        xr.where(wwc.wdfreq.values < 0.0, True, False).flatten()
    ):
        response_list.append("'wdfreq' has negative values")

    return response_list


validate_bwc = _create_validator(
    variables=DATA_VAR_DICT_BWC,
    dims=REQ_DIMS_BWC,
    coords=REQ_COORDS_BWC,
    extra_checks=[_validate_freqs_greater_than_zero],
)

_validate_bwc_wrapper_factory = _create_validation_wrapper_factory(validate_bwc)

is_bwc = _create_is_obj_function(validate_bwc)


def _wv_count_wrapper(func):
    """
    Decorator to handle wind vector count format.

    Handles the case where the parameter is an xarray.DataArray with a wv_count
    format or a xarray.Dataset with a 'wv_count' data variable, i.e. has wind
    counts by bin and sector. If so, returns a binned wind climate xarray.Dataset.
    If it is not the case, it does nothing.
    """

    @wraps(func)
    def wv_count_to_bwc(*args, **kwargs):
        obj = args[0]
        try:
            # handles the case of a xarray.Dataset
            obj = obj["wv_count"]
        except KeyError:
            pass
        if isinstance(obj, xr.DataArray) and obj.name == "wv_count":
            bwc = _bwc_from_counts(obj)
            result = func(bwc, *args[1:], **kwargs)
        else:
            result = func(*args, **kwargs)
        return result

    return wv_count_to_bwc


def _freqs_to_dataset(
    wsfreq,
    wdfreq,
    wsbins,
    south_north,
    west_east,
    height,
    crs,
    **kwargs,
):
    """
    Makes data variables, coordinates, and attributes ready for
    xarray.Dataset construction from a histogram of wind speeds and directions

    Parameters
    ----------
    wsfreq: np.array([n_wsbins, n_sectors]): float64
        Wind speed frequency by wind speed and sector
    wdfreq: np.array([n_sectors]): float64
        Wind direction frequency by sector
    wsbins: np.array([n_wsbins + 1]): float64
        Edges of wind speed bins
    south_north: float64
        Coordinate value in y-direction
    west_east: float64
        Coordinate value in x-direction
    height: float64
        Height above ground
    crs : int, dict, str or pyproj.crs.CRS
            Value to initialize `pyproj.crs.CRS`
    kwargs : dict
        Other kwargs are added as attributes to the dataset

    Returns
    -------
    xarray.Dataset
    """

    wsbins = np.asarray(wsbins, dtype=np.float64)

    _, n_sectors = wsfreq.shape

    wscenters = create_wsbin_coords(wsbins)
    wdcenters = create_sector_coords(n_sectors)

    na = np.newaxis

    wdfreq /= np.sum(wdfreq)
    with np.errstate(all="ignore"):
        wsfreq = wsfreq / np.sum(wsfreq, axis=0)[na, :]
    if np.isnan(wsfreq).any():
        logging.debug(
            "There are sectors with no wind observations (nan), which will be set to 0.0."
        )
        wsfreq = np.nan_to_num(wsfreq)

    # Build dataset
    ds = create_dataset(west_east, south_north, height, crs).drop_vars("output")
    ds["wdfreq"] = (("sector", "point"), wdfreq[:, na])
    ds["wsfreq"] = (("wsbin", "sector", "point"), wsfreq[:, :, na])

    ds = ds.assign_coords(
        {
            **wscenters.coords,
            **wdcenters.coords,
        }
    )

    ds = ds.assign_attrs(kwargs)

    ds = _update_var_attrs(ds, _BWC_ATTRS)
    return _update_history(ds)



[docs]
def create_bwc(output_locs, n_sectors=12, n_wsbins=30, not_empty=True, seed=9876538):
    """
    Create empty binned wind climate dataset.

    If not_empty=True, the data variables are filled with meaninful random numbers,
    e.g. the sum of wdfreq is 1.

    Parameters
    ----------
    output_loc : xarray.Dataset
        Output geospatial information.
    n_sectors : int
        Number of sectors, defaults to 12.
    n_wsbins: int
        Number of histogram bins, defaults to 30.
    not_empty : bool
        If true, the empty dataset is filled with random
        meaningful data.
    seed : int
        Seed for the random data, defaults to 9876538.

    Returns
    -------
    ds : xarray.Dataset
        Binned wind climate dataset either empty or filled with
        random numbers.
    """

    da_dict, unstack_attrs, is_scalar = _define_std_arrays(output_locs, n_sectors)
    ds = xr.Dataset(
        {"wdfreq": da_dict["da_4d"], "wsfreq": da_dict["da_4d"]}, attrs=unstack_attrs
    )
    wsbin_coords = create_wsbin_coords(n_wsbins, width=1.0)

    ds["wsfreq"] = ds["wsfreq"].expand_dims({"wsbin": wsbin_coords.values})
    ds = ds.assign_coords({**wsbin_coords.coords})
    n_pt = len(ds["point"])

    if not_empty:
        wsbin_n = np.linspace(1, n_wsbins, n_wsbins)
        rng = np.random.default_rng(seed)
        wsbin_full = wsbin_n.repeat(n_sectors * n_pt).reshape(
            (n_wsbins, n_sectors, n_pt)
        )
        k = rng.uniform(1.5, 2.5, [n_sectors, n_pt])
        A = rng.uniform(5, 10, [n_sectors, n_pt])
        wsbin_freq_not1 = scipy.stats.weibull_min.pdf(wsbin_full, k, scale=A)
        wsbin_freq = wsbin_freq_not1 / wsbin_freq_not1.sum(0)

        ds["wsfreq"] = xr.DataArray(wsbin_freq, ds["wsfreq"].coords, ds["wsfreq"].sizes)
        ds["wdfreq"] = xr.DataArray(
            rng.dirichlet(np.ones(n_sectors), n_pt).T,
            ds["wdfreq"].coords,
            ds["wdfreq"].sizes,
        )

    ustack_ds = _empty_unstack(ds, is_scalar)
    ds = _update_var_attrs(_copy_chunks(output_locs, ustack_ds), _BWC_ATTRS)

    return _update_history(ds)



def _create_wv_count(
    output_locs, n_sectors=12, n_wsbins=30, not_empty=True, seed=9876538
):
    """
    Create empty wind vector count dataset.
    If not_empty=True, the data variables are filled with meaninful random numbers.

    Parameters
    ----------
    output_loc : xarray.Dataset
        Output geospatial information
    n_sectors : int
        Number of sectors, defaults to 12.
    not_empty : bool
        If true, the empty dataset is filled with random
        meaningful data, defaults to True.
    seed : int
        Seed for the random data, defaults to 9876538.

    Returns
    -------
    ds : xarray.Dataset
        Wind vector count dataset either empty or filled with
        random numbers.
    """
    da_dict, unstack_attrs, is_scalar = _define_std_arrays(output_locs, n_sectors)
    ds = xr.Dataset({"wv_count": da_dict["da_4d"]}, attrs=unstack_attrs)
    wsbin_coords = create_wsbin_coords(n_wsbins, width=1.0)

    ds["wv_count"] = ds["wv_count"].expand_dims({"wsbin": wsbin_coords.values})
    ds = ds.assign_coords({**wsbin_coords.coords})
    n_pt = len(ds["point"])

    if not_empty:
        rng = np.random.default_rng(seed)
        k = rng.uniform(1.5, 2.5, [n_sectors, n_pt])
        A = rng.uniform(5, 10, [n_sectors, n_pt])
        wsbin_n = np.linspace(1, n_wsbins, n_wsbins)
        wsbin_full = wsbin_n.repeat(n_sectors * n_pt).reshape(
            (n_wsbins, n_sectors, n_pt)
        )
        scaling_fact = rng.uniform(500, 1000, [n_sectors, n_pt])
        wv_count_array = np.rint(
            np.multiply(
                scipy.stats.weibull_min.pdf(wsbin_full, k, scale=A),
                scaling_fact[np.newaxis, :, :],
            )
        )
        ds["wv_count"].values = wv_count_array

    ustack_ds = _empty_unstack(ds, is_scalar)
    ds = _update_var_attrs(_copy_chunks(output_locs, ustack_ds), _HIS_ATTRS)

    return _update_history(ds)



[docs]
def read_bwc(filename, *, crs=None, file_format="infer", **kwargs):
    """Creates binned wind climate xarray.Dataset from file.

    Parameters
    ----------
    filename : str or Path
        Path to a file that can be opened as a bwc. This includes .tab, .owc,
        .omwc, and .nc that were created as bwc files. The script will
        use the file extension to determine the file type and then parse it
        into a bwc DataSet object.
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon/WGS84 geodetic datum) for .tab, .owc and .omwc files.
    file_format : str
        The file format of the input file. If 'infer', the file format will be inferred from the file extension.
        Supported formats are 'tab', 'owc', and 'omwc'. Defaults to 'infer'.
    **kwargs : dict
        Additional arguments to be passed to the file reader.

    Returns
    -------
    ds : xarray.Dataset
        binned wind climate dataset that is formatted to match the bwc description.

    Raises
    ------
    ValueError
        If crs does not match dataset crs.
    ValueError
        If type of bwc is undetectable.
    """
    if file_format == "infer":
        file_format = _infer_file_format(filename)

    if file_format == "tab":
        if crs is None:
            crs = 4326
        bwc = _open_tabfile(filename, crs, **kwargs)
    elif file_format in ["owc", "omwc"]:
        if crs is None:
            crs = 4326
        bwc = _open_owcfile(str(filename), crs, **kwargs)
    elif file_format == "nc":
        bwc = xr.open_daaset(filename, **kwargs)
    else:
        raise ValueError(
            f"Unable to detect type of bwc file {filename} with extension {file_format}"
        )
    validate_bwc(bwc, run_extra_checks=False)
    bwc = _update_var_attrs(bwc, _BWC_ATTRS)
    return _update_history(bwc)




[docs]
def combine_bwcs(bwc_list):
    """Combines a list of bwc's into one binned wind climate.

    .. note:: The output is always an object with a point structure

    Parameters
    ----------
    bwc_list: list
        List of binned wind climate xarray.Dataset.

    Returns
    -------
    bwcs: xarray.Dataset
        xarray Dataset with merged binned wind climates.
    """
    max_bins = max([bwc.sizes["wsbin"] for bwc in bwc_list])
    filled_bwcs = []
    for bwc in bwc_list:
        if not is_point(bwc):
            bwc = to_point(bwc)
        filled_bwcs.append(_fill_wsbin(bwc, max_bins))

    bwcs = xr.concat(filled_bwcs, "point")
    return _update_history(bwcs)



def _open_tabfile(tab_file, crs=4326):
    """Creates bwc object from a "tab" ascii file

    Parameters
    ----------
    tab_file: str
        Path to file tab file
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon/WGS84 geodetic datum)

    Returns
    -------
    xr.DataSet
        xarray DataSet that is formatted to match the bwc description

    Raises
    ------
    ValueError
        If .tab file does not have appropriate string formatting
    """

    def _read_floats(fobj):
        return map(float, fobj.readline().split())

    def _load_tabfile(tab_file, encode):
        with open(tab_file, encoding=encode) as fobj:
            header = fobj.readline().strip()
            south_north, west_east, height = _read_floats(fobj)
            tuple_header = tuple(_read_floats(fobj))
            if len(tuple_header) == 4:
                n_sectors, _, _, tabfile_type = tuple_header
                if tabfile_type != 0:
                    raise ValueError(
                        "Tabfiles of type {0} are not supported".format(tabfile_type)
                    )
            else:
                n_sectors, _, _ = tuple_header
            n_sectors = int(n_sectors)

            wd_freq = np.array(list(_read_floats(fobj)))
            wsdata = np.genfromtxt(fobj)

        kwargs = {
            "south_north": south_north,
            "west_east": west_east,
            "height": height,
            "description": header,
            "crs": crs,
        }

        return kwargs, wd_freq, wsdata

    try:
        kwargs, wd_freq, wsdata = _load_tabfile(tab_file, "ascii")
    except (UnicodeDecodeError, UnicodeError):
        try:
            kwargs, wd_freq, wsdata = _load_tabfile(tab_file, "utf_8")
        except (UnicodeDecodeError, UnicodeError):
            try:
                kwargs, wd_freq, wsdata = _load_tabfile(tab_file, "utf_16")
            except (UnicodeDecodeError, UnicodeError):
                try:
                    kwargs, wd_freq, wsdata = _load_tabfile(tab_file, "cp1256")
                except (UnicodeDecodeError, UnicodeError):
                    raise ValueError(
                        "Unknown encoding for .tab file, please save as ASCII or UTF-8"
                    )

    ws_freq = wsdata[:, 1:]
    ws_bins = np.append(0, wsdata[:, 0])

    return _freqs_to_dataset(ws_freq, wd_freq, ws_bins, **kwargs)


def _parse_owc(owc, crs=4326):
    """
    Parses an OWC file into a bwc object

    Parameters
    ----------
    owc : xml tree
        An XML element loaded by lxml
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon/WGS84 geodetic datum)

    Returns
    -------
    xr.DataSet
        xarray DataSet that is formatted to match the bwc description
    """
    # Get main dimensions
    num_sec = int(owc.attrib["CountOfSectors"])
    num_wsbins = int(
        owc.xpath("ObservedWind[1]/SpeedFrequencyDistribution" + "/@NumberOfBins")[0]
    )

    # Create arrays
    ws_freq = np.zeros((num_wsbins, num_sec))
    wd_freq = np.zeros(num_sec)
    cen_angle = np.zeros(num_sec)
    ws_bins = np.zeros(num_wsbins)

    # Get site information
    site_info = owc.xpath("RveaAnemometerSiteDetails")[0].attrib
    lat = float(site_info["LatitudeDegrees"])
    lon = float(site_info["LongitudeDegrees"])
    height = float(site_info["HeightAGL"])
    header = site_info["Description"]

    # Get wind speed histogram
    for obsWind in owc.xpath("ObservedWind"):
        # Get sector information
        sec = int(obsWind.attrib["Index"]) - 1
        cen_angle[sec] = float(obsWind.attrib["SectorWidthDegrees"])
        wd_freq[sec] = float(obsWind.attrib["SectorFrequency"])

        # Get wind speed histogram
        for wsBin, ws in enumerate(obsWind.getchildren()[0]):
            ws_bins[wsBin] = float(ws.attrib["UpperSpeedBound"])
            ws_freq[wsBin, sec] = float(ws.attrib["Frequency"])

    # Extract 1st column which is wind speeds and
    # add a 0.0 value to the first position
    ws_bins = np.insert(ws_bins, 0, 0.0)

    kwargs = {
        "south_north": lat,
        "west_east": lon,
        "height": height,
        "description": header,
        "crs": crs,
    }

    return _freqs_to_dataset(ws_freq, wd_freq, ws_bins, **kwargs)


def _open_owcfile(xml_file, crs=4326):
    """Creates bwc object from a .owc xml file

    Parameters
    ----------
    n: xml_file
        Path to xml file
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon/WGS84 geodetic datum)

    Returns
    -------
    bwc
    """
    tree = etree.parse(str(xml_file))
    root = tree.getroot()
    return _parse_owc(root, crs)


def _count_to_wd_freq(wind_hist):
    """Convert wind vector count histogram to wind sector-wise relative frequency.

    Parameters
    ----------
    wind_hist : xarray.DataArray
        A valid pywasp wind-vector-count histogram DataArray


    Returns
    -------
    relfreq : xarray.DataArray
        Data array with sectorwise relative frequencies.
    """
    relfreq = wind_hist.sum(dim=[DIM_WS], skipna=False) / wind_hist.sum(
        dim=[DIM_WS] + [DIM_WD], skipna=False
    )
    relfreq.name = VAR_WD_FREQ
    return _update_var_attrs(relfreq, _BWC_ATTRS)


def _count_to_ws_freq_by_sector(wind_hist):
    """DEPRECATED, use windkit.bwc_from_counts instead"""
    warnings.warn(
        "The 'count_to_ws_freq_by_sector' function is renamed as 'bwc_from_counts' and will be removed in a future version. "
        + "Use 'bwc_from_counts' instead.",
        FutureWarning,
        stacklevel=2,
    )
    return _bwc_from_counts(wind_hist)


def _bwc_from_counts(wind_hist):
    """Convert wind vector count histogram to binned wind climate dataset.

    Parameters
    ----------
    wind_hist : xarray.DataArray
        A valid pywasp wind-vector-count histogram DataArray


    Returns
    -------
    relfreq : xarray.Dataset
        Dataset with wind speed frequencies
        normalized in each sector and the rel.
        frequency of occurance of wind direction.
    """
    ds = xr.Dataset()
    ds[VAR_WS_FREQ_BY_SECTOR] = wind_hist / wind_hist.sum(dim=DIM_WS, skipna=False)
    ds[VAR_WD_FREQ] = _count_to_wd_freq(wind_hist)
    ds.attrs.update(wind_hist.attrs)
    # replace nan with 0
    ds = ds.fillna(0)
    ds = _update_var_attrs(ds, _BWC_ATTRS)
    return _update_history(ds)



[docs]
@_validate_tswc_wrapper_factory(run_extra_checks=False)
def bwc_from_tswc(
    ts, wsbin_width=1.0, n_wsbins=30, n_sectors=12, normalize=True, **kwargs
):
    """Creates object from a timeseries.

    Parameters
    ----------
    ts : xarray.Dataset
        Xarray dataset with variables 'wind_speed'
        and 'wind_direction' and with a coordinate and
        dimension 'time'.
    wsbin_width: float
        Width of the wind speed bins, defaults to 1.0.
    n_wsbins : int
        Number of wind speed bins, defaults to 30.
    n_sectors : int
        Number of sectors, defaults to 12.
    normalize : bool
        If set to True, convert wind vector count histogram to
        wind sector-wise relative frequency. Defaults to True
    kwargs: dict
        Additional argument 'wasp_desription' can be added to the dataset.

    Returns
    -------
    bwc : xarray.Dataset
        Binned wind climate from the timeseries.
    """

    def point_histogram2d(ws, wd, bins):
        hist, _, _ = np.histogram2d(ws, wd, bins=bins)
        hist_no_nan = np.nan_to_num(hist)

        return hist_no_nan

    # stack windkit dataset to point
    ds = _spatial_stack(ts)
    ds_attrs = {
        "description": kwargs.get("wasp_desription", ""),
        **_create_time_attributes(ds),
    }
    # ignore times when any of the time series is not available
    ds = ds.dropna(dim=DIM_TIME)
    if ds.sizes["time"] < 1:
        raise ValueError("Need at least 1 sample to create a histogram")

    ws_bins = create_wsbin_coords(n_wsbins, width=wsbin_width)
    wd_bins = create_sector_coords(n_sectors)
    ws_bin_edges = np.append(0.0, ws_bins["wsceil"].values)

    # convert wdir to sectors
    # wd, wd_sec = dir_to_sec(ds[WD], n_sectors)
    (
        wd,
        _,
    ) = wd_to_sector(ds[WD], n_sectors, output_type="indices")
    ds["wd_bins"] = (ds[WD].dims, wd.data)
    bins = (ws_bin_edges, np.arange(n_sectors + 1))

    hist = xr.apply_ufunc(
        point_histogram2d,
        ds["wind_speed"],
        ds["wd_bins"],
        dask="allowed",
        vectorize=True,
        input_core_dims=[["time"], ["time"]],
        output_core_dims=[["wsbin", "sector"]],
        kwargs={"bins": bins},
        keep_attrs=True,
    )

    hist.name = WV_COUNT
    hist = hist.assign_coords({**ws_bins.coords, **wd_bins.coords})

    if normalize:
        hist = _bwc_from_counts(hist)
    else:
        hist = _update_var_attrs(hist.to_dataset(), _BWC_ATTRS)

    if "_pwio_data_is_2d" in ds["wind_speed"].attrs:
        for var in hist.data_vars:
            hist[var].attrs["_pwio_data_is_2d"] = ds["wind_speed"].attrs[
                "_pwio_data_is_2d"
            ]

        if "_pwio_data_is_2d" in hist.attrs:
            del hist.attrs["_pwio_data_is_2d"]

    # the _pwio_was_stacked_point attribute is lost with apply_ufunc
    hist.attrs.update(ds.attrs)

    hist = hist.assign_attrs(ds_attrs)
    hist = _spatial_unstack(hist)

    return _update_var_attrs(_spatial_unstack(hist), _BWC_ATTRS)



@_validate_tswc_wrapper_factory(run_extra_checks=False)
def _create_time_attributes(ds: xr.Dataset, hist=None):
    """Create time attributes for binned wind climate.

    We attached the time attributes to a new or existing
    binned wind climate. If it has existing attributes, these
    will be used as well when calculating the meta data.

    Parameters
    ----------
    ds : xarray.Dataset
        Xarray dataset with variables 'wind_speed'
        and 'wind_direction' and with a coordinate and
        dimension 'time'.
    hist : xarray.Dataset, optional
        A valid pywasp histogram dataset with, defaults to None

    Returns
    -------
    dic_result: dict
        Dictionary with 'start_time', 'end_time', 'interval',
        'count', 'count_missing', 'recovering_percentage'
    """
    # find the first and last time in the time series we process
    first_time = pd.Timestamp(ds["time"].min().values)
    last_time = pd.Timestamp(ds["time"].max().values)
    number_of_samples = int(ds.dropna(dim="time").sizes["time"])
    # find most frequent interval in minutes
    # currently when you provide an existing histogram
    # as input it will just overwrite the sampling interval
    # and recovery percentage. Since we only process model
    # data this way, it is probably OK for now.
    intervals, counts = np.unique(
        ds["time"].diff("time") / pd.Timedelta("1 ms"), return_counts=True
    )
    interval = intervals[counts.argmax()]
    series_start_to_end = np.arange(
        first_time, last_time, pd.Timedelta(f"{interval} ms")
    )
    # arange does not include the last step but we want
    # this for the max samples
    max_possible_samples = int(series_start_to_end.size + 1)

    # check for existing first and last time in input
    # it is assumed that this histogram has also been created
    # with this function and the attributes should be present
    try:
        first_time = np.min([first_time, pd.to_datetime(hist.attrs["start_time"])])
        last_time = np.max([last_time, pd.to_datetime(hist.attrs["end_time"])])
        max_possible_samples = max_possible_samples + hist.attrs["count_expected"]
        number_of_samples = number_of_samples + hist.attrs["count"]
    except (KeyError, AttributeError) as e:  # noqa
        logging.info("No attributes present yet, adding new ones.")

    dic_result = {
        "start_time": str(first_time),
        "end_time": str(last_time),
        "interval": str(pd.Timedelta(f"{interval} ms")),
        "count_expected": max_possible_samples,
        "count": number_of_samples,
        "count_missing": max_possible_samples - number_of_samples,
        "recovery_percentage": 100 * number_of_samples / max_possible_samples,
    }

    return dic_result


def _tab_string(bwc):
    """Returns string representation of bwc dataset

    Parameters
    ----------
    bwc: xr.Dataset
        Binned wind climate xr.Dataset object

    Returns
    -------
    string: str
        String representation of bwc dataset
    """

    def _to_string(node):
        nwsbin = node.sizes["wsbin"]
        n_sectors = node.sizes["sector"]

        le = "\n"

        header_str = node.attrs.get("description", "No Header")
        string = header_str + le

        string += (
            "\t".join(
                map(
                    str,
                    [
                        node["south_north"].values,
                        node["west_east"].values,
                        node["height"].values,
                    ],
                )
            )
            + le
        )

        string += "\t".join(map(str, [n_sectors, 1.0, 0.0])) + le

        string += "    \t" + "\t".join("%6.2f" % f for f in node.wdfreq * 100.0) + le

        wsfreq = np.round(node.wsfreq.values * 1000.0, 3)

        for iws in range(nwsbin):
            string += "%4.1f" % node.wsceil.values[iws] + "\t"
            string += "\t".join("%6.2f" % f for f in wsfreq[iws, :])
            if iws < nwsbin - 1:
                string += le

        return string

    return _to_string(bwc.squeeze())


@_wv_count_wrapper
def _to_tabfile(bwc, /, path=None):
    """Write bwc to tab-style ascii file.

    Parameters
    ----------
    bwc: xr.Dataset
        Binned wind climate xr.Dataset object.

    path: str or pathlib.Path
        dir or file path to write the file. Default value set
        to the current working directory.

    """

    def _write(node, fpath):
        with open(fpath, "w", newline="\r\n") as fobj:
            fobj.write(_tab_string(node))

    def _fmt_single_point_filename(ds):
        single_point_coords = ["height", "south_north", "west_east"]
        vals = []
        for coord in single_point_coords:
            vals.append(ds[coord].values.flatten()[0])
        filename = f"bwc_height{vals[0]}_south_north{vals[1]}_west_east{vals[2]}.tab"
        return filename

    if not get_crs(bwc).is_geographic:
        raise RuntimeError(
            "Binned wind climate dataset with projected coordinate systems cannot be written to a '.tab' file."
        )

    if path is None:
        path = Path.cwd()
    path = Path(path)
    if path.suffix == "":  # it is a Directory
        path.mkdir(parents=True, exist_ok=True)

    # If dataset has no extra dimensions (larger than  size=1):
    # write file and return early.
    if bwc.squeeze().wsfreq.ndim == 2:
        if Path(path).is_dir():
            # fpath = path / "bwc.tab"
            fpath = path / _fmt_single_point_filename(bwc)
        else:
            path.parent.mkdir(parents=True, exist_ok=True)
            fpath = path
        _write(bwc.squeeze(), fpath)
        return

    # If dataset has extra dimensions (of size > 1):
    # Stack extra dimensions, loop over them, and write to tab files
    # Using file_name that contains coordinate information.
    dims_extra = [d for d in bwc.wsfreq.dims if d not in ["wsbin", "sector"]]
    stacked = bwc.stack(point=dims_extra)

    # Create file_name format string
    if Path(path).is_dir():
        file_name_fmt = (
            "_".join(["bwc"] + [f"{d}" + "{" + f"{d}" + "}" for d in dims_extra])
            + ".tab"
        )
    else:
        raise ValueError(
            "'path' argument is a filename, but the dataset has more than one point."
            " Try giving a directory as an argument."
        )

    # Loop and write to tab files
    for ipt in range(stacked.sizes["point"]):
        node = stacked.isel(point=slice(ipt, ipt + 1)).reset_index("point").squeeze()
        kw = {d: node[d].values for d in dims_extra}
        fpath = path / file_name_fmt.format(**kw)
        _write(node, fpath)

    return



[docs]
@_validate_bwc_wrapper_factory(run_extra_checks=False)
def bwc_to_file(bwc, filename, *, file_format="infer", **kwargs):
    """Write binned wind climate to file.

    Parameters
    ----------
    bwc: xr.Dataset
        Binned wind climate xr.Dataset object.
    filename: str or pathlib.Path
        Path to the file to write the binned wind climate to.
    file_format: str
        The file format of the output file. If 'infer', the file format will be inferred from the file extension.
        Supported formats are 'tab'. Defaults to 'infer'.
    **kwargs: dict
        Additional keyword arguments to pass to the writing function.
    """
    if file_format == "infer":
        file_format = _infer_file_format(filename)

    if file_format == "tab":
        _to_tabfile(bwc, filename, **kwargs)
    else:
        raise ValueError(f"Unsupported file format: {file_format}")



@_wv_count_wrapper
def _mean_ws_moment(bwc, /, n=1.0, bysector=False):
    """Calculate the n^th moment of the wind speed from a bwc

    Parameters
    ----------
    bwc: xarray.Dataset
        Binned wind climate xr.Dataset object
    n: float
        Moment to compute, defaults to 1.0.
    bysector: bool
        Whether to return the sectorwise wind speed moment or the
        all-sector mean moment. Defaults to False.

    Returns
    -------
    ws_moment: xarray.DataArray
        Array of wind speed moments.
    """
    if bysector:
        return (bwc.wsfreq * bwc.wsbin**n).sum(dim=["wsbin"], skipna=False)
    return (bwc.wdfreq * bwc.wsfreq * bwc.wsbin**n).sum(
        dim=["wsbin", "sector"], skipna=False
    )


@_wv_count_wrapper
def _ws_cdf(bwc, /, bysector=False):
    """Calculate the cumulative distribution function (CDF) of the wind speed from a bwc

    Parameters
    ----------
    bwc: xarray.Dataset
        Binned wind climate xr.Dataset object
    bysector: bool
        Whether to return the sectorwise wind speed cdf or the
        all-sector mean cdf. Defaults to False which returns the
        all-sector mean cdf.

    Returns
    -------
    ws_cdf: xarray.DataArray
        Array of wind speed cdf.
    """
    if bysector:
        return bwc.wsfreq.cumsum(dim=["wsbin"], skipna=False)
    return (
        (bwc.wdfreq * bwc.wsfreq)
        .sum(dim="sector", skipna=False)
        .cumsum(dim=["wsbin"], skipna=False)
    )


@_wv_count_wrapper
def _ws_freq_gt_mean(bwc, /, bysector=False):
    """Calculate the frequency of wind speeds greater than the mean wind speed.

    Parameters
    ----------
    bwc: xarray.Dataset
        Binned wind climate xr.Dataset object

    bysector: bool
        Whether to return the sectorwise wind speed cdf or the
        all-sector mean cdf. Defaults to False which returns the
        all-sector value.

    Returns
    -------
    freq_gt_mean: xarray.DataArray
        Array of fraction greater than the mean wind speed in the histograms

    """
    # check for dimensions with no coordinates and temporarily add them
    # to the dataset to avoid problems with xarrays .interp method.
    # This is due to an issue in xarray when we have dimensions
    # with no coordiatnes on both the dataset and interpolation coordinates.
    # We should make an issue on xarray to fix this and remove this workaround afterward.

    dims_no_coord = [d for d in bwc.dims if d not in bwc.coords]
    bwc = bwc.assign_coords({d: (d, np.arange(bwc[d].size)) for d in dims_no_coord})

    # We set the wsceil as the coordinate because we want to interpolate
    # between bin edges.
    cdf = _ws_cdf(bwc, bysector=bysector).set_index({"wsbin": "wsceil"})
    mean = _mean_wind_speed(bwc, bysector=bysector)
    fgtm = 1.0 - cdf.interp(wsbin=mean, method="linear")
    # If the mean wind speed is below the first bin ceiling or above the last, we get nan values back
    # We set these to 0.5, which is the value we would get if the mean wind speed was exactly in the middle
    fgtm = fgtm.fillna(0.5)

    # Remove variables related to the wind speed bins if present
    fgtm = fgtm.drop_vars([v for v in ["wsbin", "wsfloor"] if v in fgtm.coords])

    # remove temporary dimensions
    fgtm = fgtm.drop_vars(dims_no_coord)

    return fgtm


def _fill_wsbin(bwc, /, n_wsbins):
    """Expands a binned wind climate to n_wsbins number of bins
    This is a useful feature when you want to combine several
    binnen wind climates in one dataset, because that requires that they
    have the same dimensions. The last two bins of the histogram
    are used to extrapolate the spacing. Up to this point the bins
    can also be irregular.

    Parameters
    ----------
    bwc: :any:`windkit.bwc`
        Binned wind climate
    n_wsbins: int
        Number of wind speeds bins

    Returns
    -------
    filled_bwc: :any:`windkit.bwc`
        Binned wind climate with n_wsbins number of bins
    """
    extension_bin = -2  # index of second last bin of histogram
    owsbin = bwc.sizes["wsbin"]
    # get spacing between last two bins
    dws = np.diff(bwc.wsbin[extension_bin:])
    newbins = np.zeros(n_wsbins)  # create new array with desired length
    newbins[0:owsbin] = bwc.wsbin.values  # set old bins
    newbins[owsbin:n_wsbins] = newbins[owsbin - 1] + dws * np.arange(
        1, n_wsbins - owsbin + 1
    )  #
    wsceil = np.append(
        bwc.wsceil, bwc.wsceil[[-1]] + dws * np.arange(1, n_wsbins - owsbin + 1)
    )
    wsfloor = np.append(
        bwc.wsfloor, bwc.wsfloor[[-1]] + dws * np.arange(1, n_wsbins - owsbin + 1)
    )
    filled_bwc = bwc.interp(wsbin=newbins, kwargs={"fill_value": 0.0})
    filled_bwc["wsceil"] = (("wsbin"), wsceil, _WSBIN_COORD_ATTRS["wsceil"])
    filled_bwc["wsfloor"] = (("wsbin"), wsfloor, _WSBIN_COORD_ATTRS["wsfloor"])

    return filled_bwc



[docs]
@_validate_bwc_wrapper_factory(run_extra_checks=False)
def weibull_fit(bwc, include_met_fields=None, atol=1e-8):
    """
    Returns sectorwise Weibull parameters using WAsP's fitting algorithm.

    Parameters
    ----------
    bwc : xarray.Dataset
        Binned wind climate xr.Dataset object

    include_met_fields : bool
        Whether to include met fields in the output. Defaults to False.

    atol : float
        Absolute tolerance for the weibull-fitting algorithm. Defaults to 1e-6.

    Returns
    -------
    xarray.Dataset
        Weibull Wind Climate of same spatial extent as the input bwc
    """

    # Not implemtened yet # TODO: implement
    if include_met_fields is not None:
        raise NotImplementedError(
            "The include_met_fields argument is not implemented yet."
        )

    m1 = _mean_ws_moment(bwc, 1.0, bysector=True)
    m3 = _mean_ws_moment(bwc, 3.0, bysector=True)
    fgtm = _ws_freq_gt_mean(bwc, bysector=True)

    shape, scale = weibull.fit_weibull_wasp_m1_m3_fgtm(m1, m3, fgtm, atol=atol)

    wb = bwc[["wdfreq"]].astype("float64").copy()
    wb["A"] = shape
    wb["k"] = scale

    wb = _update_var_attrs(wb, _WEIB_ATTRS)

    return _update_history(wb)



def _mean_wind_speed(bwc, bysector=False):
    """Calculate the mean wind speed.

    Parameters
    ----------
    bwc: xarray.Dataset
        Binned wind climate xr.Dataset object.
    bysector: bool
        Return sectorwise mean wind speed if True. Defaults to False.

    Returns
    -------
    bwc : xarray.DataArray
        Mean wind speed of the bwc.
    """
    ds = _mean_ws_moment(bwc, 1.0, bysector)
    return _update_history(ds)


def _mean_power_density(bwc, bysector=False, air_density=1.225):
    """Calculate the power density

    Calculates the power density using a standard atmosphere air density of 1.225 kg m-3

    Parameters
    ----------
    bwc: xarray.Dataset
        Binned wind climate xr.Dataset object.
    bysector: bool
        Return sectorwise mean wind speed if True. Defaults to False.
    air_dens : float
        Air density. Default set to 1.225 kg.m^-3.

    Returns
    -------
    bwc : xarray.DataArray
        Power density of the bwc.
    """
    return 1 / 2 * air_density * _mean_ws_moment(bwc, 3.0, bysector)