# Source code for windkit.wind_climate.binned_wind_climate

# (c) 2022 DTU Wind Energy
"""Binned wind climate module

When measuring the wind speed and wind direction over a time period, one can
create a histogram by counting the frequency of occurrence for each
wind speed and direction bin.

Because there can be large differences in the wind climate when the wind is
coming from different wind directions, a binned wind distribution is usually
specified per wind direction sector.

A valid binned wind climate therefore has a dimension ``sector`` and the variables
``wsfreq`` and ``wdfreq``. Also it must have a valid spatial structure. This module contains
functions that operate on and create binned wind climates.
This includes the ability to create bwc datasets both from files and from
existing data, the ability to calculate common parameters from the bwc object,
and the ability to write them to the legacy *.tab* format.
"""

# Public API of the binned-wind-climate module; everything else is private.
__all__ = [
    "validate_bwc",
    "is_bwc",
    "create_bwc",
    "read_bwc",
    "bwc_from_tswc",
    "bwc_to_file",
    "combine_bwcs",
    "weibull_fit",
]

import logging
import warnings
from functools import wraps
from pathlib import Path

import numpy as np
import pandas as pd
import scipy.stats
import xarray as xr
from lxml import etree

from windkit import weibull
from windkit.xarray_structures._validate import (
    _create_is_obj_function,
    _create_validation_wrapper_factory,
    _create_validator,
)
from windkit.xarray_structures.empty import (
    _copy_chunks,
    _define_std_arrays,
    _empty_unstack,
)
from windkit.xarray_structures.metadata import (
    _BWC_ATTRS,
    _HIS_ATTRS,
    _WEIB_ATTRS,
    _WSBIN_COORD_ATTRS,
    _update_history,
    _update_var_attrs,
)
from windkit.xarray_structures.sector import (
    create_sector_coords,
)
from windkit.xarray_structures.wsbin import (
    create_wsbin_coords,
)
from windkit.spatial import (
    create_dataset,
    get_crs,
    is_point,
    _spatial_stack,
    _spatial_unstack,
    to_point,
)
from windkit.utils import _infer_file_format
from windkit.wind import wd_to_sector
from windkit.wind_climate.time_series_wind_climate import _validate_tswc_wrapper_factory

logger = logging.getLogger(__name__)

# File formats accepted by read_bwc / bwc_to_file.
SUPPORTED_BWC_FILE_FORMATS_READ = ["tab", "owc", "omwc", "nc"]
SUPPORTED_BWC_FILE_FORMATS_WRITE = ["tab"]

# Canonical variable and dimension names used throughout this module.
WS = "wind_speed"
WD = "wind_direction"
VAR_WS_FREQ = "ws_freq"
VAR_WD_FREQ = "wdfreq"
VAR_WS_FREQ_BY_SECTOR = "wsfreq"
DIM_TIME = "time"
DIM_WS = "wsbin"
DIM_WD = "sector"
WV_COUNT = "wv_count"
# Required data variables (mapped to their dimensions), dims, and coords
# a dataset must carry to validate as a binned wind climate.
DATA_VAR_DICT_BWC = {"wsfreq": ["wsbin", "sector"], "wdfreq": ["sector"]}
REQ_DIMS_BWC = ["wsbin", "sector"]
REQ_COORDS_BWC = [
    "sector",
]


def _validate_freqs_greater_than_zero(wwc):
    """Helper function to validate wdfreq and wsfreq are positive"""
    response_list = []

    if "wsfreq" in wwc.data_vars and any(
        xr.where(wwc.wsfreq.values < 0.0, True, False).flatten()
    ):
        response_list.append("'wsfreq' has negative values")

    if "wdfreq" in wwc.data_vars and any(
        xr.where(wwc.wdfreq.values < 0.0, True, False).flatten()
    ):
        response_list.append("'wdfreq' has negative values")

    return response_list


# Validator for binned wind climate datasets: checks the required data
# variables ('wsfreq', 'wdfreq'), dimensions, and coordinates, plus the
# non-negativity of the frequencies via the extra check above.
validate_bwc = _create_validator(
    variables=DATA_VAR_DICT_BWC,
    dims=REQ_DIMS_BWC,
    coords=REQ_COORDS_BWC,
    extra_checks=[_validate_freqs_greater_than_zero],
)

# Decorator factory that validates the first argument as a bwc before
# calling the wrapped function.
_validate_bwc_wrapper_factory = _create_validation_wrapper_factory(validate_bwc)

# Boolean predicate form of the validator (True if the object is a bwc).
is_bwc = _create_is_obj_function(validate_bwc)


def _wv_count_wrapper(func):
    """
    Decorator to handle wind vector count format.

    Handles the case where the parameter is an xarray.DataArray with a wv_count
    format or a xarray.Dataset with a 'wv_count' data variable, i.e. has wind
    counts by bin and sector. If so, returns a binned wind climate xarray.Dataset.
    If it is not the case, it does nothing.
    """

    @wraps(func)
    def wv_count_to_bwc(*args, **kwargs):
        candidate = args[0]
        try:
            # A Dataset exposes its 'wv_count' variable via item access;
            # anything without that key falls through unchanged.
            candidate = candidate["wv_count"]
        except KeyError:
            pass
        is_counts = (
            isinstance(candidate, xr.DataArray) and candidate.name == "wv_count"
        )
        if not is_counts:
            return func(*args, **kwargs)
        # Convert the raw counts to a bwc before handing off.
        return func(_bwc_from_counts(candidate), *args[1:], **kwargs)

    return wv_count_to_bwc


def _freqs_to_dataset(
    wsfreq,
    wdfreq,
    wsbins,
    south_north,
    west_east,
    height,
    crs,
    **kwargs,
):
    """
    Makes data variables, coordinates, and attributes ready for
    xarray.Dataset construction from a histogram of wind speeds and directions

    Parameters
    ----------
    wsfreq: np.array([n_wsbins, n_sectors]): float64
        Wind speed frequency by wind speed and sector
    wdfreq: np.array([n_sectors]): float64
        Wind direction frequency by sector
    wsbins: np.array([n_wsbins + 1]): float64
        Edges of wind speed bins
    south_north: float64
        Coordinate value in y-direction
    west_east: float64
        Coordinate value in x-direction
    height: float64
        Height above ground
    crs : int, dict, str or pyproj.crs.CRS
            Value to initialize `pyproj.crs.CRS`
    kwargs : dict
        Other kwargs are added as attributes to the dataset

    Returns
    -------
    xarray.Dataset
    """

    wsbins = np.asarray(wsbins, dtype=np.float64)

    _, n_sectors = wsfreq.shape

    wscenters = create_wsbin_coords(wsbins)
    wdcenters = create_sector_coords(n_sectors)

    na = np.newaxis

    # BUGFIX: previously `wdfreq /= ...` mutated the caller's array in
    # place; normalize into a new array instead.
    wdfreq = wdfreq / np.sum(wdfreq)
    # Sectors with zero observations divide by zero here; the resulting
    # NaN columns are zeroed just below.
    with np.errstate(all="ignore"):
        wsfreq = wsfreq / np.sum(wsfreq, axis=0)[na, :]
    if np.isnan(wsfreq).any():
        # Use the module logger (was the root logger via logging.debug).
        logger.debug(
            "There are sectors with no wind observations (nan), which will be set to 0.0."
        )
        wsfreq = np.nan_to_num(wsfreq)

    # Build dataset
    ds = create_dataset(west_east, south_north, height, crs).drop_vars("output")
    ds["wdfreq"] = (("sector", "point"), wdfreq[:, na])
    ds["wsfreq"] = (("wsbin", "sector", "point"), wsfreq[:, :, na])

    ds = ds.assign_coords(
        {
            **wscenters.coords,
            **wdcenters.coords,
        }
    )

    ds = ds.assign_attrs(kwargs)

    ds = _update_var_attrs(ds, _BWC_ATTRS)
    return _update_history(ds)

def create_bwc(output_locs, n_sectors=12, n_wsbins=30, not_empty=True, seed=9876538):
    """Create empty binned wind climate dataset.

    If not_empty=True, the data variables are filled with meaningful random
    numbers, e.g. the sum of wdfreq is 1.

    Parameters
    ----------
    output_locs : xarray.Dataset
        Output geospatial information.
    n_sectors : int
        Number of sectors, defaults to 12.
    n_wsbins : int
        Number of histogram bins, defaults to 30.
    not_empty : bool
        If true, the empty dataset is filled with random meaningful data.
    seed : int
        Seed for the random data, defaults to 9876538.

    Returns
    -------
    ds : xarray.Dataset
        Binned wind climate dataset either empty or filled with random numbers.
    """
    da_dict, unstack_attrs, is_scalar = _define_std_arrays(output_locs, n_sectors)
    ds = xr.Dataset(
        {"wdfreq": da_dict["da_4d"], "wsfreq": da_dict["da_4d"]}, attrs=unstack_attrs
    )
    wsbin_coords = create_wsbin_coords(n_wsbins, width=1.0)
    ds["wsfreq"] = ds["wsfreq"].expand_dims({"wsbin": wsbin_coords.values})
    ds = ds.assign_coords({**wsbin_coords.coords})
    n_pt = len(ds["point"])
    if not_empty:
        wsbin_n = np.linspace(1, n_wsbins, n_wsbins)
        rng = np.random.default_rng(seed)
        wsbin_full = wsbin_n.repeat(n_sectors * n_pt).reshape(
            (n_wsbins, n_sectors, n_pt)
        )
        # Random but plausible Weibull parameters per sector/point.
        k = rng.uniform(1.5, 2.5, [n_sectors, n_pt])
        A = rng.uniform(5, 10, [n_sectors, n_pt])
        wsbin_freq_not1 = scipy.stats.weibull_min.pdf(wsbin_full, k, scale=A)
        # Normalize so the frequencies sum to 1 within each sector.
        wsbin_freq = wsbin_freq_not1 / wsbin_freq_not1.sum(0)
        ds["wsfreq"] = xr.DataArray(wsbin_freq, ds["wsfreq"].coords, ds["wsfreq"].sizes)
        # Dirichlet draw guarantees wdfreq sums to 1 across sectors.
        ds["wdfreq"] = xr.DataArray(
            rng.dirichlet(np.ones(n_sectors), n_pt).T,
            ds["wdfreq"].coords,
            ds["wdfreq"].sizes,
        )
    ustack_ds = _empty_unstack(ds, is_scalar)
    ds = _update_var_attrs(_copy_chunks(output_locs, ustack_ds), _BWC_ATTRS)
    return _update_history(ds)
def _create_wv_count(
    output_locs, n_sectors=12, n_wsbins=30, not_empty=True, seed=9876538
):
    """Create empty wind vector count dataset.

    If not_empty=True, the data variables are filled with meaningful random
    numbers.

    Parameters
    ----------
    output_locs : xarray.Dataset
        Output geospatial information
    n_sectors : int
        Number of sectors, defaults to 12.
    n_wsbins : int
        Number of histogram bins, defaults to 30.
    not_empty : bool
        If true, the empty dataset is filled with random meaningful data,
        defaults to True.
    seed : int
        Seed for the random data, defaults to 9876538.

    Returns
    -------
    ds : xarray.Dataset
        Wind vector count dataset either empty or filled with random numbers.
    """
    da_dict, unstack_attrs, is_scalar = _define_std_arrays(output_locs, n_sectors)
    ds = xr.Dataset({"wv_count": da_dict["da_4d"]}, attrs=unstack_attrs)
    wsbin_coords = create_wsbin_coords(n_wsbins, width=1.0)
    ds["wv_count"] = ds["wv_count"].expand_dims({"wsbin": wsbin_coords.values})
    ds = ds.assign_coords({**wsbin_coords.coords})
    n_pt = len(ds["point"])
    if not_empty:
        rng = np.random.default_rng(seed)
        # Random but plausible Weibull parameters per sector/point.
        k = rng.uniform(1.5, 2.5, [n_sectors, n_pt])
        A = rng.uniform(5, 10, [n_sectors, n_pt])
        wsbin_n = np.linspace(1, n_wsbins, n_wsbins)
        wsbin_full = wsbin_n.repeat(n_sectors * n_pt).reshape(
            (n_wsbins, n_sectors, n_pt)
        )
        # Scale the Weibull pdf to 500-1000 samples and round to
        # whole counts per bin.
        scaling_fact = rng.uniform(500, 1000, [n_sectors, n_pt])
        wv_count_array = np.rint(
            np.multiply(
                scipy.stats.weibull_min.pdf(wsbin_full, k, scale=A),
                scaling_fact[np.newaxis, :, :],
            )
        )
        ds["wv_count"].values = wv_count_array
    ustack_ds = _empty_unstack(ds, is_scalar)
    ds = _update_var_attrs(_copy_chunks(output_locs, ustack_ds), _HIS_ATTRS)
    return _update_history(ds)
def read_bwc(filename, *, crs=None, file_format="infer", **kwargs):
    """Creates binned wind climate xarray.Dataset from file.

    Parameters
    ----------
    filename : str or Path
        Path to a file that can be opened as a bwc. This includes .tab, .owc,
        .omwc, and .nc that were created as bwc files. The script will use the
        file extension to determine the file type and then parse it into a bwc
        DataSet object.
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon/WGS84 geodetic datum) for .tab, .owc and
        .omwc files.
    file_format : str
        The file format of the input file. If 'infer', the file format will be
        inferred from the file extension. Supported formats are 'tab', 'owc',
        and 'omwc'. Defaults to 'infer'.
    **kwargs : dict
        Additional arguments to be passed to the file reader.

    Returns
    -------
    ds : xarray.Dataset
        binned wind climate dataset that is formatted to match the bwc
        description.

    Raises
    ------
    ValueError
        If crs does not match dataset crs.
    ValueError
        If type of bwc is undetectable.
    """
    if file_format == "infer":
        file_format = _infer_file_format(filename)

    if file_format == "tab":
        if crs is None:
            crs = 4326
        bwc = _open_tabfile(filename, crs, **kwargs)
    elif file_format in ["owc", "omwc"]:
        if crs is None:
            crs = 4326
        bwc = _open_owcfile(str(filename), crs, **kwargs)
    elif file_format == "nc":
        # BUGFIX: was `xr.open_daaset` (typo), which raised AttributeError
        # for every NetCDF input.
        bwc = xr.open_dataset(filename, **kwargs)
    else:
        raise ValueError(
            f"Unable to detect type of bwc file (unknown) with extension {file_format}"
        )

    validate_bwc(bwc, run_extra_checks=False)
    bwc = _update_var_attrs(bwc, _BWC_ATTRS)
    return _update_history(bwc)
def combine_bwcs(bwc_list):
    """Combines a list of bwc's into one binned wind climate.

    .. note:: The output is always an object with a point structure

    Parameters
    ----------
    bwc_list: list
        List of binned wind climate xarray.Dataset.

    Returns
    -------
    bwcs: xarray.Dataset
        xarray Dataset with merged binned wind climates.
    """
    # All histograms must share the same wsbin dimension before concat,
    # so pad each one to the largest bin count found in the list.
    max_bins = max([bwc.sizes["wsbin"] for bwc in bwc_list])
    filled_bwcs = []
    for bwc in bwc_list:
        if not is_point(bwc):
            bwc = to_point(bwc)
        filled_bwcs.append(_fill_wsbin(bwc, max_bins))
    bwcs = xr.concat(filled_bwcs, "point")
    return _update_history(bwcs)
def _open_tabfile(tab_file, crs=4326):
    """Creates bwc object from a "tab" ascii file

    Parameters
    ----------
    tab_file: str
        Path to file tab file
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon/WGS84 geodetic datum)

    Returns
    -------
    xr.DataSet
        xarray DataSet that is formatted to match the bwc description

    Raises
    ------
    ValueError
        If .tab file does not have appropriate string formatting
    """

    def _read_floats(fobj):
        # One whitespace-separated line of floats.
        return map(float, fobj.readline().split())

    def _load_tabfile(tab_file, encode):
        # Parse header, wind-direction frequencies, and the wind-speed
        # table using the given text encoding.
        with open(tab_file, encoding=encode) as fobj:
            header = fobj.readline().strip()
            south_north, west_east, height = _read_floats(fobj)
            tuple_header = tuple(_read_floats(fobj))
            if len(tuple_header) == 4:
                n_sectors, _, _, tabfile_type = tuple_header
                if tabfile_type != 0:
                    raise ValueError(
                        "Tabfiles of type {0} are not supported".format(tabfile_type)
                    )
            else:
                n_sectors, _, _ = tuple_header
            n_sectors = int(n_sectors)
            wd_freq = np.array(list(_read_floats(fobj)))
            wsdata = np.genfromtxt(fobj)
        kwargs = {
            "south_north": south_north,
            "west_east": west_east,
            "height": height,
            "description": header,
            "crs": crs,
        }
        return kwargs, wd_freq, wsdata

    # Try a sequence of encodings; .tab files in the wild come in several.
    try:
        kwargs, wd_freq, wsdata = _load_tabfile(tab_file, "ascii")
    except (UnicodeDecodeError, UnicodeError):
        try:
            kwargs, wd_freq, wsdata = _load_tabfile(tab_file, "utf_8")
        except (UnicodeDecodeError, UnicodeError):
            try:
                kwargs, wd_freq, wsdata = _load_tabfile(tab_file, "utf_16")
            except (UnicodeDecodeError, UnicodeError):
                try:
                    kwargs, wd_freq, wsdata = _load_tabfile(tab_file, "cp1256")
                except (UnicodeDecodeError, UnicodeError):
                    raise ValueError(
                        "Unknown encoding for .tab file, please save as ASCII or UTF-8"
                    )

    # First column is the bin ceilings; remaining columns the frequencies.
    ws_freq = wsdata[:, 1:]
    ws_bins = np.append(0, wsdata[:, 0])

    return _freqs_to_dataset(ws_freq, wd_freq, ws_bins, **kwargs)


def _parse_owc(owc, crs=4326):
    """
    Parses an OWC file into a bwc object

    Parameters
    ----------
    owc : xml tree
        An XML element loaded by lxml
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon/WGS84 geodetic datum)

    Returns
    -------
    xr.DataSet
        xarray DataSet that is formatted to match the bwc description
    """
    # Get main dimensions
    num_sec = int(owc.attrib["CountOfSectors"])
    num_wsbins = int(
        owc.xpath("ObservedWind[1]/SpeedFrequencyDistribution" + "/@NumberOfBins")[0]
    )

    # Create arrays
    ws_freq = np.zeros((num_wsbins, num_sec))
    wd_freq = np.zeros(num_sec)
    cen_angle = np.zeros(num_sec)
    ws_bins = np.zeros(num_wsbins)

    # Get site information
    site_info = owc.xpath("RveaAnemometerSiteDetails")[0].attrib
    lat = float(site_info["LatitudeDegrees"])
    lon = float(site_info["LongitudeDegrees"])
    height = float(site_info["HeightAGL"])
    header = site_info["Description"]

    # Get wind speed histogram
    for obsWind in owc.xpath("ObservedWind"):
        # Get sector information (Index attribute is 1-based)
        sec = int(obsWind.attrib["Index"]) - 1
        cen_angle[sec] = float(obsWind.attrib["SectorWidthDegrees"])
        wd_freq[sec] = float(obsWind.attrib["SectorFrequency"])

        # Get wind speed histogram
        for wsBin, ws in enumerate(obsWind.getchildren()[0]):
            ws_bins[wsBin] = float(ws.attrib["UpperSpeedBound"])
            ws_freq[wsBin, sec] = float(ws.attrib["Frequency"])

    # Extract 1st column which is wind speeds and
    # add a 0.0 value to the first position
    ws_bins = np.insert(ws_bins, 0, 0.0)

    kwargs = {
        "south_north": lat,
        "west_east": lon,
        "height": height,
        "description": header,
        "crs": crs,
    }
    return _freqs_to_dataset(ws_freq, wd_freq, ws_bins, **kwargs)


def _open_owcfile(xml_file, crs=4326):
    """Creates bwc object from a .owc xml file

    Parameters
    ----------
    xml_file : str or Path
        Path to xml file
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon/WGS84 geodetic datum)

    Returns
    -------
    bwc
    """
    tree = etree.parse(str(xml_file))
    root = tree.getroot()
    return _parse_owc(root, crs)


def _count_to_wd_freq(wind_hist):
    """Convert wind vector count histogram to wind sector-wise relative frequency.

    Parameters
    ----------
    wind_hist : xarray.DataArray
        A valid pywasp wind-vector-count histogram DataArray

    Returns
    -------
    relfreq : xarray.DataArray
        Data array with sectorwise relative frequencies.
    """
    # Sector total divided by the grand total over all bins and sectors.
    relfreq = wind_hist.sum(dim=[DIM_WS], skipna=False) / wind_hist.sum(
        dim=[DIM_WS] + [DIM_WD], skipna=False
    )
    relfreq.name = VAR_WD_FREQ
    return _update_var_attrs(relfreq, _BWC_ATTRS)


def _count_to_ws_freq_by_sector(wind_hist):
    """DEPRECATED, use windkit.bwc_from_counts instead"""
    warnings.warn(
        "The 'count_to_ws_freq_by_sector' function is renamed as 'bwc_from_counts' and will be removed in a future version. "
        + "Use 'bwc_from_counts' instead.",
        FutureWarning,
        stacklevel=2,
    )
    return _bwc_from_counts(wind_hist)


def _bwc_from_counts(wind_hist):
    """Convert wind vector count histogram to binned wind climate dataset.

    Parameters
    ----------
    wind_hist : xarray.DataArray
        A valid pywasp wind-vector-count histogram DataArray

    Returns
    -------
    relfreq : xarray.Dataset
        Dataset with wind speed frequencies normalized in each sector and
        the rel. frequency of occurance of wind direction.
    """
    ds = xr.Dataset()
    # Normalize counts within each sector; sectors with zero counts
    # produce NaN here and are zeroed below.
    ds[VAR_WS_FREQ_BY_SECTOR] = wind_hist / wind_hist.sum(dim=DIM_WS, skipna=False)
    ds[VAR_WD_FREQ] = _count_to_wd_freq(wind_hist)
    ds.attrs.update(wind_hist.attrs)
    # replace nan with 0
    ds = ds.fillna(0)
    ds = _update_var_attrs(ds, _BWC_ATTRS)
    return _update_history(ds)
@_validate_tswc_wrapper_factory(run_extra_checks=False)
def bwc_from_tswc(
    ts, wsbin_width=1.0, n_wsbins=30, n_sectors=12, normalize=True, **kwargs
):
    """Creates object from a timeseries.

    Parameters
    ----------
    ts : xarray.Dataset
        Xarray dataset with variables 'wind_speed' and 'wind_direction' and
        with a coordinate and dimension 'time'.
    wsbin_width: float
        Width of the wind speed bins, defaults to 1.0.
    n_wsbins : int
        Number of wind speed bins, defaults to 30.
    n_sectors : int
        Number of sectors, defaults to 12.
    normalize : bool
        If set to True, convert wind vector count histogram to
        wind sector-wise relative frequency. Defaults to True
    kwargs: dict
        Additional argument 'wasp_desription' can be added to the dataset.

    Returns
    -------
    bwc : xarray.Dataset
        Binned wind climate from the timeseries.
    """

    def point_histogram2d(ws, wd, bins):
        # 2D histogram of one point's time series; NaN bins become 0.
        hist, _, _ = np.histogram2d(ws, wd, bins=bins)
        hist_no_nan = np.nan_to_num(hist)
        return hist_no_nan

    # stack windkit dataset to point
    ds = _spatial_stack(ts)
    ds_attrs = {
        # NOTE: the kwarg key 'wasp_desription' is misspelled but kept
        # for backward compatibility with existing callers.
        "description": kwargs.get("wasp_desription", ""),
        **_create_time_attributes(ds),
    }

    # ignore times when any of the time series is not available
    ds = ds.dropna(dim=DIM_TIME)
    if ds.sizes["time"] < 1:
        raise ValueError("Need at least 1 sample to create a histogram")

    ws_bins = create_wsbin_coords(n_wsbins, width=wsbin_width)
    wd_bins = create_sector_coords(n_sectors)
    ws_bin_edges = np.append(0.0, ws_bins["wsceil"].values)

    # convert wdir to sectors
    (
        wd,
        _,
    ) = wd_to_sector(ds[WD], n_sectors, output_type="indices")
    ds["wd_bins"] = (ds[WD].dims, wd.data)
    bins = (ws_bin_edges, np.arange(n_sectors + 1))
    hist = xr.apply_ufunc(
        point_histogram2d,
        ds["wind_speed"],
        ds["wd_bins"],
        dask="allowed",
        vectorize=True,
        input_core_dims=[["time"], ["time"]],
        output_core_dims=[["wsbin", "sector"]],
        kwargs={"bins": bins},
        keep_attrs=True,
    )
    hist.name = WV_COUNT
    hist = hist.assign_coords({**ws_bins.coords, **wd_bins.coords})

    if normalize:
        hist = _bwc_from_counts(hist)
    else:
        hist = _update_var_attrs(hist.to_dataset(), _BWC_ATTRS)

    if "_pwio_data_is_2d" in ds["wind_speed"].attrs:
        for var in hist.data_vars:
            hist[var].attrs["_pwio_data_is_2d"] = ds["wind_speed"].attrs[
                "_pwio_data_is_2d"
            ]
        if "_pwio_data_is_2d" in hist.attrs:
            del hist.attrs["_pwio_data_is_2d"]

    # the _pwio_was_stacked_point attribute is lost with apply_ufunc
    hist.attrs.update(ds.attrs)
    hist = hist.assign_attrs(ds_attrs)
    # BUGFIX: the original assigned `hist = _spatial_unstack(hist)` and then
    # called `_spatial_unstack` AGAIN inside the return statement, unstacking
    # the dataset twice. Unstack exactly once.
    hist = _spatial_unstack(hist)
    return _update_var_attrs(hist, _BWC_ATTRS)
@_validate_tswc_wrapper_factory(run_extra_checks=False)
def _create_time_attributes(ds: xr.Dataset, hist=None):
    """Create time attributes for binned wind climate.

    We attached the time attributes to a new or existing binned wind
    climate. If it has existing attributes, these will be used as well
    when calculating the meta data.

    Parameters
    ----------
    ds : xarray.Dataset
        Xarray dataset with variables 'wind_speed' and 'wind_direction'
        and with a coordinate and dimension 'time'.
    hist : xarray.Dataset, optional
        A valid pywasp histogram dataset with, defaults to None

    Returns
    -------
    dic_result: dict
        Dictionary with 'start_time', 'end_time', 'interval', 'count',
        'count_missing', 'recovering_percentage'
    """
    # find the first and last time in the time series we process
    first_time = pd.Timestamp(ds["time"].min().values)
    last_time = pd.Timestamp(ds["time"].max().values)
    number_of_samples = int(ds.dropna(dim="time").sizes["time"])
    # find most frequent interval in minutes
    # currently when you provide an existing histogram
    # as input it will just overwrite the sampling interval
    # and recovery percentage. Since we only process model
    # data this way, it is probably OK for now.
    intervals, counts = np.unique(
        ds["time"].diff("time") / pd.Timedelta("1 ms"), return_counts=True
    )
    interval = intervals[counts.argmax()]
    series_start_to_end = np.arange(
        first_time, last_time, pd.Timedelta(f"{interval} ms")
    )
    # arange does not include the last step but we want
    # this for the max samples
    max_possible_samples = int(series_start_to_end.size + 1)

    # check for existing first and last time in input
    # it is assumed that this histogram has also been created
    # with this function and the attributes should be present
    try:
        first_time = np.min([first_time, pd.to_datetime(hist.attrs["start_time"])])
        last_time = np.max([last_time, pd.to_datetime(hist.attrs["end_time"])])
        max_possible_samples = max_possible_samples + hist.attrs["count_expected"]
        number_of_samples = number_of_samples + hist.attrs["count"]
    except (KeyError, AttributeError) as e:  # noqa
        # hist is None or lacks the expected attributes: start fresh.
        logging.info("No attributes present yet, adding new ones.")

    dic_result = {
        "start_time": str(first_time),
        "end_time": str(last_time),
        "interval": str(pd.Timedelta(f"{interval} ms")),
        "count_expected": max_possible_samples,
        "count": number_of_samples,
        "count_missing": max_possible_samples - number_of_samples,
        "recovery_percentage": 100 * number_of_samples / max_possible_samples,
    }
    return dic_result


def _tab_string(bwc):
    """Returns string representation of bwc dataset

    Parameters
    ----------
    bwc: xr.Dataset
        Binned wind climate xr.Dataset object

    Returns
    -------
    string: str
        String representation of bwc dataset
    """

    def _to_string(node):
        # Render one single-point bwc in the legacy .tab layout.
        nwsbin = node.sizes["wsbin"]
        n_sectors = node.sizes["sector"]
        le = "\n"
        header_str = node.attrs.get("description", "No Header")
        string = header_str + le
        string += (
            "\t".join(
                map(
                    str,
                    [
                        node["south_north"].values,
                        node["west_east"].values,
                        node["height"].values,
                    ],
                )
            )
            + le
        )
        string += "\t".join(map(str, [n_sectors, 1.0, 0.0])) + le
        # Sector frequencies in percent.
        string += " \t" + "\t".join("%6.2f" % f for f in node.wdfreq * 100.0) + le
        # Wind speed frequencies scaled by 1000 (per mille), one row per bin.
        wsfreq = np.round(node.wsfreq.values * 1000.0, 3)
        for iws in range(nwsbin):
            string += "%4.1f" % node.wsceil.values[iws] + "\t"
            string += "\t".join("%6.2f" % f for f in wsfreq[iws, :])
            if iws < nwsbin - 1:
                string += le
        return string

    return _to_string(bwc.squeeze())


@_wv_count_wrapper
def _to_tabfile(bwc, /, path=None):
    """Write bwc to tab-style ascii file.

    Parameters
    ----------
    bwc: xr.Dataset
        Binned wind climate xr.Dataset object.
    path: str or pathlib.Path
        dir or file path to write the file. Default value set to the
        current working directory.
    """

    def _write(node, fpath):
        # Windows-style line endings for legacy WAsP tools.
        with open(fpath, "w", newline="\r\n") as fobj:
            fobj.write(_tab_string(node))

    def _fmt_single_point_filename(ds):
        # Build a filename from the single point's coordinates.
        single_point_coords = ["height", "south_north", "west_east"]
        vals = []
        for coord in single_point_coords:
            vals.append(ds[coord].values.flatten()[0])
        filename = f"bwc_height{vals[0]}_south_north{vals[1]}_west_east{vals[2]}.tab"
        return filename

    if not get_crs(bwc).is_geographic:
        raise RuntimeError(
            "Binned wind climate dataset with projected coordinate systems cannot be written to a '.tab' file."
        )

    if path is None:
        path = Path.cwd()

    path = Path(path)
    if path.suffix == "":  # it is a Directory
        path.mkdir(parents=True, exist_ok=True)

    # If dataset has no extra dimensions (larger than size=1):
    # write file and return early.
    if bwc.squeeze().wsfreq.ndim == 2:
        if Path(path).is_dir():
            # fpath = path / "bwc.tab"
            fpath = path / _fmt_single_point_filename(bwc)
        else:
            path.parent.mkdir(parents=True, exist_ok=True)
            fpath = path
        _write(bwc.squeeze(), fpath)
        return

    # If dataset has extra dimensions (of size > 1):
    # Stack extra dimensions, loop over them, and write to tab files
    # Using file_name that contains coordinate information.
    dims_extra = [d for d in bwc.wsfreq.dims if d not in ["wsbin", "sector"]]
    stacked = bwc.stack(point=dims_extra)

    # Create file_name format string
    if Path(path).is_dir():
        file_name_fmt = (
            "_".join(["bwc"] + [f"{d}" + "{" + f"{d}" + "}" for d in dims_extra])
            + ".tab"
        )
    else:
        raise ValueError(
            "'path' argument is a filename, but the dataset has more than one point."
            " Try giving a directory as an argument."
        )

    # Loop and write to tab files
    for ipt in range(stacked.sizes["point"]):
        node = stacked.isel(point=slice(ipt, ipt + 1)).reset_index("point").squeeze()
        kw = {d: node[d].values for d in dims_extra}
        fpath = path / file_name_fmt.format(**kw)
        _write(node, fpath)
    return
@_validate_bwc_wrapper_factory(run_extra_checks=False)
def bwc_to_file(bwc, filename, *, file_format="infer", **kwargs):
    """Write binned wind climate to file.

    Parameters
    ----------
    bwc: xr.Dataset
        Binned wind climate xr.Dataset object.
    filename: str or pathlib.Path
        Path to the file to write the binned wind climate to.
    file_format: str
        The file format of the output file. If 'infer', the file format
        will be inferred from the file extension. Supported formats are
        'tab'. Defaults to 'infer'.
    **kwargs: dict
        Additional keyword arguments to pass to the writing function.
    """
    if file_format == "infer":
        file_format = _infer_file_format(filename)

    # Only the legacy .tab format is supported for writing.
    if file_format == "tab":
        _to_tabfile(bwc, filename, **kwargs)
    else:
        raise ValueError(f"Unsupported file format: {file_format}")
@_wv_count_wrapper
def _mean_ws_moment(bwc, /, n=1.0, bysector=False):
    """Calculate the n^th moment of the wind speed from a bwc

    Parameters
    ----------
    bwc: xarray.Dataset
        Binned wind climate xr.Dataset object
    n: float
        Moment to compute, defaults to 1.0.
    bysector: bool
        Whether to return the sectorwise wind speed moment or the
        all-sector mean moment. Defaults to False.

    Returns
    -------
    ws_moment: xarray.DataArray
        Array of wind speed moments.
    """
    if bysector:
        return (bwc.wsfreq * bwc.wsbin**n).sum(dim=["wsbin"], skipna=False)
    # All-sector value: weight each sector's moment by its frequency.
    return (bwc.wdfreq * bwc.wsfreq * bwc.wsbin**n).sum(
        dim=["wsbin", "sector"], skipna=False
    )


@_wv_count_wrapper
def _ws_cdf(bwc, /, bysector=False):
    """Calculate the cumulative distribution function (CDF) of the wind
    speed from a bwc

    Parameters
    ----------
    bwc: xarray.Dataset
        Binned wind climate xr.Dataset object
    bysector: bool
        Whether to return the sectorwise wind speed cdf or the all-sector
        mean cdf. Defaults to False which returns the all-sector mean cdf.

    Returns
    -------
    ws_cdf: xarray.DataArray
        Array of wind speed cdf.
    """
    if bysector:
        return bwc.wsfreq.cumsum(dim=["wsbin"], skipna=False)
    # All-sector cdf: sector-weighted frequencies, then cumulative sum.
    return (
        (bwc.wdfreq * bwc.wsfreq)
        .sum(dim="sector", skipna=False)
        .cumsum(dim=["wsbin"], skipna=False)
    )


@_wv_count_wrapper
def _ws_freq_gt_mean(bwc, /, bysector=False):
    """Calculate the frequency of wind speeds greater than the mean wind
    speed.

    Parameters
    ----------
    bwc: xarray.Dataset
        Binned wind climate xr.Dataset object
    bysector: bool
        Whether to return the sectorwise wind speed cdf or the all-sector
        mean cdf. Defaults to False which returns the all-sector value.

    Returns
    -------
    freq_gt_mean: xarray.DataArray
        Array of fraction greater than the mean wind speed in the
        histograms
    """
    # check for dimensions with no coordinates and temporarily add them
    # to the dataset to avoid problems with xarrays .interp method.
    # This is due to an issue in xarray when we have dimensions
    # with no coordiatnes on both the dataset and interpolation coordinates.
    # We should make an issue on xarray to fix this and remove this workaround afterward.
    dims_no_coord = [d for d in bwc.dims if d not in bwc.coords]
    bwc = bwc.assign_coords({d: (d, np.arange(bwc[d].size)) for d in dims_no_coord})

    # We set the wsceil as the coordinate because we want to interpolate
    # between bin edges.
    cdf = _ws_cdf(bwc, bysector=bysector).set_index({"wsbin": "wsceil"})
    mean = _mean_wind_speed(bwc, bysector=bysector)
    fgtm = 1.0 - cdf.interp(wsbin=mean, method="linear")

    # If the mean wind speed is below the first bin ceiling or above the last, we get nan values back
    # We set these to 0.5, which is the value we would get if the mean wind speed was exactly in the middle
    fgtm = fgtm.fillna(0.5)

    # Remove variables related to the wind speed bins if present
    fgtm = fgtm.drop_vars([v for v in ["wsbin", "wsfloor"] if v in fgtm.coords])

    # remove temporary dimensions
    fgtm = fgtm.drop_vars(dims_no_coord)

    return fgtm


def _fill_wsbin(bwc, /, n_wsbins):
    """Expands a binned wind climate to n_wsbins number of bins

    This is a useful feature when you want to combine several binnen wind
    climates in one dataset, because that requires that they have the same
    dimensions. The last two bins of the histogram are used to extrapolate
    the spacing. Up to this point the bins can also be irregular.

    Parameters
    ----------
    bwc: :any:`windkit.bwc`
        Binned wind climate
    n_wsbins: int
        Number of wind speeds bins

    Returns
    -------
    filled_bwc: :any:`windkit.bwc`
        Binned wind climate with n_wsbins number of bins
    """
    extension_bin = -2  # index of second last bin of histogram
    owsbin = bwc.sizes["wsbin"]
    # get spacing between last two bins
    dws = np.diff(bwc.wsbin[extension_bin:])
    newbins = np.zeros(n_wsbins)  # create new array with desired length
    newbins[0:owsbin] = bwc.wsbin.values  # set old bins
    # extrapolate new bin centers using the last observed spacing
    newbins[owsbin:n_wsbins] = newbins[owsbin - 1] + dws * np.arange(
        1, n_wsbins - owsbin + 1
    )
    # extend the bin ceiling/floor coordinates the same way
    wsceil = np.append(
        bwc.wsceil, bwc.wsceil[[-1]] + dws * np.arange(1, n_wsbins - owsbin + 1)
    )
    wsfloor = np.append(
        bwc.wsfloor, bwc.wsfloor[[-1]] + dws * np.arange(1, n_wsbins - owsbin + 1)
    )
    # interp with fill_value=0.0 pads the new bins with zero frequency
    filled_bwc = bwc.interp(wsbin=newbins, kwargs={"fill_value": 0.0})
    filled_bwc["wsceil"] = (("wsbin"), wsceil, _WSBIN_COORD_ATTRS["wsceil"])
    filled_bwc["wsfloor"] = (("wsbin"), wsfloor, _WSBIN_COORD_ATTRS["wsfloor"])
    return filled_bwc
@_validate_bwc_wrapper_factory(run_extra_checks=False)
def weibull_fit(bwc, include_met_fields=None, atol=1e-8):
    """
    Returns sectorwise Weibull parameters using WAsP's fitting algorithm.

    Parameters
    ----------
    bwc : xarray.Dataset
        Binned wind climate xr.Dataset object
    include_met_fields : bool
        Whether to include met fields in the output. Defaults to False.
    atol : float
        Absolute tolerance for the weibull-fitting algorithm.
        Defaults to 1e-8.

    Returns
    -------
    xarray.Dataset
        Weibull Wind Climate of same spatial extent as the input bwc
    """
    # Not implemented yet
    # TODO: implement
    if include_met_fields is not None:
        raise NotImplementedError(
            "The include_met_fields argument is not implemented yet."
        )

    # First and third moments plus the fraction above the mean are the
    # three constraints of WAsP's Weibull fit.
    m1 = _mean_ws_moment(bwc, 1.0, bysector=True)
    m3 = _mean_ws_moment(bwc, 3.0, bysector=True)
    fgtm = _ws_freq_gt_mean(bwc, bysector=True)

    shape, scale = weibull.fit_weibull_wasp_m1_m3_fgtm(m1, m3, fgtm, atol=atol)

    wb = bwc[["wdfreq"]].astype("float64").copy()
    # NOTE(review): the first return value is stored as 'A' (conventionally
    # the Weibull scale) and the second as 'k' (the shape), while the local
    # names suggest the opposite order — verify the return order of
    # fit_weibull_wasp_m1_m3_fgtm against its definition.
    wb["A"] = shape
    wb["k"] = scale

    wb = _update_var_attrs(wb, _WEIB_ATTRS)
    return _update_history(wb)
def _mean_wind_speed(bwc, bysector=False):
    """Calculate the mean wind speed.

    Parameters
    ----------
    bwc: xarray.Dataset
        Binned wind climate xr.Dataset object.
    bysector: bool
        Return sectorwise mean wind speed if True. Defaults to False.

    Returns
    -------
    bwc : xarray.DataArray
        Mean wind speed of the bwc.
    """
    # The mean is the first moment of the wind speed distribution.
    ds = _mean_ws_moment(bwc, 1.0, bysector)
    return _update_history(ds)


def _mean_power_density(bwc, bysector=False, air_density=1.225):
    """Calculate the power density

    Calculates the power density using a standard atmosphere air density
    of 1.225 kg m-3

    Parameters
    ----------
    bwc: xarray.Dataset
        Binned wind climate xr.Dataset object.
    bysector: bool
        Return sectorwise mean wind speed if True. Defaults to False.
    air_density : float
        Air density. Default set to 1.225 kg.m^-3.

    Returns
    -------
    bwc : xarray.DataArray
        Power density of the bwc.
    """
    # Power density = 1/2 * rho * <u^3> (third moment of the wind speed).
    return 1 / 2 * air_density * _mean_ws_moment(bwc, 3.0, bysector)