Source code for wind_validation.hist.metrics

from wind_validation.metrics import Metrics
import numpy as np
import xarray as xr
from scipy.spatial import distance_matrix
import functools

from windkit.wind import wind_vectors
from ..constants import *
from .stats import bwc_ws_moment
from ..metrics import error


def _bins_distance_matrix(hist, dim=None):
    """Calculate the inter-bin distances appropriate dimension
    by default, the m/s distance in U,V-space is provided
    between each bin. dim="wsbin" gives the bin-distances along
    the wind speed axis and dim="sector" gives the bin-stances
    along the direction axis

    Parameters
    ----------
    hist : xarray.DataArray
        Wind histogram
    dim : str, optional
        dimension to calculate inter-bin distances, by default None,
        which gives the U,V-space distances in meters per second.

    Returns
    -------
    numpy.array
        Matrix of inter-bin distances
    """
    if dim == None:
        wspd, wdir = xr.broadcast(
            hist.coords[DIM_WIND_SPEED], hist.coords[DIM_WIND_DIRECTION]
        )
        u, v = wind_vectors(wspd, wdir)
        pos2d = np.stack([u.values.ravel(), v.values.ravel()], axis=1)
        return distance_matrix(pos2d, pos2d)
    elif dim == DIM_WIND_SPEED:
        pos2d = np.array([(v, 0.0) for v in hist.coords[DIM_WIND_SPEED]])
        return distance_matrix(pos2d, pos2d)
    elif dim == DIM_WIND_DIRECTION:
        pos2d = np.array([(v, 0) for v in hist.coords[DIM_WIND_DIRECTION]])
        mdist = distance_matrix(pos2d, pos2d)
        mdist[mdist > 180.0] -= 360.0
        mdist = np.abs(mdist)
        return mdist


[docs]def emd(hist1, hist2, dim=None): """Calculates the Earth Movers Distance (EMD) between two xarray.DataArrays's containing pywasp wind histogram's Parameters ---------- hist1, hist2 : xarray.DataArray PyWAsP-formatted Wind Histograms dim : str or None, optional The dimension to calculate the EMD over, options are: "wsbin" and "sector" and None (the default), which calculates the 2D EMD in U,V-space If "sector" is chosen, the shortest circular distance between bins is used. Returns ------- xarray.DataArray: Earth Movers Distance the histograms """ try: import pyemd except (ModuleNotFoundError, ImportError): raise ModuleNotFoundError( "You need to install the 'pyemd' package to calculate the emd. This is most easily done with `conda install pyemd`." ) def _emd(freq1, freq2, dist_matrix): return pyemd.emd( freq1.astype(np.float64).ravel(), freq2.astype(np.float64).ravel(), dist_matrix, ) dist_matrix = _bins_distance_matrix(hist1, dim=dim) dims_agg_total = (DIM_WIND_SPEED, DIM_WIND_DIRECTION) if dim is None: dims_emd = (DIM_WIND_SPEED, DIM_WIND_DIRECTION) dims_agg_count = None elif dim == DIM_WIND_SPEED: dims_emd = (DIM_WIND_SPEED,) dims_agg_count = (DIM_WIND_DIRECTION,) elif dim == DIM_WIND_DIRECTION: dims_emd = (DIM_WIND_DIRECTION,) dims_agg_count = (DIM_WIND_SPEED,) if dims_agg_count is None: freq1 = hist1 / hist1.sum(dim=dims_agg_total) freq2 = hist2 / hist2.sum(dim=dims_agg_total) else: freq1 = hist1.sum(dim=dims_agg_count) / hist1.sum(dim=dims_agg_total) freq2 = hist2.sum(dim=dims_agg_count) / hist2.sum(dim=dims_agg_total) # TODO: this is related to hist/stats.py count_to_ws_freq_by_sector if "wsfreq" in freq1.data_vars: freq1 = freq1["wsfreq"] freq2 = freq2["wsfreq"] elif "wv_count" in freq1.data_vars: freq1 = freq1["wv_count"] freq2 = freq2["wv_count"] result = xr.apply_ufunc( _emd, freq1, freq2, dist_matrix, input_core_dims=[dims_emd, dims_emd, ["_D1", "_D2"]], output_core_dims=[[]], dask="allowed", vectorize=True, ) result.name = "euclidian_emd" return result
[docs]def euclidian_emd(hist1, hist2): """Wrapper for :py:func:`emd` Calculates the EMD over the dimension wind speed with keyword argument `dim='wsbin'`. """ return emd(hist1, hist2, dim=DIM_WIND_SPEED)
[docs]def emd_2d(hist1, hist2): """Wrapper for :py:func:`emd` Calculates the EMD over the dimension wind speed with keyword argument `dim=None`. """ return emd(hist1, hist2, dim=None)
[docs]def circular_emd(hist1, hist2): """Wrapper for :py:func:`emd` Calculates the EMD over the dimension wind speed with keyword argument `dim='sector'` """ return emd(hist1, hist2, dim=DIM_WIND_DIRECTION)
[docs]class HistogramMetrics(Metrics): """Stores specific histogram metrics function mappings""" SUITE_MAPPER = { "none": [], "basic": [ ((VAR_WIND_SPEED, "mean"), "me"), ((VAR_WIND_SPEED, "mean"), "mae"), ((VAR_WIND_SPEED, "mean"), "mpe"), ((VAR_WIND_SPEED, "mean"), "mape"), ((VAR_POWER_DENS, "mean"), "me"), ((VAR_POWER_DENS, "mean"), "mae"), ((VAR_POWER_DENS, "mean"), "mpe"), ((VAR_POWER_DENS, "mean"), "mape"), ((VAR_AIR_DENS, "mean"), "mpe"), ], "all": [ ((VAR_WIND_SPEED, "mean"), "me"), ((VAR_WIND_SPEED, "mean"), "mae"), ((VAR_WIND_SPEED, "mean"), "mpe"), ((VAR_WIND_SPEED, "mean"), "mape"), ((VAR_POWER_DENS, "mean"), "me"), ((VAR_POWER_DENS, "mean"), "mae"), ((VAR_POWER_DENS, "mean"), "mpe"), ((VAR_POWER_DENS, "mean"), "mape"), ((VAR_AIR_DENS, "mean"), "mpe"), (VAR_WV_COUNT, "emd"), (VAR_WV_COUNT, "eemd"), (VAR_WV_COUNT, "cemd"), ], } FUNC_MAPPER = { "me": functools.partial(error, percent=False, abs=False), "mpe": functools.partial(error, percent=True, abs=False), "mae": functools.partial(error, percent=False, abs=True), "mape": functools.partial(error, percent=True, abs=True), "eemd": euclidian_emd, "cemd": circular_emd, "emd": emd_2d, } @classmethod def _get_suite_mapper(self) -> dict: return self.SUITE_MAPPER @classmethod def _get_func_mapper(self) -> dict: return self.FUNC_MAPPER