Source code for windkit.ltc.scores

# (c) 2023 DTU Wind Energy
"""
Functions to calculate common scoring metrics to be used on windkit
time series wind climate datasets.
"""
import numpy as np
import pandas as pd

from ._validation import ltc_validate


# @ltc_validate
@ltc_validate(num_args=2)
def rmse(ds_tgt, ds_pred):
    """
    Root mean square error between the wind speeds of two time series
    wind climate datasets.

    The ``wind_speed`` variable of each dataset is flattened to one
    dimension and the error is taken over all values at once, so a single
    number is returned. The implementation is inspired by the
    sklearn.metrics.root_mean_squared_error source code.

    Parameters
    ----------
    ds_tgt : xarray.Dataset
        target time series wind climate dataset
    ds_pred : xarray.Dataset
        predicted time series wind climate dataset

    Returns
    -------
    rmse : float
        root mean square error of the two wind speeds
    """
    observed = ds_tgt.wind_speed.values.flatten()
    modelled = ds_pred.wind_speed.values.flatten()

    squared_error = (observed - modelled) ** 2
    mean_squared_error = np.average(squared_error, axis=0)
    return np.sqrt(mean_squared_error)
# @ltc_validate
@ltc_validate(num_args=2)
def r2(ds_tgt, ds_pred):
    """
    R^2 (coefficient of determination) regression score between the wind
    speeds of two time series wind climate datasets.

    Computed as ``1 - SS_res / SS_tot`` on the flattened ``wind_speed``
    values, where ``SS_res`` is the sum of squared differences between
    target and prediction and ``SS_tot`` is the sum of squared deviations
    of the target from its own mean. The implementation is inspired by the
    sklearn.metrics.r2_score source code.

    Parameters
    ----------
    ds_tgt : xarray.Dataset
        target time series wind climate dataset
    ds_pred : xarray.Dataset
        predicted time series wind climate dataset

    Returns
    -------
    r2 : float
        r2 score of the two wind speeds
    """
    observed = ds_tgt.wind_speed.values.flatten()
    modelled = ds_pred.wind_speed.values.flatten()

    residuals = observed - modelled
    deviations = observed - np.average(observed, axis=0)

    # Accumulate in float64 regardless of the input dtype.
    ss_res = (residuals**2).sum(axis=0, dtype=np.float64)
    ss_tot = (deviations**2).sum(axis=0, dtype=np.float64)
    return 1 - ss_res / ss_tot
# @ltc_validate
@ltc_validate(num_args=2)
def bias(ds_tgt, ds_pred):
    """
    Mean bias between the wind speeds of two time series wind climate
    datasets.

    The bias is the mean of the target wind speed minus the mean of the
    predicted wind speed, so a positive value means the prediction is lower
    than the target on average.

    Parameters
    ----------
    ds_tgt : xarray.Dataset
        target time series wind climate dataset
    ds_pred : xarray.Dataset
        predicted time series wind climate dataset

    Returns
    -------
    bias : float
        bias of the two wind speeds
    """
    observed = ds_tgt.wind_speed.values.flatten()
    modelled = ds_pred.wind_speed.values.flatten()

    mean_observed = observed.mean()
    mean_modelled = modelled.mean()
    return mean_observed - mean_modelled
# @ltc_validate
@ltc_validate(num_args=2)
def wasserstein_distance(ds_tgt, ds_pred, bins=None):
    """
    Binned Wasserstein (earth mover's) distance between the wind speeds of
    two time series wind climate datasets.

    Both wind speed samples are histogrammed on the same bin edges, the
    histograms are turned into cumulative distributions, and the absolute
    difference between the two cumulative distributions is integrated over
    the bins.

    Parameters
    ----------
    ds_tgt : xarray.Dataset
        target time series wind climate dataset
    ds_pred : xarray.Dataset
        predicted time series wind climate dataset
    bins : array_like, optional
        Monotonically increasing bin edges, in the units of ``wind_speed``,
        shared by both histograms. Defaults to 20 bins of width 1 covering
        0 to 20, which reproduces the historical behaviour of this function.

    Returns
    -------
    wasserstein_distance : float
        wasserstein distance of the two wind speeds

    Raises
    ------
    ValueError
        If ``bins`` is not a one-dimensional sequence of at least two edges.

    Notes
    -----
    Samples outside ``[bins[0], bins[-1]]`` are ignored by
    ``numpy.histogram``. With the default edges, wind speeds above 20 do
    not contribute to the score; pass wider ``bins`` to include them.
    """
    if bins is None:
        edges = np.linspace(0.0, 20.0, 21)
    else:
        edges = np.asarray(bins, dtype=float)
        if edges.ndim != 1 or edges.size < 2:
            raise ValueError(
                "bins must be a one-dimensional sequence of at least two bin edges"
            )
    widths = np.diff(edges)

    p, _ = np.histogram(ds_tgt.wind_speed, bins=edges, density=True)
    q, _ = np.histogram(ds_pred.wind_speed, bins=edges, density=True)

    # density=True yields probability *density*; multiplying by the bin width
    # gives the probability mass per bin, whose running sum is the CDF at each
    # right bin edge. For the default unit-width bins this is a no-op.
    cdf_tgt = np.cumsum(p * widths)
    cdf_pred = np.cumsum(q * widths)

    # Riemann sum of |F_tgt - F_pred| over the bins.
    return np.sum(np.abs(cdf_tgt - cdf_pred) * widths)
# @ltc_validate
@ltc_validate(num_args=2)
def calc_scores(ds_tgt, ds_pred, name="name", period="full"):
    """
    Helper function that calculates the r2, rmse, bias and wasserstein
    distance scores and collects them in a pandas dataframe with headers
    suitable for reporting.

    Parameters
    ----------
    ds_tgt : xarray.Dataset
        target time series wind climate dataset
    ds_pred : xarray.Dataset
        predicted time series wind climate dataset
    name : str
        name of the case, repeated on every row of the report.
        Defaults to 'name'.
    period : str
        label of the period the datasets cover, repeated on every row of
        the report. Defaults to 'full'.

    Returns
    -------
    df : pandas.DataFrame
        dataframe with columns "Name", "Period", "Metric" and "Score",
        holding one row per metric ("R^2", "RMSE", "Mean bias", "EMD").
    """
    # Report label -> scoring function; insertion order fixes the row order.
    metrics = {
        "R^2": r2,
        "RMSE": rmse,
        "Mean bias": bias,
        "EMD": wasserstein_distance,
    }
    scores = np.array([metric(ds_tgt, ds_pred) for metric in metrics.values()])
    n_rows = len(scores)

    table = {
        "Name": [name] * n_rows,
        "Period": [period] * n_rows,
        "Metric": list(metrics),
        "Score": scores,
    }
    return pd.DataFrame(data=table, columns=["Name", "Period", "Metric", "Score"])