"""Binned Wind Climate Module
Extends windkit Binned Wind Climate module to add a function for creating a weibull
distribution out of the binned wind climate using the WAsP method. It contains a much
faster fortran based binning routine that can be used for example in combination with
mesoscale model output, where speed is crucial.
It also contains functions for calculating stability information efficiently using
a binning algorithm which computes the conditional mean stability based on the wind
histogram in a single pass without storing the whole time series of wind speed.
"""
import logging
import numpy as np
import xarray as xr
from scipy import interpolate
from windkit.binned_wind_climate import (
create_time_attributes,
bwc_validate_wrapper,
bwc_from_counts,
)
from windkit.time_series_wind_climate import ts_validate_wrapper, WS, WD, DIM_TIME
from windkit.sector import create_sector_coords, create_ws_bin_coords
from windkit.spatial import spatial_stack, spatial_unstack
from windkit.metadata import (
_BWC_ATTRS,
_HIS_ATTRS,
_HIS_STAB_ATTRS,
_TS_STAB_ATTRS,
_MEAN_STAB_ATTRS,
_HIS_BARO_ATTRS,
_MEAN_BARO_ATTRS,
update_var_attrs,
ALL_VARS_META,
update_history,
)
from ..core import Rvea0288, Rvea0326 # pylint:disable=no-name-in-module
logger = logging.getLogger(__name__)
# fortran calls
_CREATE_HIST = Rvea0288.histograms.add_histogram_points
_CALC_TSCALE = Rvea0326.calc_points.tstar_points
_NORM_HIST = Rvea0288.histograms.normalize_histogram_points
_COND_HIST = Rvea0288.histograms.conditional_hist
_MEAN_HIST = Rvea0288.histograms.mean_from_hist
_STD_HIST = Rvea0288.histograms.std_from_hist
_RESAMPLE_HIST = Rvea0288.histograms.histogram_resample_sectors
_RESAMPLE_HIST_WSBINS = Rvea0288.histograms.histogram_resample_wsbins
_RESAMPLE_HIST_WSBINS_AK = Rvea0288.histograms.histogram_resample_wsbins_ak
# constants
RTD = 180 / np.pi
@ts_validate_wrapper
def bwc_from_timeseries(
ds,
hist=None,
normalize=True,
revert_to_original=True,
ws_bin_width=1,
nwsbins=30,
nsecs=12,
):
"""Add timeseries to histogram
Converts a time series with wind speed & direction to derive
summed histogram of the wind vector.
.. warning::
This function is experimental and its signature may change.
Parameters
----------
ds: xr.Dataset
PyWAsP Timeseries dataset. Can have any of the pywasp
spatial data structures
    hist: xr.Dataset, optional
        Histogram with dimensions point, wsbin, sector
        containing a count. Default is None, which creates a histogram
        using ws_bin_width, nwsbins, and nsecs.
    normalize: boolean
        Normalize the histogram for each sector, storing the frequency per
        sector in the "wdfreq" variable. Default True.
    revert_to_original: boolean
        Return the histogram in the same spatial structure as ds? If
        False, the data is kept in stacked point format, which is more
        efficient when doing large calculations for numerical wind
        atlases.
ws_bin_width: float
width of wind speed bins
nwsbins: int
Number of wind speed bins
nsecs: int
Number of sectors (wind direction bins)
Returns
-------
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector,
        with the values from the ds time series added.
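
    Examples
    --------
    A minimal sketch of chunked accumulation, assuming ``chunk1`` and
    ``chunk2`` are pywasp time-series datasets with ``wind_speed`` and
    ``wind_direction`` at the same points (the names are illustrative):

    >>> hist = bwc_from_timeseries(
    ...     chunk1, normalize=False, revert_to_original=False, nsecs=12
    ... )
    >>> bwc = bwc_from_timeseries(chunk2, hist=hist)  # final call normalizes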
"""
logger.debug(
f"Convert time series ({ds[WS].sizes[DIM_TIME]} samples) into histogram"
)
if hist is None:
hist = _init_histogram_wind(ds, ws_bin_width, nwsbins, nsecs)
dss = spatial_stack(ds, copy=False).transpose(..., DIM_TIME, "point")
add = np.full(dss[WS].shape, 1.0, order="F", dtype=np.float32)
if "point" not in hist.dims:
hist = spatial_stack(hist, copy=False)
hist = hist.transpose(..., "wsbin", "sector", "point")
wv_count, count_bins_exceeded = _create_hist(
dss[WS].fillna(-9999.0),
dss[WD].fillna(-9999.0),
0,
hist.wsbin,
hist.sector,
add,
hist["wv_count"].values,
hist["count_bins_exceeded"].values,
)
    nr_fails = count_bins_exceeded[count_bins_exceeded > 0].shape[0]
    if nr_fails > 0:
        logger.warning(
            "The wind speeds you provided exceeded the range used for "
            f"binning ({np.max(hist.wsbin.values)}) in {nr_fails} points"
        )
var_names = ("wv_count", "count_bins_exceeded")
for name in var_names:
hist[name] = (hist[name].dims, eval(name), _HIS_ATTRS[name])
update_var_attrs(hist[name], _HIS_ATTRS)
hist[name].attrs["_pwio_data_is_2d"] = False
dic_attrs = create_time_attributes(ds, hist=hist)
hist = hist.assign_attrs(dic_attrs).drop_vars("point", errors="ignore")
if normalize:
hist = _normalize_hist(hist)
if revert_to_original:
hist = spatial_unstack(hist)
hist = update_var_attrs(hist, _BWC_ATTRS)
return update_history(hist)
def _create_hist(wspd, wdir, isuv, wsbin, wdsec, add, wv_count, count_bins_exceeded):
"""creating histogram from wind speed and wind dir
Wrapper around fortran call that loops over all points
and generates 2d histograms from wind speed and direction.
If the wind speed or wind direction is outside of the range
used for binning it adds 1 to `count_bins_exceeded` for each point
Parameters
----------
wspd: array of float
Array with wind speeds dimensions (`time`, `point`)
wdir: array of float
Array with meteorological wind direction dimensions (`time`, `point`)
isuv: int {0,1}
Are the input values wind speed / wind direction (0)
or u / v (1)?
wsbin: array of float
center of wind speed bins used for creating histogram
wdsec: array of float
center of wind direction bins used for creating histogram
    add: array of float
        Array with values that will be added to the histogram at each point
    wv_count: array of float
        Histogram array with dimensions (wsbin, wdsec, point)
    count_bins_exceeded: array of int
        Array with dimension (point,)

    Returns
    -------
    wv_count: array of float
        Histogram array with dimensions (wsbin, wdsec, point)
    count_bins_exceeded: array of int
        Array with dimension (point,)
"""
_CREATE_HIST(wspd, wdir, isuv, wsbin, wdsec, add, wv_count, count_bins_exceeded)
return (wv_count, count_bins_exceeded)
def calc_temp_scale(ds, revert_to_original=True, enhmix=1.0, use_humidity=False):
"""Calculate temperature scale
This calculates a surface layer temperature scale that can
be used to model the wind profile. Using the enhmix can be used
to control mixing over land, because ERA5 has a large bias
in $u_*$ over land. If you supply enhmix you will have to
add a landmask because it is only applied over land. If use_humidity
is true the humidity is used for calculating the air density.
.. warning::
This function is experimental and its signature may change.
Parameters
----------
ds : xarray.Dataset
Dataset containing variables
["PSFC", "T2", "UST", "HFX", "LH", "PBLH"]
Can have any pywasp spatial structure
    revert_to_original: boolean
        Return the result in the original spatial structure of the input
        :class:`xarray.Dataset`? If False, keeps the data in stacked
        point format, which is more efficient when doing large
        calculations for numerical wind atlases.
enhmix : float
Decrease mixing over land with this factor. If different from
1.0, requires a land mask variable "LANDMASK" since it is only
applied over land.
    use_humidity: bool
        If True, use the humidity for calculating the air density. This
        requires the variables "elevation" (for the terrain elevation)
        and "Q2" to be present on the dataset. By default False.
    Returns
    -------
    temp_scale : xr.DataArray
        DataArray 'temp_scale' with the same dimensions as the input
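
    Examples
    --------
    Hypothetical usage with a mesoscale dataset ``met_ds`` holding the
    required variables (the dataset name is illustrative):

    >>> tstar = calc_temp_scale(met_ds)
    >>> tstar = calc_temp_scale(met_ds, enhmix=0.8)  # needs "LANDMASK" in met_ds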
"""
dss = spatial_stack(ds, copy=revert_to_original).transpose(..., DIM_TIME, "point")
req_vars = ["PSFC", "T2", "UST", "HFX", "LH", "PBLH"]
if enhmix != 1.0:
req_vars = req_vars + ["LANDMASK"]
if use_humidity:
req_vars = req_vars + ["elevation", "Q2"]
if "LANDMASK" not in req_vars:
logger.debug("Adding dummy variable LANDMASK")
# land mask not required in dataset, so we just create a dummy array
dss["LANDMASK"] = xr.full_like(dss["point"], 0)
if "Q2" not in req_vars:
# humidity not required in dataset, so we just create a dummy array
logger.debug("Adding dummy variables Q2 and elevation")
dss["elevation"] = xr.full_like(dss["point"], 0)
dss["Q2"] = xr.full_like(dss["UST"], 0)
try:
logger.debug("Calculating temperature scale")
tstar = _CALC_TSCALE(
dss["PSFC"],
dss["T2"],
dss["UST"],
dss["HFX"],
dss["LH"],
dss["PBLH"],
dss["elevation"],
dss["Q2"],
dss["LANDMASK"],
enhmix,
int(use_humidity),
)
    except KeyError as e:
        raise KeyError(f"Required variable {e} not found in dataset!") from e
dss["temp_scale"] = (dss.UST.dims, tstar)
dss = dss[["temp_scale"]]
update_var_attrs(dss, _TS_STAB_ATTRS)
if revert_to_original:
res = spatial_unstack(dss)["temp_scale"]
else:
res = dss["temp_scale"]
return update_history(res)
def stability_histogram(
ds,
hist=None,
finalize=True,
revert_to_original=True,
ws_bin_width=1,
nwsbins=40,
nsecs=12,
percentile=0.5,
wv_count=None,
landmask=None,
):
"""Add timeseries to existing histogram
Uses the stability parameters "temp_scale" and "ustar_over_pblh".
See the paper "Using observed and modelled heat fluxes for
improved extrapolation of wind distributions" for the definitions
of these variables. The temp_scale variable is defined as $T_*$
in that paper, whereas the "ustar_over_pblh" should be defined as
$u_*/pblh$ as input for this function. This is done to give priority
to the smallest pblh that are most important for the wind profile.
The output is reversed ($**-1$) to give the input format as required
by WAsP ($pblh/u_*$).
The time dimension is copied so that one can see what was the
period that was used to generate the histogram.
.. warning::
This function is experimental and its signature may change.
Parameters
----------
ds : xarray.Dataset
Dataset containing variables ['wind_speed', 'wind_direction',
'temp_scale', 'ustar_over_pblh']
Can have any pywasp spatial structure
hist : xarray.Dataset
Histogram with dimensions point, wsbin, sector
    finalize : boolean
        Convert the stability histogram into a mean and
        standard deviation?
revert_to_original : boolean
Return the histogram input in original format from the
input ``ds``? If false, keeps the data in stacked point
format which is more efficient when doing large
calculations for numerical wind atlases.
ws_bin_width : float
width of wind speed bins
nwsbins : int
Number of wind speed bins
nsecs : int
Number of sectors (wind direction bins)
    percentile : float
        Fraction of the highest wind speeds per sector used for the
        conditional statistics (passed on to _finalize_meso). Default 0.5.
    wv_count : xarray.Dataset, optional
        Existing wind-vector count histogram to accumulate into. If None,
        one is built from ds.
    landmask : xarray.DataArray, optional
        Land mask used to compute separate land and sea fields.
Returns
-------
    hist : xarray.Dataset
        Histogram with dimensions point, wsbin, sector,
        with the values from the ds time series added.
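
    Examples
    --------
    A minimal sketch, assuming ``ts_ds`` carries ``wind_speed``,
    ``wind_direction``, ``temp_scale`` (e.g. from :func:`calc_temp_scale`)
    and ``ustar_over_pblh`` (the dataset name is illustrative):

    >>> stab = stability_histogram(ts_ds, nsecs=12, percentile=0.5)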
"""
if hist is None:
hist = _init_histogram_stab(ds, ws_bin_width, nwsbins, nsecs)
ds_sub = ds[[WS, WD, "temp_scale", "ustar_over_pblh"]]
dss = spatial_stack(ds_sub, copy=revert_to_original).transpose(
..., DIM_TIME, "point"
)
hist = spatial_stack(hist, copy=revert_to_original).transpose(
..., "wsbin", "sector", "point"
)
    # error arrays are inout in Fortran, so we must create copies
    # to avoid adding to the error counter three times
ierr_sum_squared_temp_scale = hist["count_bins_exceeded"].copy().values
ierr_sum_pblh = hist["count_bins_exceeded"].copy().values
sum_temp_scale, count_bins_exceeded = _create_hist(
dss[WS],
dss[WD],
0,
hist.wsbin,
hist.sector,
dss["temp_scale"],
hist["sum_temp_scale"].values,
hist["count_bins_exceeded"].values,
)
sum_squared_temp_scale, count_bins_exceeded = _create_hist(
dss[WS],
dss[WD],
0,
hist.wsbin,
hist.sector,
dss["temp_scale"] ** 2,
hist["sum_squared_temp_scale"].values,
ierr_sum_squared_temp_scale,
)
sum_pblh, count_bins_exceeded = _create_hist(
dss[WS],
dss[WD],
0,
hist.wsbin,
hist.sector,
dss["ustar_over_pblh"],
hist["sum_pblh"].values,
ierr_sum_pblh,
)
    var_names = (
        "sum_temp_scale",
        "sum_squared_temp_scale",
        "sum_pblh",
        "count_bins_exceeded",
    )
    var_data = (sum_temp_scale, sum_squared_temp_scale, sum_pblh, count_bins_exceeded)
    for name, values in zip(var_names, var_data):
        hist[name] = (hist[name].dims, values)
        update_var_attrs(hist[name], _HIS_STAB_ATTRS)
        hist[name].attrs["_pwio_data_is_2d"] = True
dic_attrs = create_time_attributes(ds_sub, hist=hist)
hist = hist.assign_attrs(dic_attrs)
if finalize:
if wv_count is None:
wv_count = bwc_from_timeseries(
dss,
normalize=False,
revert_to_original=False,
ws_bin_width=ws_bin_width,
nwsbins=nwsbins,
nsecs=nsecs,
)
else:
wv_count = bwc_from_timeseries(
dss, hist=wv_count, normalize=False, revert_to_original=False
)
res = _finalize_meso(
hist, wv_count["wv_count"], landmask, percentile=percentile
)
return update_history(res)
if revert_to_original:
res = spatial_unstack(hist)
return update_history(res)
else:
res = hist
return update_history(res)
def _init_histogram_stab(ds, ws_bin_width=1, nws=40, nwd=12):
"""Initialize histogram
Initialize histogram with the variables needed for
a stability histogram. The dimensions of the spatial
structure are the same as the input ds and extended with
dimensions nws, nwd.
Parameters
----------
ds: xr.Dataset
Can have any of the pywasp spatial data structures
var_dic: dict
Dictonairy contain the variables that will be added to
the xarray container, including meta-data according to
the CF conventions
wsbin: float
width of wind speed bins
wdsec: float
width of wind direction bins
nws: int
Number of wind speed bins
nwd: int
Number of wind direction bins
Returns
-------
hist: xr.Dataset
Histogram with dimensions point, wsbin, sector
"""
hist = spatial_stack(ds, copy=False)[["point"]]
var_names = ("sum_temp_scale", "sum_squared_temp_scale", "sum_pblh")
for var in var_names:
hist[var] = _init_histogram(
ds, var, nwd=nwd, nws=nws, ws_bin_width=ws_bin_width
)
hist["count_bins_exceeded"] = xr.zeros_like(hist["point"], dtype=np.int32)
return hist
def _init_histogram(ds, name, nwd=12, nws=None, ws_bin_width=None):
"""Initialize generic histogram
Initialize histogram with the variables needed for
a wind speed and wind direction dependent histogram. The dimensions
of the spatial structure are the same as the input ds and extended
with dimensions nws, nwd.
Parameters
----------
ds: `xarray.Dataset`
Can have any of the pywasp spatial data structures
name: str
Name of the `xarray.DataArray` that is created
nwd: int
Number of wind direction bins
nws: int
Number of wind speed bins
ws_bin_width: float
width of wind speed bins
Returns
-------
hist: xr.Dataset
Histogram with dimensions point, wsbin, sector
"""
all_dims = (spatial_stack(ds, copy=False)["point"],)
all_dims = all_dims + (create_sector_coords(nwd),)
if ws_bin_width is not None and nws is not None:
all_dims = all_dims + (create_ws_bin_coords(bin_width=ws_bin_width, nws=nws),)
template = xr.broadcast(*all_dims)
# combined point, ws_bin, wd_bin array filled with zeroes
hist = xr.zeros_like(template[0], dtype="float32")
    # make sure we copy auxiliary coordinates from sector and wind speed bins
for i in template[1:]:
hist = hist.assign_coords(i.coords)
hist.name = name
    try:
        update_var_attrs(hist, {**ALL_VARS_META})
    except KeyError:
        logger.warning("No metadata could be added to the variable!")
return hist
def _init_histogram_wind(ds, ws_bin_width, nws, nwd):
"""Initialize histogram
Initialize histogram with the variables needed for
a wind vector count histogram. The dimensions of the spatial
structure are the same as the input ds and extended with
dimensions nws, nwd.
Parameters
----------
ds: xr.Dataset
Can have any of the pywasp spatial data structures
ws_bin_width: float
width of wind speed bins
nws: int
Number of wind speed bins
nwd: int
Number of wind direction bins
Returns
-------
hist: xr.Dataset
Histogram with dimensions point, wsbin, sector
"""
hist = spatial_stack(ds, copy=False)[["point"]]
hist["wv_count"] = _init_histogram(
ds, "wv_count", nwd=nwd, nws=nws, ws_bin_width=ws_bin_width
)
hist["count_bins_exceeded"] = xr.zeros_like(hist["point"], dtype=np.int32)
return hist
def _stab_mean_std(hist, wv_count, percentile=0.5):
"""Compute mean or standard deviation from histogram
Compute mean or standard deviation from histogram. The
variables are binned by sector and wind speed bin and
a mean or std. dev. is calulcated based on the highest
percentile of wind speeds for each sector.
Parameters
----------
hist: xr.Dataset
Dataset with variables `sum_temp_scale`, `sum_squared_temp_scale`
and `sum_pblh` and dimensions `wsbin`, `sector`
Returns
-------
clim: xr.Dataset
Dataset with mean mesoclimate variables per sector
"""
hist["wv_count"] = wv_count
hist = spatial_stack(hist, copy=False)
cond_hist, freq = _COND_HIST(hist.sum_pblh, hist.wv_count, percentile)
pblhmean = _MEAN_HIST(cond_hist, freq)
cond_hist, freq = _COND_HIST(hist.sum_temp_scale, hist.wv_count, percentile)
cond_hist2, freq = _COND_HIST(
hist.sum_squared_temp_scale, hist.wv_count, percentile
)
tstarstd = _STD_HIST(cond_hist, cond_hist2, freq)
tstarmean = _MEAN_HIST(cond_hist, freq)
clim = hist[["point", "sector"]]
clim["mean_pblh_scale"] = (("sector", "point"), 1 / pblhmean)
clim["rms_temp_scale"] = (("sector", "point"), tstarstd)
clim["mean_temp_scale"] = (("sector", "point"), tstarmean)
return spatial_unstack(clim)
def create_histogram_z0(
ds,
hist=None,
finalize=True,
revert_to_original=True,
ws_bin_width=1,
nwsbins=40,
nsecs=12,
percentile=0.5,
wv_count=None,
landmask=None,
):
"""Add timeseries to existing histogram
Uses the stability parameters to derive
summed histogram of the surface layer temperature scale,
squared surface layer temperature scale, boundary
layer height and logarithm of roughness length. The time
dimension is copied so that one can see what was the
period that was used to generate the histogram.
.. warning::
This function is experimental and its signature may change.
Parameters
----------
ds: xr.Dataset
Dataset containing variables
['wind_speed', 'wind_direction', 'LANDMASK', 'ZNT']
Can have any pywasp spatial structure
hist: xr.Dataset
Histogram with dimensions point, wsbin, sector
    finalize: boolean
        Convert the roughness histogram into a mean roughness
        per sector?
revert_to_original: boolean
Return the histogram input in original format from the
input `ds`? If false, keeps the data in stacked point
format which is more efficient when doing large
calculations for numerical wind atlases.
ws_bin_width: float
width of wind speed bins
nwsbins: int
Number of wind speed bins
nsecs: int
Number of sectors (wind direction bins)
    percentile: float
        Fraction of the highest wind speeds per sector used for the
        conditional mean (passed on to _z0_mean). Default 0.5.
    wv_count: xr.Dataset, optional
        Existing wind-vector count histogram to accumulate into. If None,
        one is built from ds.
    landmask: xr.DataArray
        Land mask, required when finalize is True; the mean roughness is
        set to 0.0 over sea.
Returns
-------
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector,
        with the values from the ds time series added.
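
    Examples
    --------
    A minimal sketch, assuming ``met_ds`` has ``wind_speed``,
    ``wind_direction`` and ``ZNT``, and ``lm`` is a static land-sea mask
    DataArray (names illustrative):

    >>> z0_clim = create_histogram_z0(met_ds, landmask=lm)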
"""
if hist is None:
hist = spatial_stack(ds, copy=False)[["point"]]
hist["z0sum"] = _init_histogram(
ds[["ZNT"]], "z0sum", nsecs, nwsbins, ws_bin_width
)
hist["count_bins_exceeded"] = xr.zeros_like(hist["point"], dtype=np.int32)
ds_sub = ds[[WS, WD, "ZNT"]]
dss = spatial_stack(ds_sub, copy=False).transpose(..., DIM_TIME, "point")
hist = spatial_stack(hist, copy=False).transpose(..., "wsbin", "sector", "point")
z0sum, count_bins_exceeded = _create_hist(
dss[WS],
dss[WD],
0,
hist.wsbin,
hist.sector,
np.log(dss["ZNT"]),
hist["z0sum"].values,
hist["count_bins_exceeded"].values,
)
hist["z0sum"] = (("wsbin", "sector", "point"), z0sum)
hist["count_bins_exceeded"] = (("point"), count_bins_exceeded)
dic_attrs = create_time_attributes(ds_sub, hist=hist)
hist = hist.assign_attrs(dic_attrs).drop_vars("point", errors="ignore")
if finalize:
if wv_count is None:
wv_count = bwc_from_timeseries(
dss,
normalize=False,
revert_to_original=False,
ws_bin_width=ws_bin_width,
nwsbins=nwsbins,
nsecs=nsecs,
)
else:
wv_count = bwc_from_timeseries(
dss, hist=wv_count, normalize=False, revert_to_original=False
)
res = _z0_mean(hist, wv_count["wv_count"], landmask, percentile=percentile)
return update_history(res)
if revert_to_original:
res = spatial_unstack(hist)
return update_history(res)
else:
return update_history(hist)
def _z0_mean(hist, wv_count, landmask, percentile=0.5):
"""Compute mean or standard deviation from histogram
Compute mean or standard deviation from histogram. The
variables are binned by sector and wind speed bin and
a mean or std. dev. is calulcated based on the highest
percentile of wind speeds for each sector.
Parameters
----------
hist: xr.Dataset
Dataset with variables `sum_temp_scale`, `sum_squared_temp_scale`
and `sum_pblh` and dimensions `wsbin`, `sector`
Returns
-------
clim: xr.Dataset
Dataset with mean mesoclimate variables per sector
"""
hist["wv_count"] = wv_count
hist["landmask"] = landmask
hist = spatial_stack(hist, copy=False)
cond_hist, wd_count = _COND_HIST(hist["z0sum"], hist["wv_count"], percentile)
z0mean = _MEAN_HIST(cond_hist, wd_count)
hist["logz0sec"] = (("sector", "point"), z0mean)
hist["wd_count"] = (("sector", "point"), wd_count)
hist["mean_z0"] = mean_z0(hist)
hist["mean_z0"] = xr.where(hist["landmask"] == 0, 0.0, hist["mean_z0"])
return spatial_unstack(hist)
def mean_z0(hist):
"""Compute mean roughness
Compute mean roughness from log(z0) per wsbin and
sector
.. warning::
This function is experimental and its signature may change.
Parameters
----------
hist: xr.Dataset
Dataset with variables `logz0sec` and `wd_count`
and dimensions `wsbin`, `sector`
Returns
-------
    ds: xr.DataArray
        DataArray with mean z0 per sector
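
    Examples
    --------
    A minimal numeric sketch of the frequency-weighted geometric mean
    this function evaluates (values are illustrative):

    >>> import numpy as np
    >>> logz0 = np.log([0.03, 0.1, 0.5])  # per-sector mean log-roughness
    >>> freq = np.array([0.5, 0.3, 0.2])  # sector frequencies
    >>> round(float(np.exp(np.sum(freq * logz0))), 4)
    0.0756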
"""
mean_z0 = hist["logz0sec"] * hist["wd_count"] / hist["wd_count"].sum(dim=["sector"])
return np.exp(mean_z0.sum(dim=["sector"]))
def _normalize_hist(hist_point):
"""Convert frequency to probability-density histogram
Convert frequency to probability density histogram in the
WAsP core structure, i.e. with an additional variables
containing wind speed sector frequencies.
Parameters
----------
hist: xr.Dataset
Dataset with the frequency of occurence in each bin
Returns
-------
hist: xr.Dataset
Binned wind climate with probability density in each bin
"""
wsfreq, wdfreq = _NORM_HIST(hist_point["wv_count"])
var_names = ("wsfreq", "wdfreq")
var_dims = (("wsbin", "sector", "point"), ("sector", "point"))
for name, dims in zip(var_names, var_dims):
hist_point[name] = (dims, eval(name))
hist = hist_point.drop_vars(["wv_count", "count_bins_exceeded"], errors="ignore")
return hist
def _finalize_meso(total_stab_histogram, wv_count, landmask=None, percentile=0.5):
"""Finalize cumulative mesoscale climate
Computes conditional mean mesoscale climate from cumulative
histogram with mesoscale climate variables. The mean
and standard deviation of heat flux are calculated based on
the values occuring at the 50 percent highest wind speeds
(percentile=0.5). These fields are calculated seperately
over land and sea.
Parameters
----------
total_stab_histogram: xr.Dataset
Dataset with the sum of mesoscale variables
wrf: xr.Dataset
Dataset (e.g. `wrfout`) with the WRF variables
`LANDMASK`, `HGT`, `LU_INDEX`, `COSALPHA`,
`SINALPHA`
Returns
-------
ds: xr.Dataset
Dataset with mean or standard deviation of the
histograms
"""
hist = _stab_mean_std(total_stab_histogram, wv_count, percentile=percentile)
if landmask is not None:
hist = _nearest_landsea(hist, landmask)
update_var_attrs(hist, _MEAN_STAB_ATTRS)
return hist
def _nearest_landsea(ds, landmask):
"""Calculate mean values over sea and land
To use stability setting over land and sea seperately
we calculate two fields from one: the land and sea fields
are generated using Scipy's nearest neighbour interpolation
Parameters
----------
ds: xr.Dataset
Dataset with the variables given in `ori_vars`
Returns
-------
ds: xr.Dataset
Dataset with the variables given in `new_vars`
"""
ori_vars = [
"mean_temp_scale",
"rms_temp_scale",
"mean_pblh_scale",
"mean_temp_scale",
"rms_temp_scale",
"mean_pblh_scale",
]
new_vars = [
"mean_temp_scale_land",
"rms_temp_scale_land",
"mean_pblh_scale_land",
"mean_temp_scale_sea",
"rms_temp_scale_sea",
"mean_pblh_scale_sea",
]
    for var, ori_var in zip(new_vars, ori_vars):
        # ERA5 has a fractional landmask; cells whose land fraction
        # rounds to 1 are treated as land, otherwise as sea
        landflag = 1 if "_land" in var else 0
        list_secs = []
        for sec in ds.sector.values:
            tarr = (
                ds[ori_var]
                .squeeze()
                .sel(sector=sec)
                .where(np.round(landmask) == landflag)
            )
array = np.ma.masked_invalid(tarr)
xx, yy = np.meshgrid(ds.west_east.values, ds.south_north.values)
x1 = xx[~array.mask]
y1 = yy[~array.mask]
newarr = array[~array.mask]
GD1 = interpolate.griddata(
(x1, y1), newarr.ravel(), (xx, yy), method="nearest"
)
list_secs.append(GD1)
ds[var] = (("sector", "south_north", "west_east"), list_secs)
return ds[new_vars]
def _init_histogram_baro(ds, ws_bin_width=1, nws=40, nwd=12):
"""Initialize histogram
Initialize histogram with the variables needed for
a stability histogram. The dimensions of the spatial
structure are the same as the input ds and extended with
dimensions nws, nwd.
Parameters
----------
ds: xr.Dataset
Can have any of the pywasp spatial data structures
var_dic: dict
Dictonairy contain the variables that will be added to
the xarray container, including meta-data according to
the CF conventions
wsbin: float
width of wind speed bins
wdsec: float
width of wind direction bins
nws: int
Number of wind speed bins
nwd: int
Number of wind direction bins
Returns
-------
hist: xr.Dataset
Histogram with dimensions point, wsbin, sector
"""
bin_centers_ws = create_ws_bin_coords(bin_width=ws_bin_width, nws=nws)
bin_centers_wd = create_sector_coords(nwd)
hist = spatial_stack(ds, copy=False)[["point"]]
stabdim = hist.sizes["point"]
sum_dugdz = np.zeros((nws, nwd, stabdim), order="F", dtype=np.float32)
sum_dvgdz = np.zeros((nws, nwd, stabdim), order="F", dtype=np.float32)
count_bins_exceeded = np.zeros(stabdim, order="F", dtype=np.int32)
hist = hist.assign_coords({**bin_centers_wd.coords, **bin_centers_ws.coords})
    var_names = (
        "sum_dugdz",
        "sum_dvgdz",
        "count_bins_exceeded",
    )
    var_data = (sum_dugdz, sum_dvgdz, count_bins_exceeded)
    var_dims = (("wsbin", "sector", "point"),) * 2 + (("point",),)
    for name, values, dims in zip(var_names, var_data, var_dims):
        hist[name] = (dims, values, _HIS_BARO_ATTRS[name])
return hist
def _finalize_baro(hist, percentile=0.5):
"""Compute mean or standard deviation from histogram
Compute mean or standard deviation from histogram. The
variables are binned by sector and wind speed bin and
a mean or std. dev. is calulcated based on the highest
percentile of wind speeds for each sector.
Parameters
----------
hist: xr.Dataset
Dataset with variables `tstarsum`, `tstar2sum`
and `pblhsum` and dimensions `wsbin`, `sector`
Returns
-------
clim: xr.Dataset
Dataset with mean mesoclimate variables per sector
"""
hist = spatial_stack(hist, copy=False)
cond_hist, freq = _COND_HIST(hist["sum_dugdz"], hist["wv_count"], percentile)
mean_dugdz = _MEAN_HIST(cond_hist, freq)
cond_hist, freq = _COND_HIST(hist["sum_dvgdz"], hist["wv_count"], percentile)
mean_dvgdz = _MEAN_HIST(cond_hist, freq)
clim = hist[["point", "sector"]]
clim["mean_dgdz_dir_north"] = (
("sector", "point"),
np.arctan2(-mean_dvgdz, -mean_dugdz) * RTD,
)
clim["mean_dgdz_dir_south"] = (
("sector", "point"),
np.arctan2(mean_dvgdz, mean_dugdz) * RTD,
)
clim["mean_dgdz_dir"] = xr.where(
clim["south_north"] < 0,
clim["mean_dgdz_dir_south"],
clim["mean_dgdz_dir_north"],
)
clim["mean_dgdz"] = (
("sector", "point"),
np.sqrt(mean_dugdz**2 + mean_dvgdz**2),
)
out = spatial_unstack(
clim.drop_vars(["mean_dgdz_dir_north", "mean_dgdz_dir_south"])
)
update_var_attrs(out, _MEAN_BARO_ATTRS)
return out
def baroclinicity_histogram(
ds, hist=None, finalize=True, revert_to_original=True, **kw
):
"""Add timeseries to existing histogram
Uses the baroclinicity parameters to derive
summed histogram of the geostrophic shear components.
.. warning::
This function is experimental and its signature may change.
Parameters
----------
ds: xr.Dataset
Dataset containing variables
['wind_speed', 'wind_direction', 'PSFC', 'T2', 'UST',
'HFX', 'LH', 'PBLH', 'LANDMASK', 'ZNT']
Can have any pywasp spatial structure
hist: xr.Dataset
Histogram with dimensions point, wsbin, sector
    finalize: boolean
        Convert the baroclinicity histogram into mean geostrophic
        shear fields?
revert_to_original: boolean
Return the histogram input in original format from the
input `ds`? If false, keeps the data in stacked point
format which is more efficient when doing large
calculations for numerical wind atlases.
    **kw
        Arbitrary keyword arguments (ws_bin_width, nws, nwd) passed on
        to the initialization of the histogram
Returns
-------
    hist: xr.Dataset
        Histogram with dimensions point, wsbin, sector,
        with the values from the ds time series added.
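
    Examples
    --------
    A minimal sketch, assuming ``ts_ds`` provides ``wind_speed``,
    ``wind_direction`` and the geostrophic shear components ``dugdz``
    and ``dvgdz`` (the dataset name is illustrative):

    >>> baro = baroclinicity_histogram(ts_ds, nws=40, nwd=12)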
"""
if hist is None:
hist = _init_histogram_baro(ds, **kw)
ds_sub = ds[[WS, WD, "dugdz", "dvgdz"]]
dss = spatial_stack(ds_sub, copy=False).transpose(..., DIM_TIME, "point")
add = np.full(dss[WS].shape, 1.0, order="F", dtype=np.float32)
hist = spatial_stack(hist, copy=False).transpose(..., "wsbin", "sector", "point")
    # error arrays are inout in Fortran, so we must create copies
    # to avoid adding to the error counter twice
ierr_sum_dvgdz = hist["count_bins_exceeded"].copy().values
sum_dugdz, count_bins_exceeded = _create_hist(
dss[WS],
dss[WD],
0,
hist.wsbin,
hist.sector,
dss["dugdz"],
hist["sum_dugdz"].values,
hist["count_bins_exceeded"].values,
)
sum_dvgdz, count_bins_exceeded = _create_hist(
dss[WS],
dss[WD],
0,
hist.wsbin,
hist.sector,
dss["dvgdz"],
hist["sum_dvgdz"].values,
ierr_sum_dvgdz,
)
    var_names = (
        "sum_dugdz",
        "sum_dvgdz",
        "count_bins_exceeded",
    )
    var_data = (sum_dugdz, sum_dvgdz, count_bins_exceeded)
    for name, values in zip(var_names, var_data):
        hist[name] = (hist[name].dims, values)
        update_var_attrs(hist[name], _HIS_BARO_ATTRS)
        hist[name].attrs["_pwio_data_is_2d"] = False
dic_attrs = create_time_attributes(ds_sub, hist=hist)
hist = hist.assign_attrs(dic_attrs)
if finalize:
return _finalize_baro(hist)
if revert_to_original:
return spatial_unstack(hist)
else:
return hist
@bwc_validate_wrapper
def bwc_resample_like(source, target):
"""
    Resamples a histogram to a different sector or wind speed bin structure.
Parameters
----------
source: xarray.Dataset
Binned wind climate xr.Dataset object that we want to resample
target: xarray.Dataset
Binned wind climate xr.Dataset object structure that we want to resample to
Returns
-------
xarray.Dataset
Binned Wind Climate
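
    Examples
    --------
    Sketch with two binned wind climates (names illustrative):

    >>> bwc_new = bwc_resample_like(bwc_src, bwc_tgt)
    >>> bwc_new.sizes["sector"] == bwc_tgt.sizes["sector"]
    True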
"""
result = bwc_resample_sectors(source, target.sizes["sector"])
result = bwc_resample_wsbins_like(result, target)
return result
@bwc_validate_wrapper
def bwc_resample_sectors(source, n_sectors=12):
"""
    Resamples a histogram to a different sector structure.
    Parameters
    ----------
    source: xarray.Dataset
        Binned wind climate xr.Dataset object that we want to resample
    n_sectors: int
        Number of sectors to resample to. Default 12.

    Returns
    -------
    xarray.Dataset
        Binned Wind Climate
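
    Examples
    --------
    Sketch, resampling an illustrative 16-sector climate ``bwc16`` onto
    12 sectors:

    >>> bwc12 = bwc_resample_sectors(bwc16, n_sectors=12)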
    """
    # TODO: make sure we can also resample binned wind climates whose first
    # bin is not centered at north. This can be achieved with the doff
    # argument of the Fortran routine behind _RESAMPLE_HIST, which rotates
    # the histogram by a specified angle.
if source.sizes["sector"] == n_sectors:
return source
source = spatial_stack(source)
def _resample(wv_freq, n_sectors):
return _RESAMPLE_HIST(wv_freq, 0.0, n_sectors)
wv_freq = xr.apply_ufunc(
_resample,
(source.wsfreq * source.wdfreq).transpose(..., "wsbin", "sector"),
input_core_dims=[["wsbin", "sector"]],
output_core_dims=[["wsbin", "sector"]],
exclude_dims={"sector"},
dask="allowed",
vectorize=True,
kwargs={"n_sectors": n_sectors},
keep_attrs=True,
)
wdcenters = create_sector_coords(n_sectors)
wv_freq = wv_freq.assign_coords({**wdcenters.coords})
source = bwc_from_counts(wv_freq)
# Update metadata
source.attrs["title"] = "Binned wind climate"
res = spatial_unstack(source)
return update_history(res)
@bwc_validate_wrapper
def bwc_resample_wsbins_like(source, target, fit_weibull=True, errors="raise"):
"""
    Resamples a histogram to a different wind speed bin structure.
Parameters
----------
source: xarray.Dataset
Binned wind climate xr.Dataset object that we want to resample
target: xarray.Dataset
Binned wind climate xr.Dataset object structure that we want to resample to
    fit_weibull: boolean, optional
        Default True: when resampling the histogram, find the sector-wise
        A and k and use these to resample the histogram. If False, use
        the A and k provided in the target dataset instead.
    errors: {'raise', 'ignore'}, optional
        If 'raise' (default), raises a ValueError if probability mass is
        transferred beyond the highest wind speed bin.
        If 'ignore', all points, including those that lose probability
        mass, are returned.
Returns
-------
xarray.Dataset
Binned Wind Climate
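
    Examples
    --------
    Sketch, assuming ``bwc_src`` and ``bwc_tgt`` share the same sector
    structure (names illustrative):

    >>> bwc_new = bwc_resample_wsbins_like(bwc_src, bwc_tgt)
    >>> bwc_lo = bwc_resample_wsbins_like(bwc_src, bwc_tgt, errors="ignore")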
"""
    # we reset coordinates with drop=True to make sure there are no
    # user-added coordinates that cause the dataarrays to differ
if (
not source["sector"]
.reset_coords(drop=True)
.equals(target["sector"].reset_coords(drop=True))
):
raise ValueError(
"Input and target histogram must have the same sector structure!"
)
    # if source and target wsbin structures are the same and a Weibull fit
    # is requested, no resampling is needed
same_wsbins = (
source["wsbin"]
.reset_coords(drop=True)
.equals(target["wsbin"].reset_coords(drop=True))
)
if same_wsbins and fit_weibull:
return source
source = spatial_stack(source).transpose(..., "wsbin", "sector")
target = spatial_stack(target).transpose(..., "wsbin", "sector")
if not fit_weibull:
if not all(v in target.data_vars for v in ["A", "k"]):
raise ValueError(
"Variables A and k are needed for resampling when fit_weibull is set to False"
)
if fit_weibull:
wsfreq, ierror = xr.apply_ufunc(
_RESAMPLE_HIST_WSBINS,
source.wsfreq,
source.wsceil,
target.wsceil,
target.wsfloor,
input_core_dims=[["wsbin", "sector"], *[["wsbin"]] * 3],
output_core_dims=[["wsbin", "sector"], ["sector"]],
exclude_dims={"wsbin"},
dask="allowed",
vectorize=True,
keep_attrs=True,
)
else:
wsfreq, ierror = xr.apply_ufunc(
_RESAMPLE_HIST_WSBINS_AK,
source.wsfreq,
source.wsceil,
target.A,
target.k,
target.wsceil,
target.wsfloor,
input_core_dims=[
["wsbin", "sector"],
["wsbin"],
*[["sector"]] * 2,
*[["wsbin"]] * 2,
],
output_core_dims=[["wsbin", "sector"], ["sector"]],
exclude_dims={"wsbin"},
dask="allowed",
vectorize=True,
keep_attrs=True,
)
if ierror.sum() > 0 and errors == "raise":
i_points = [i for i, v in enumerate(ierror) if v > 0]
str_points = ",".join(map(str, i_points))
raise ValueError(
f"Resampling of the wsbins in {len(i_points)} out of {source.sizes['point']} points (i={str_points}) exceeded the number of target bins!"
)
result, _ = xr.broadcast(source[["point", "sector"]], target[["wsbin"]])
result = result.drop_vars("point", errors="ignore")
result["wsfreq"] = wsfreq
result["wdfreq"] = source["wdfreq"]
result.coords["wsceil"] = target["wsceil"].reset_coords(drop=True)
result.coords["wsfloor"] = target["wsfloor"].reset_coords(drop=True)
res = spatial_unstack(result)
return update_history(res)