Source code for windkit.generalized_wind_climate

# (c) 2022 DTU Wind Energy
"""
Generalized wind climate module

This module contains the various functions for working with generalized wind climates.

Currently this only supports creating gwc datasets from .lib files or from
NetCDF files. In the future we will also support the ability to read in .gwc
files.
"""

import re
import warnings
from pathlib import Path

import numpy as np
import xarray as xr
from lxml import etree

from ._errors import WindClimateValidationError
from ._rvea_xml import (
    _parse_rvea_anemometer_site_details,
    _parse_rvea_generalised_mean_wind_climate,
)
from ._validate import create_validator
from .metadata import (
    _WEIB_ATTRS,
    ALL_VARS_META,
    create_coords,
    update_history,
    update_var_attrs,
)
from .sector import create_sector_coords
from .spatial import create_dataset, crs_are_equal, reproject, to_point

_GEN_COORDS_META = {
    "gen_height": ALL_VARS_META["gen_height"],
    "gen_roughness": ALL_VARS_META["gen_roughness"],
}

DATA_VAR_DICT_GWC = {
    "A": ["sector", "gen_height", "gen_roughness"],
    "wdfreq": ["sector", "gen_height", "gen_roughness"],
    "k": ["sector", "gen_height", "gen_roughness"],
}

REQ_DIMS_GWC = ["sector", "gen_height", "gen_roughness"]

REQ_COORDS_GWC = [
    "south_north",
    "west_east",
    "gen_height",
    "gen_roughness",
    "sector_ceil",
    "sector_floor",
    "sector",
    "crs",
]


def _validate_height_and_roughness(genwc):
    """Helper function to validate generalized_height and generalized_roughness"""
    response_list = []

    if "gen_height" in genwc.dims:
        if genwc.sizes["gen_height"] < 2:
            response_list.append("gen_height needs at least 2 entries")

    if "gen_roughness" in genwc.dims:
        if genwc.sizes["gen_roughness"] < 2:
            response_list.append("gen_roughness needs at least 2 entries")
        if genwc["gen_roughness"].min() != 0.0:
            response_list.append("The first entry of gen_roughness must be 0.0")

    return response_list


def _validate_greater_than_zero(genwc):
    """Helper function to validate generalized heights and roughness are positive"""
    response_list = []

    if "gen_roughness" in genwc.dims and any(
        xr.where(genwc.gen_roughness.values < 0.0, True, False)
    ):
        response_list.append("'gen_roughness' has negative values")

    if "gen_height" in genwc.dims and any(
        xr.where(genwc.gen_height.values < 0.0, True, False)
    ):
        response_list.append("'gen_height' has negative values")

    return response_list


gwc_validate, gwc_validate_wrapper = create_validator(
    DATA_VAR_DICT_GWC,
    REQ_DIMS_GWC,
    REQ_COORDS_GWC,
    checks_iterator=[_validate_height_and_roughness, _validate_greater_than_zero],
)


def _is_gwc(wco):
    """Check if this is a generalized wind climate

    Returns true if gwc and false if not

    Parameters
    ----------
    wco: xarray.Dataset
        Wind Climate Object

    Returns
    -------
    Bool
        Returns true if gwc and false if not
    """
    try:
        gwc_validate(wco)
        return True
    except WindClimateValidationError:
        return False


[docs] @gwc_validate_wrapper def lib_string(gwc, /, gen_height=None): """Generates string representation of gwc dataset. Parameters ---------- gwc: xarray.Dataset Dataset containing A, k, and wdfreq. Dimensions should be ('gen_height', 'gen_roughness', 'sector') gen_height: float Value of gen_height to use for saving to libfile. Since libfiles only allow a single set of wdfreq values, when your data has varying wdfreq values, you need to set this value. It is selected using the .sel selector from xarray. Returns ------- str String representation of gwc dataset. """ def _fmt_floats(dat, prec=3, extra=False): """ Format a list of floats into a common format Parameters ---------- dat: list List of floats to be formatted prec: int Precision of output string Default set to 3 extra: bool Extra space between characters Default set to False (i.e., no extra space) Returns ------- str String containing space separated floats """ sep = " " if extra else "" fmt = "{0:9.%df}" % prec return sep.join([fmt.format(i) for i in dat]) def _to_string(node): """Generates string representation of gwc dataset Parameters ---------- node : xarray.Dataset Dataset containing A, k, and wdfreq. Dimesions should be ('height', 'roughness', 'sector') Returns ------- str String representation of xarray dataset """ if "height" not in node.coords: node = node.expand_dims({"height": [0.0]}).isel(height=0) nrough = node.sizes["gen_roughness"] nhgt = node.sizes["gen_height"] nsec = node.sizes["sector"] node = reproject(node, 4326).squeeze() # Extract numpy arrays to speed up the processing A = node.A.values k = node.k.values wdfreq = node.wdfreq.values string = "" newline = "\n" height = node.height # Write the header header_without_coord = re.sub( "<coordinates>(.*)</coordinates>", "", node.attrs["wasp_header"] ) string += ( header_without_coord + f"<coordinates>{float(node.west_east)},{float(node.south_north)},{float(height)}</coordinates>{newline}" ) # Write the dimensions nz0,nz,nsec string += " ".join([str(i) for i in (nrough, nhgt, nsec)]) + newline # Write the roughness classes string += _fmt_floats(node.gen_roughness.values, 3, True) + newline # Write the heights string += _fmt_floats(node.gen_height.values, 1, True) + newline # Write the data arrays for i in range(nrough): # sectorwise frequency in percent string += _fmt_floats(100.0 * wdfreq[i, ...], 2) + newline for j in range(nhgt): # Sectorwise A's string += _fmt_floats(A[j, i, ...], 2) + newline # Sectorwise k's string += _fmt_floats(k[j, i, ...]) + newline return string dims_order = ["gen_height", "gen_roughness", "sector"] gwc = gwc.copy() gwc = gwc.transpose(*dims_order, ...) # if wdfreq varues with height we require users to choose the height if _wdfreq_constant_with_gen_height(gwc): gwc["wdfreq"] = gwc["wdfreq"].isel(gen_height=0) else: if gen_height is None: raise ValueError( "Lib files do not support different 'wdfreq' values for different heights. Please specify the 'gen_height' for the 'wdfreq' values that you wish to use for all levels." ) else: gwc["wdfreq"] = gwc["wdfreq"].sel(gen_height=gen_height, method="nearest") if gwc.squeeze().A.ndim == 3: return _to_string(gwc.squeeze()) dims_extra = [d for d in gwc.A.dims if d not in dims_order] stacked = gwc.stack(point=dims_extra).transpose(*dims_order, "point") # Get numbers of sectors, roughness classes and strings = [] for ipt in range(stacked.sizes["point"]): # Need the slice to keep the point dimension as multi-index node = stacked.isel(point=slice(ipt, ipt + 1)).reset_index("point").squeeze() strings.append(_to_string(node)) return strings
def _wdfreq_constant_with_gen_height(gwc): """Check if 'wdfreq' is constant with 'gen_height' dimension. Parameters ---------- gwc : xarray.Dataset Generalized wind climate dataset. Returns ------- bool True if 'wdfreq' is constant with 'gen_height' dimension across whole dataset. """ return np.isclose(gwc["wdfreq"].diff(dim="gen_height").max(), 0.0).all()
[docs] @gwc_validate_wrapper def to_libfile(gwc, /, path=None, gen_height=None): """Creates lib-style ascii file from gwc dataset Parameters ---------- gwc : xarray.Dataset Generalized wind climate dataset. path : str dir or file path for storing lib file Default value set to the current working directory. gen_height: float Value of gen_height to use for saving to libfile. Since libfiles only allow a single set of wdfreq values, when your data has varying wdfreq values, you need to set this value. It is selected using the .sel selector from xarray. """ gwc = gwc.copy() def _write(node, fpath): # Set newline explicitly in string not in open. with open(fpath, "w", newline="\r\n") as fobj: fobj.write(lib_string(node, gen_height=gen_height)) def _fmt_single_point_filename(ds): single_point_coords = ["height", "south_north", "west_east"] vals = [] for coord in single_point_coords: vals.append(ds[coord].values.flatten()[0]) filename = f"gwc_height{vals[0]}_south_north{vals[1]}_west_east{vals[2]}.lib" return filename if path is None: path = Path.cwd() path = Path(path) if path.suffix == "": # it is a directory path.mkdir(parents=True, exist_ok=True) if gwc.squeeze().A.ndim == 3: if path.is_dir(): # fpath = path / "gwc.lib" fpath = path / _fmt_single_point_filename(gwc) else: path.parent.mkdir(parents=True, exist_ok=True) fpath = path _write(gwc.squeeze(), fpath) return # If dataset has extra dimensions (of size > 1): # Stack extra dimensions, loop over them, and write to tab files # Using file_name that contains coordinate information. dims_extra = [ d for d in gwc.A.dims if d not in ["gen_height", "gen_roughness", "sector"] ] stacked = gwc.stack(point=dims_extra) # Create file_name format string if Path(path).is_dir(): file_name_fmt = ( "_".join(["gwc"] + [f"{d}" + "{" + f"{d}" + "}" for d in dims_extra]) + ".lib" ) else: raise ValueError( "'path' argument is a filename, but the dataset has more than one point." " Try giving a directory as an argument." ) # Loop and write to tab files for ipt in range(stacked.sizes["point"]): node = stacked.isel(point=slice(ipt, ipt + 1)).reset_index("point").squeeze() kwds = {d: node[d].values for d in dims_extra} fpath = path / file_name_fmt.format(**kwds) _write(node, fpath) return
[docs] def read_gwc( file_or_obj, crs=None, west_east=None, south_north=None, height=None, header=None ): """Create gwc xarray.Dataset from file. Parameters ---------- file : str or Path Path to a file that can be opened a gwc. This includes .lib, .gwc, and .nc files that were created as gwc files. The script will use the file extension to determine the file type and then parse it into a gwc object. crs : int, dict, str or pyproj.crs.CRS Value to initialize `pyproj.crs.CRS` Defaults to 4326 (Lat-Lon on the WGS84 geodetic datum). for .lib and .gwc. west_east : float or None, optional West-East coordinate of the GWC grid, by default None, which will attempt to read from the file. south_north : float or None, optional South-North coordinate of the GWC grid, by default None, which will attempt to read from the file. height : float or None, optional Height coordinate of the GWC grid, by default None, which will attempt to read from the file. header : str or None, optional Header string to use for .lib and .gwc files, by default None, which will attempt to read from the file. Returns ------- xarray.Dataset Generalized wind climate dataset. Raises ------ ValueError If the file extension is not recognized. ValueError If the requested crs does not match the dataset crs. """ file_or_obj = Path(file_or_obj) ext = file_or_obj.suffix if ext == ".lib": ds = _read_lib_file( file_or_obj, crs=crs, west_east=west_east, south_north=south_north, height=height, header=header, ) elif ext in [".gwc"]: ds = _read_gwc_file( file_or_obj, crs=crs, west_east=west_east, south_north=south_north, height=height, header=header, ) elif ext in [".nc"]: ds = xr.open_dataset(file_or_obj) if crs is not None and not crs_are_equal(ds, crs): raise ValueError(f"Requested crs does not match dataset crs") else: raise ValueError( f"Unable to detect type of gwc file {file_or_obj} with extension {ext}." ) gwc_validate(ds) # Validate for all file types ds = update_var_attrs(ds, _WEIB_ATTRS) return update_history(ds)
def _read_lib_file( lib_file, crs=None, west_east=None, south_north=None, height=None, header=None ): r""" Create GenWindClimate object from WAsP .lib file Parameters ---------- lib_file : str, pathlib.Path Path to lib file crs : int, dict, str or pyproj.crs.CRS Value to initialize `pyproj.crs.CRS` Defaults to 4326 (Lat-Lon on the WGS84 geodetic datum) west_east : float or None, optional West-East coordinate of the GWC grid, by default None, which will attempt to read from the file. south_north : float or None, optional South-North coordinate of the GWC grid, by default None, which will attempt to read from the file. height : float or None, optional Height coordinate of the GWC grid, by default None, which will attempt to read from the file. header : str or None, optional Header string to use for .lib and .gwc files, by default None, which will attempt to read from the file. Returns ------- xr.DataSet xarray DataSet that is formatted to match the gwc description Raises ------ ValueError If coordinates are not present in the file and are not provided as arguments. Notes ----- Automatically adds lat, lon coords if present inside <coordinates>lon,lat,height<\coordinates> brackets """ def _read_float_(f): """Reads a line of space separated data and splits it into floats Parameters ---------- f : file Object with method readline Returns ------- list List of floats """ return [np.float32(i) for i in f.readline().strip().split()] def _read_int_(f): """Reads a line of space-separated data and splits it into integers Parameters ---------- f : file Object with method readline Returns ------- list List of integers """ return [np.int32(i) for i in f.readline().strip().split()] # Open libfile with open(lib_file) as f: # Read header information one line at a time desc = f.readline().strip() # File Description nrough, nhgt, nsec = _read_int_(f) # dimensions gen_roughness = _read_float_(f) # Roughness classes gen_height = _read_float_(f) # heights # Initialize arrays wdfreq = np.zeros([nsec, nrough], dtype="f4", order="F") k = np.zeros([nsec, nhgt, nrough], dtype="f4", order="F") A = np.zeros([nsec, nhgt, nrough], dtype="f4") ################################################################## # The remainder of the file is made up of rows with nsec columns. # For each height there is first a frequency row, then pairs of # A & k rows for each height. ################################################################## # Loop over roughness classes to read frequency line for i, dummy in enumerate(gen_roughness): wdfreq[:, i] = _read_float_(f) # Loop over heights to read in all A & k values for j, dummy in enumerate(gen_height): A[:, j, i] = _read_float_(f) k[:, j, i] = _read_float_(f) if crs is None: crs = "EPSG:4326" # Find the coordinates if they aren't provided if all(c is None for c in [west_east, south_north, height]): # Find the coordinates latlons = re.search("<coordinates>(.*)</coordinates>", desc) if latlons: west_east, south_north, height = map( np.float32, latlons.group(1).split(",") ) else: raise ValueError( f"Coordinates not found in {lib_file}, " + "please set 'west_east', 'south_north', and 'height' explicitly. " + "These values should ge in the projection given by the 'crs' argument," + " which defaults to the WGS84 projection." ) if header is None: if len(desc) > 0: header = desc else: header = "" # Add height to wdfreq wdfreq = np.tile(wdfreq[:, np.newaxis, :], (1, nhgt, 1)) return _weibull_to_dataset( wdfreq, A, k, gen_roughness, gen_height, south_north, west_east, height, crs, wasp_header=header, ) def _weibull_to_dataset( wdfreq, A, k, gen_roughness, gen_height, south_north, west_east, height, crs, **kwargs, ): """ Converts parsed xml gwc object to WindKit gwc xarray dataset Parameters ---------- wdfreq : 1-D sequence of floats Wind direction frequencies. Dimensions=[sector, gen_height, gen_roughnness] A : numpy Weibull A parameters. Dimensions=[sector, gen_height, gen_roughnness] k : numpy Weibull k parameters. Dimensions=[sector, gen_height, gen_roughnness] gen_roughness : numpy Array of generalized roughnesses gen_height : numpy Array of generalized heights west_east : float or None, optional West-East coordinate of the GWC grid, by default None, which will attempt to read from the file. south_north : float or None, optional South-North coordinate of the GWC grid, by default None, which will attempt to read from the file. height : float or None, optional Height coordinate of the GWC grid, by default None, which will attempt to read from the file. crs : int, dict, str or pyproj.crs.CRS Value to initialize `pyproj.crs.CRS` Defaults to 4326 (Lat-Lon on the WGS84 geodetic datum) kwargs : dict, optional Other key-word arguments are added as attributes to the dataset. Returns ------- xarray.Dataset WindKit GWC dataset """ nsec, _, _ = wdfreq.shape na = np.newaxis # Create dataset ds = create_dataset(west_east, south_north, height, crs).drop_vars("output") ds.attrs = kwargs # Add variables dims = ("sector", "gen_height", "gen_roughness", "point") ds["A"] = (dims, A[..., na]) ds["k"] = (dims, k[..., na]) ds["wdfreq"] = (dims, wdfreq[..., na]) # Add coordinates ds = ds.assign_coords( { **create_coords(gen_height, "gen_height", _GEN_COORDS_META).coords, **create_coords(gen_roughness, "gen_roughness", _GEN_COORDS_META).coords, **create_sector_coords(nsec).coords, } ) ds["wdfreq"] = ds["wdfreq"] / ds["wdfreq"].sum(dim="sector") return ds.transpose("gen_height", "gen_roughness", "sector", "point") def _read_gwc_file( gwc_file, crs=None, west_east=None, south_north=None, height=None, header=None, ): """Read Generalized Wind Climate from XML-based .gwc file. Parameters ---------- gwc_file : str, pathlib.Path input file crs : int, dict, str or pyproj.crs.CRS Value to initialize `pyproj.crs.CRS` Defaults to 4326 (Lat-Lon on the WGS84 geodetic datum) west_east : float or None, optional West-East coordinate of the GWC grid, by default None, which will attempt to read from the file. south_north : float or None, optional South-North coordinate of the GWC grid, by default None, which will attempt to read from the file. height : float or None, optional Height coordinate of the GWC grid, by default None, which will attempt to read from the file. header : str or None, optional Header string to use for .lib and .gwc files, by default None, which will attempt to read from the file. Returns ------- xarray.Dataset WindKit GWC dataset Raises ------ ValueError If no RveaGeneralisedMeanWindClimate is found in the file. ValueError If coordinates are not found in the file and are not provided explicitly. Warns ----- UserWarning If 'height' is not found in the file and are not provided explicitly. It will be set to 0.0 """ if crs is None: crs = "EPSG:4326" tree = etree.parse(str(gwc_file)) root = tree.getroot() descendants = list(e.tag for e in root.getiterator()) if root.tag == "RveaGeneralisedMeanWindClimate": gwc_tree = root elif "RveaGeneralisedMeanWindClimate" in descendants: gwc_tree = tree.find(".//RveaGeneralisedMeanWindClimate") else: raise ValueError(f"No RveaGeneralisedMeanWindClimate found in '{gwc_file}'") gwc_data = _parse_rvea_generalised_mean_wind_climate(gwc_tree) if "RveaAnemometerSiteDetails" in descendants: site_data = _parse_rvea_anemometer_site_details( tree.find(".//RveaAnemometerSiteDetails") ) else: site_data = {} if west_east is None: west_east = gwc_data.get("longitude", site_data.get("longitude", None)) if south_north is None: south_north = gwc_data.get("latitude", site_data.get("latitude", None)) if height is None: height = gwc_data.get("height", site_data.get("height", None)) if header == None: header = gwc_data.get("header", site_data.get("header", "")) if west_east is None or south_north is None: raise ValueError( "'west_east' or 'south_north' coordinate not found in file or provided as argument. " + "Please set 'west_east' and 'south_north' explicitly." ) if height is None: height = 0.0 warnings.warn( "No height found in file or provided as argument, using 0 m. Set 'height' explicitly to avoid this warning.", ) return _weibull_to_dataset( wdfreq=gwc_data["wdfreq"], A=gwc_data["A"], k=gwc_data["k"], gen_roughness=gwc_data["gen_roughness"], gen_height=gwc_data["gen_height"], west_east=west_east, south_north=south_north, height=height, crs=crs, wasp_header=header, )
[docs] @gwc_validate_wrapper def reproject_gwc(gwc, /, to_crs): """Reprojects Generalized wind climate dataset. Parameters ---------- gwc: xarray.Dataset Valid GWC dataset. crs : int, dict, str or pyproj.crs.CRS Value to initialize `pyproj.crs.CRS` Returns ------- xarray.Dataset Point based generalized wind climate dataset in new projection. """ if not crs_are_equal(gwc, to_crs): return reproject(gwc, to_crs) # Return point based dataset even if not reprojected ds = to_point(gwc) return update_history(ds)