Source code for windkit.generalized_wind_climate

# (c) 2022 DTU Wind Energy
"""
Generalized wind climate module

This module contains the various functions for working with generalized wind climates.

Currently this only supports creating gwc datasets from .lib files or from
NetCDF files. In the future we will also support the ability to read in .gwc
files.
"""

import re
import warnings
from pathlib import Path

import numpy as np
import xarray as xr
from lxml import etree

from ._errors import WindClimateValidationError
from ._rvea_xml import (
    _parse_rvea_anemometer_site_details,
    _parse_rvea_generalised_mean_wind_climate,
)
from ._validate import create_validator
from .metadata import (
    _WEIB_ATTRS,
    ALL_VARS_META,
    create_coords,
    update_history,
    update_var_attrs,
)
from .sector import create_sector_coords
from .spatial import create_dataset, crs_are_equal, reproject, to_point

_GEN_COORDS_META = {
    "gen_height": ALL_VARS_META["gen_height"],
    "gen_roughness": ALL_VARS_META["gen_roughness"],
}

DATA_VAR_DICT_GWC = {
    "A": ["sector", "gen_height", "gen_roughness"],
    "wdfreq": ["sector", "gen_height", "gen_roughness"],
    "k": ["sector", "gen_height", "gen_roughness"],
}

REQ_DIMS_GWC = ["sector", "gen_height", "gen_roughness"]

REQ_COORDS_GWC = [
    "south_north",
    "west_east",
    "gen_height",
    "gen_roughness",
    "sector_ceil",
    "sector_floor",
    "sector",
    "crs",
]


def _validate_height_and_roughness(genwc):
    """Helper function to validate generalized_height and generalized_roughness"""
    response_list = []

    if "gen_height" in genwc.dims:
        if genwc.sizes["gen_height"] < 2:
            response_list.append("gen_height needs at least 2 entries")

    if "gen_roughness" in genwc.dims:
        if genwc.sizes["gen_roughness"] < 2:
            response_list.append("gen_roughness needs at least 2 entries")
        if genwc["gen_roughness"].min() != 0.0:
            response_list.append("The first entry of gen_roughness must be 0.0")

    return response_list


def _validate_greater_than_zero(genwc):
    """Helper function to validate generalized heights and roughness are positive"""
    response_list = []

    if "gen_roughness" in genwc.dims and any(
        xr.where(genwc.gen_roughness.values < 0.0, True, False)
    ):
        response_list.append("'gen_roughness' has negative values")

    if "gen_height" in genwc.dims and any(
        xr.where(genwc.gen_height.values < 0.0, True, False)
    ):
        response_list.append("'gen_height' has negative values")

    return response_list


gwc_validate, gwc_validate_wrapper = create_validator(
    DATA_VAR_DICT_GWC,
    REQ_DIMS_GWC,
    REQ_COORDS_GWC,
    checks_iterator=[_validate_height_and_roughness, _validate_greater_than_zero],
)


def _is_gwc(wco):
    """Check if this is a generalized wind climate

    Returns true if gwc and false if not

    Parameters
    ----------
    wco: xarray.Dataset
        Wind Climate Object

    Returns
    -------
    Bool
        Returns true if gwc and false if not
    """
    try:
        gwc_validate(wco)
        return True
    except WindClimateValidationError:
        return False



[docs]
@gwc_validate_wrapper
def lib_string(gwc, /, gen_height=None):
    """Generates string representation of gwc dataset.

    Parameters
    ----------
    gwc: xarray.Dataset
        Dataset containing A, k, and wdfreq.
        Dimensions should be ('gen_height', 'gen_roughness', 'sector')
    gen_height: float
        Value of gen_height to use for saving to libfile. Since libfiles only allow a
        single set of wdfreq values, when your data has varying wdfreq values, you need
        to set this value. It is selected using the .sel selector from xarray.

    Returns
    -------
    str
        String representation of gwc dataset.

    """

    def _fmt_floats(dat, prec=3, extra=False):
        """
        Format a list of floats into a common format

        Parameters
        ----------
        dat: list
            List of floats to be formatted
        prec: int
            Precision of output string
            Default set to 3
        extra: bool
            Extra space between characters
            Default set to False (i.e., no extra space)

        Returns
        -------
        str
            String containing space separated floats
        """

        sep = " " if extra else ""
        fmt = "{0:9.%df}" % prec
        return sep.join([fmt.format(i) for i in dat])

    def _to_string(node):
        """Generates string representation of gwc dataset

        Parameters
        ----------
        node : xarray.Dataset
            Dataset containing A, k, and wdfreq.
            Dimesions should be ('height', 'roughness', 'sector')

        Returns
        -------
        str
            String representation of xarray dataset
        """
        if "height" not in node.coords:
            node = node.expand_dims({"height": [0.0]}).isel(height=0)
        nrough = node.sizes["gen_roughness"]
        nhgt = node.sizes["gen_height"]
        nsec = node.sizes["sector"]
        node = reproject(node, 4326).squeeze()
        # Extract numpy arrays to speed up the processing
        A = node.A.values
        k = node.k.values
        wdfreq = node.wdfreq.values
        string = ""
        newline = "\n"

        height = node.height

        # Write the header
        header_without_coord = re.sub(
            "<coordinates>(.*)</coordinates>", "", node.attrs["wasp_header"]
        )
        string += (
            header_without_coord
            + f"<coordinates>{float(node.west_east)},{float(node.south_north)},{float(height)}</coordinates>{newline}"
        )

        # Write the dimensions nz0,nz,nsec
        string += " ".join([str(i) for i in (nrough, nhgt, nsec)]) + newline

        # Write the roughness classes
        string += _fmt_floats(node.gen_roughness.values, 3, True) + newline

        # Write the heights
        string += _fmt_floats(node.gen_height.values, 1, True) + newline

        # Write the data arrays
        for i in range(nrough):
            # sectorwise frequency in percent
            string += _fmt_floats(100.0 * wdfreq[i, ...], 2) + newline
            for j in range(nhgt):
                # Sectorwise A's
                string += _fmt_floats(A[j, i, ...], 2) + newline
                # Sectorwise k's
                string += _fmt_floats(k[j, i, ...]) + newline

        return string

    dims_order = ["gen_height", "gen_roughness", "sector"]

    gwc = gwc.copy()
    gwc = gwc.transpose(*dims_order, ...)

    # if wdfreq varues with height we require users to choose the height
    if _wdfreq_constant_with_gen_height(gwc):
        gwc["wdfreq"] = gwc["wdfreq"].isel(gen_height=0)
    else:
        if gen_height is None:
            raise ValueError(
                "Lib files do not support different 'wdfreq' values for different heights. Please specify the 'gen_height' for the 'wdfreq' values that you wish to use for all levels."
            )
        else:
            gwc["wdfreq"] = gwc["wdfreq"].sel(gen_height=gen_height, method="nearest")

    if gwc.squeeze().A.ndim == 3:
        return _to_string(gwc.squeeze())

    dims_extra = [d for d in gwc.A.dims if d not in dims_order]
    stacked = gwc.stack(point=dims_extra).transpose(*dims_order, "point")
    # Get numbers of sectors, roughness classes and
    strings = []
    for ipt in range(stacked.sizes["point"]):
        # Need the slice to keep the point dimension as multi-index
        node = stacked.isel(point=slice(ipt, ipt + 1)).reset_index("point").squeeze()
        strings.append(_to_string(node))

    return strings



def _wdfreq_constant_with_gen_height(gwc):
    """Check if 'wdfreq' is constant with 'gen_height' dimension.

    Parameters
    ----------
    gwc : xarray.Dataset
        Generalized wind climate dataset.

    Returns
    -------
    bool
        True if 'wdfreq' is constant with 'gen_height' dimension across whole dataset.
    """
    return np.isclose(gwc["wdfreq"].diff(dim="gen_height").max(), 0.0).all()



[docs]
@gwc_validate_wrapper
def to_libfile(gwc, /, path=None, gen_height=None):
    """Creates lib-style ascii file from gwc dataset

    Parameters
    ----------
    gwc : xarray.Dataset
        Generalized wind climate dataset.
    path : str
        dir or file path for storing lib file
        Default value set to the current working directory.
    gen_height: float
        Value of gen_height to use for saving to libfile. Since libfiles only allow a
        single set of wdfreq values, when your data has varying wdfreq values, you need
        to set this value. It is selected using the .sel selector from xarray.

    """

    gwc = gwc.copy()

    def _write(node, fpath):
        # Set newline explicitly in string not in open.
        with open(fpath, "w", newline="\r\n") as fobj:
            fobj.write(lib_string(node, gen_height=gen_height))

    def _fmt_single_point_filename(ds):
        single_point_coords = ["height", "south_north", "west_east"]
        vals = []
        for coord in single_point_coords:
            vals.append(ds[coord].values.flatten()[0])
        filename = f"gwc_height{vals[0]}_south_north{vals[1]}_west_east{vals[2]}.lib"
        return filename

    if path is None:
        path = Path.cwd()
    path = Path(path)

    if path.suffix == "":  # it is a directory
        path.mkdir(parents=True, exist_ok=True)

    if gwc.squeeze().A.ndim == 3:
        if path.is_dir():
            # fpath = path / "gwc.lib"
            fpath = path / _fmt_single_point_filename(gwc)
        else:
            path.parent.mkdir(parents=True, exist_ok=True)
            fpath = path
        _write(gwc.squeeze(), fpath)
        return

    # If dataset has extra dimensions (of size > 1):
    # Stack extra dimensions, loop over them, and write to tab files
    # Using file_name that contains coordinate information.
    dims_extra = [
        d for d in gwc.A.dims if d not in ["gen_height", "gen_roughness", "sector"]
    ]
    stacked = gwc.stack(point=dims_extra)

    # Create file_name format string
    if Path(path).is_dir():
        file_name_fmt = (
            "_".join(["gwc"] + [f"{d}" + "{" + f"{d}" + "}" for d in dims_extra])
            + ".lib"
        )
    else:
        raise ValueError(
            "'path' argument is a filename, but the dataset has more than one point."
            " Try giving a directory as an argument."
        )

    # Loop and write to tab files
    for ipt in range(stacked.sizes["point"]):
        node = stacked.isel(point=slice(ipt, ipt + 1)).reset_index("point").squeeze()
        kwds = {d: node[d].values for d in dims_extra}
        fpath = path / file_name_fmt.format(**kwds)
        _write(node, fpath)

    return




[docs]
def read_gwc(
    file_or_obj, crs=None, west_east=None, south_north=None, height=None, header=None
):
    """Create gwc xarray.Dataset from file.

    Parameters
    ----------
    file : str or Path
        Path to a file that can be opened a gwc. This includes .lib, .gwc, and
        .nc files that were created as gwc files. The script will use the file
        extension to determine the file type and then parse it into a gwc object.
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon on the WGS84 geodetic datum). for .lib and .gwc.
    west_east : float or None, optional
        West-East coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    south_north : float or None, optional
        South-North coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    height : float or None, optional
        Height coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    header : str or None, optional
        Header string to use for .lib and .gwc files, by default None, which will
        attempt to read from the file.

    Returns
    -------
    xarray.Dataset
        Generalized wind climate dataset.

    Raises
    ------
    ValueError
        If the file extension is not recognized.
    ValueError
        If the requested crs does not match the dataset crs.

    """
    file_or_obj = Path(file_or_obj)
    ext = file_or_obj.suffix

    if ext == ".lib":
        ds = _read_lib_file(
            file_or_obj,
            crs=crs,
            west_east=west_east,
            south_north=south_north,
            height=height,
            header=header,
        )
    elif ext in [".gwc"]:
        ds = _read_gwc_file(
            file_or_obj,
            crs=crs,
            west_east=west_east,
            south_north=south_north,
            height=height,
            header=header,
        )
    elif ext in [".nc"]:
        ds = xr.open_dataset(file_or_obj)
        if crs is not None and not crs_are_equal(ds, crs):
            raise ValueError(f"Requested crs does not match dataset crs")
    else:
        raise ValueError(
            f"Unable to detect type of gwc file {file_or_obj} with extension {ext}."
        )
    gwc_validate(ds)  # Validate for all file types
    ds = update_var_attrs(ds, _WEIB_ATTRS)
    return update_history(ds)



def _read_lib_file(
    lib_file, crs=None, west_east=None, south_north=None, height=None, header=None
):
    r"""
    Create GenWindClimate object from WAsP .lib file

    Parameters
    ----------
    lib_file : str, pathlib.Path
        Path to lib file
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon on the WGS84 geodetic datum)
    west_east : float or None, optional
        West-East coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    south_north : float or None, optional
        South-North coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    height : float or None, optional
        Height coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    header : str or None, optional
        Header string to use for .lib and .gwc files, by default None, which will
        attempt to read from the file.

    Returns
    -------
    xr.DataSet
        xarray DataSet that is formatted to match the gwc description

    Raises
    ------
    ValueError
        If coordinates are not present in the file and are not provided as
        arguments.

    Notes
    -----
    Automatically adds lat, lon coords if present inside
    <coordinates>lon,lat,height<\coordinates> brackets
    """

    def _read_float_(f):
        """Reads a line of space separated data and splits it into floats

        Parameters
        ----------
        f : file
            Object with method readline

        Returns
        -------
        list
            List of floats
        """
        return [np.float32(i) for i in f.readline().strip().split()]

    def _read_int_(f):
        """Reads a line of space-separated data and splits it into integers

        Parameters
        ----------
        f : file
            Object with method readline

        Returns
        -------
        list
            List of integers
        """
        return [np.int32(i) for i in f.readline().strip().split()]

    # Open libfile
    with open(lib_file) as f:
        # Read header information one line at a time
        desc = f.readline().strip()  # File Description
        nrough, nhgt, nsec = _read_int_(f)  # dimensions
        gen_roughness = _read_float_(f)  # Roughness classes
        gen_height = _read_float_(f)  # heights

        # Initialize arrays
        wdfreq = np.zeros([nsec, nrough], dtype="f4", order="F")
        k = np.zeros([nsec, nhgt, nrough], dtype="f4", order="F")
        A = np.zeros([nsec, nhgt, nrough], dtype="f4")

        ##################################################################
        # The remainder of the file is made up of rows with nsec columns.
        # For each height there is first a frequency row, then pairs of
        # A & k rows for each height.
        ##################################################################
        # Loop over roughness classes to read frequency line
        for i, dummy in enumerate(gen_roughness):
            wdfreq[:, i] = _read_float_(f)
            # Loop over heights to read in all A & k values
            for j, dummy in enumerate(gen_height):
                A[:, j, i] = _read_float_(f)
                k[:, j, i] = _read_float_(f)

    if crs is None:
        crs = "EPSG:4326"

    # Find the coordinates if they aren't provided
    if all(c is None for c in [west_east, south_north, height]):
        # Find the coordinates
        latlons = re.search("<coordinates>(.*)</coordinates>", desc)
        if latlons:
            west_east, south_north, height = map(
                np.float32, latlons.group(1).split(",")
            )
        else:
            raise ValueError(
                f"Coordinates not found in {lib_file}, "
                + "please set 'west_east', 'south_north', and 'height' explicitly. "
                + "These values should ge in the projection given by the 'crs' argument,"
                + " which defaults to the WGS84 projection."
            )

    if header is None:
        if len(desc) > 0:
            header = desc
        else:
            header = ""

    # Add height to wdfreq
    wdfreq = np.tile(wdfreq[:, np.newaxis, :], (1, nhgt, 1))

    return _weibull_to_dataset(
        wdfreq,
        A,
        k,
        gen_roughness,
        gen_height,
        south_north,
        west_east,
        height,
        crs,
        wasp_header=header,
    )


def _weibull_to_dataset(
    wdfreq,
    A,
    k,
    gen_roughness,
    gen_height,
    south_north,
    west_east,
    height,
    crs,
    **kwargs,
):
    """
    Converts parsed xml gwc object to WindKit gwc xarray dataset

    Parameters
    ----------
    wdfreq : 1-D sequence of floats
        Wind direction frequencies. Dimensions=[sector, gen_height, gen_roughnness]
    A : numpy
        Weibull A parameters.  Dimensions=[sector, gen_height, gen_roughnness]
    k : numpy
        Weibull k parameters.  Dimensions=[sector, gen_height, gen_roughnness]
    gen_roughness : numpy
        Array of generalized roughnesses
    gen_height : numpy
        Array of generalized heights
    west_east : float or None, optional
        West-East coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    south_north : float or None, optional
        South-North coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    height : float or None, optional
        Height coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon on the WGS84 geodetic datum)

    kwargs : dict, optional
        Other key-word arguments are added as attributes to the dataset.

    Returns
    -------
    xarray.Dataset
        WindKit GWC dataset
    """

    nsec, _, _ = wdfreq.shape

    na = np.newaxis

    # Create dataset
    ds = create_dataset(west_east, south_north, height, crs).drop_vars("output")
    ds.attrs = kwargs

    # Add variables
    dims = ("sector", "gen_height", "gen_roughness", "point")
    ds["A"] = (dims, A[..., na])
    ds["k"] = (dims, k[..., na])
    ds["wdfreq"] = (dims, wdfreq[..., na])

    # Add coordinates
    ds = ds.assign_coords(
        {
            **create_coords(gen_height, "gen_height", _GEN_COORDS_META).coords,
            **create_coords(gen_roughness, "gen_roughness", _GEN_COORDS_META).coords,
            **create_sector_coords(nsec).coords,
        }
    )

    ds["wdfreq"] = ds["wdfreq"] / ds["wdfreq"].sum(dim="sector")

    return ds.transpose("gen_height", "gen_roughness", "sector", "point")


def _read_gwc_file(
    gwc_file,
    crs=None,
    west_east=None,
    south_north=None,
    height=None,
    header=None,
):
    """Read Generalized Wind Climate from XML-based .gwc file.

    Parameters
    ----------
    gwc_file : str, pathlib.Path
        input file
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
        Defaults to 4326 (Lat-Lon on the WGS84 geodetic datum)
    west_east : float or None, optional
        West-East coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    south_north : float or None, optional
        South-North coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    height : float or None, optional
        Height coordinate of the GWC grid, by default None, which will attempt to
        read from the file.
    header : str or None, optional
        Header string to use for .lib and .gwc files, by default None, which will
        attempt to read from the file.

    Returns
    -------
    xarray.Dataset
        WindKit GWC dataset

    Raises
    ------
    ValueError
        If no RveaGeneralisedMeanWindClimate is found in the file.
    ValueError
        If coordinates are not found in the file and are not provided explicitly.

    Warns
    -----
    UserWarning
        If 'height' is not found in the file and are not provided explicitly.
        It will be set to 0.0

    """

    if crs is None:
        crs = "EPSG:4326"

    tree = etree.parse(str(gwc_file))
    root = tree.getroot()

    descendants = list(e.tag for e in root.getiterator())

    if root.tag == "RveaGeneralisedMeanWindClimate":
        gwc_tree = root
    elif "RveaGeneralisedMeanWindClimate" in descendants:
        gwc_tree = tree.find(".//RveaGeneralisedMeanWindClimate")
    else:
        raise ValueError(f"No RveaGeneralisedMeanWindClimate found in '{gwc_file}'")

    gwc_data = _parse_rvea_generalised_mean_wind_climate(gwc_tree)

    if "RveaAnemometerSiteDetails" in descendants:
        site_data = _parse_rvea_anemometer_site_details(
            tree.find(".//RveaAnemometerSiteDetails")
        )
    else:
        site_data = {}

    if west_east is None:
        west_east = gwc_data.get("longitude", site_data.get("longitude", None))

    if south_north is None:
        south_north = gwc_data.get("latitude", site_data.get("latitude", None))

    if height is None:
        height = gwc_data.get("height", site_data.get("height", None))

    if header == None:
        header = gwc_data.get("header", site_data.get("header", ""))

    if west_east is None or south_north is None:
        raise ValueError(
            "'west_east' or 'south_north' coordinate not found in file or provided as argument. "
            + "Please set 'west_east' and 'south_north' explicitly."
        )

    if height is None:
        height = 0.0
        warnings.warn(
            "No height found in file or provided as argument, using 0 m. Set 'height' explicitly to avoid this warning.",
        )

    return _weibull_to_dataset(
        wdfreq=gwc_data["wdfreq"],
        A=gwc_data["A"],
        k=gwc_data["k"],
        gen_roughness=gwc_data["gen_roughness"],
        gen_height=gwc_data["gen_height"],
        west_east=west_east,
        south_north=south_north,
        height=height,
        crs=crs,
        wasp_header=header,
    )



[docs]
@gwc_validate_wrapper
def reproject_gwc(gwc, /, to_crs):
    """Reprojects Generalized wind climate dataset.

    Parameters
    ----------
    gwc: xarray.Dataset
        Valid GWC dataset.
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`

    Returns
    -------
    xarray.Dataset
        Point based generalized wind climate dataset in new projection.
    """
    if not crs_are_equal(gwc, to_crs):
        return reproject(gwc, to_crs)

    # Return point based dataset even if not reprojected
    ds = to_point(gwc)
    return update_history(ds)