# Source code for windkit.time_series_wind_climate

# (c) 2022 DTU Wind Energy
"""Time series wind climate module

A time series wind climate is defined by a dataset containing time series of
``wind speed`` and ``wind direction``.

A valid time series wind climate therefore has a dimension ``time``.
Also it must have one of the valid :ref:`geospatial_structures`. This module contains
functions that operate on time series wind climates.
This includes the ability to create time series datasets from files and from
existing data.
"""

import collections
import re
import warnings

import numpy as np
import pandas as pd
import xarray as xr

from ._errors import WindClimateValidationError
from ._validate import create_validator
from .metadata import _TS_ATTRS, update_history, update_var_attrs
from .spatial import to_stacked_point
from .spatial._crs import add_crs

# Names of the two data variables that make up a time series wind climate.
WS = "wind_speed"
WD = "wind_direction"
# Name of the dimension both variables are declared on.
DIM_TIME = "time"
# Inputs for create_validator below: data variable name -> list of its dimensions.
DATA_VAR_DICT_TS = {WS: [DIM_TIME], WD: [DIM_TIME]}
# NOTE(review): going by the REQ_ prefix these are presumably the dimension and
# coordinate names a dataset must carry to validate -- confirm in ._validate.
REQ_DIMS_TS = [DIM_TIME]
REQ_COORDS_TS = ["south_north", "west_east", "height", "crs"]


# create_validator returns a pair. ts_validate is called directly on datasets in
# this module (_is_ts catches WindClimateValidationError around it).
# NOTE(review): ts_validate_wrapper is presumably a decorator form of the same
# check -- confirm in ._validate.
ts_validate, ts_validate_wrapper = create_validator(
    DATA_VAR_DICT_TS, REQ_DIMS_TS, REQ_COORDS_TS
)


def _is_ts(wco):
    """Tell whether an object validates as a time series wind climate.

    Parameters
    ----------
    wco: xarray.Dataset
        Wind Climate Object

    Returns
    -------
    bool
        True when ``ts_validate`` accepts ``wco``, False when it raises
        ``WindClimateValidationError``.
    """
    try:
        ts_validate(wco)
    except WindClimateValidationError:
        is_valid = False
    else:
        is_valid = True
    return is_valid



# [docs]
def read_ts_windpro_txt(fpath):
    """Parse a WindPRO format txt file into a time series wind climate dataset.

    The header is scanned for the geographical coordinates and the displacement
    height, up to the line that holds the ``TimeStamp`` column header. The
    table starting at that line is read with pandas, the measurement height is
    taken from the name of the mean wind speed column, and rows whose wind
    speed contains "-" or whose wind speed or wind direction is missing are
    dropped.

    Parameters
    ----------
    fpath : str
        Path of the file to be parsed.

    Returns
    -------
    xarray.Dataset
        Dataset with variables ``wind_speed`` and ``wind_direction`` along
        ``time`` and scalar coordinates ``south_north`` (parsed latitude),
        ``west_east`` (parsed longitude), ``height`` and ``crs`` (EPSG 4326).

    Raises
    ------
    Exception
        If no coordinates could be parsed from the header.
    ValueError
        If the ``TimeStamp`` header line, the mean wind speed column (or the
        height in its name) or the wind direction column cannot be found.

    Warns
    -----
    UserWarning
        If the header declares a positive displacement height.
    """

    def _is_float(value):
        try:
            float(value)
            return True
        except ValueError:
            return False

    # parse parameters from windpro header;
    lng, lat = 0.0, 0.0
    data_start_line = -1
    disp_height = -1
    with open(fpath, "r") as file:
        for i, line in enumerate(file):
            # parse coordinates
            if "Geographical Coordinates" in line:
                parts = line.split()
                # Pair each token with the one before it. Indexing with j - 1
                # would wrap around to the last token for the first one.
                for label, token in zip(parts, parts[1:]):
                    if not _is_float(token):
                        continue
                    if label == "Longitude:":
                        lng = float(token)
                    elif label == "Latitude:":
                        lat = float(token)
            # parse displacement height (the last numeric token on the line wins)
            if "Displacement height" in line:
                for token in line.split():
                    if _is_float(token):
                        disp_height = float(token)
            # reached the column header of the data table
            if "TimeStamp" in line:
                data_start_line = i
                break

    if disp_height > 0:
        warnings.warn(
            "Displacement height cannot be used in WindKit. Set it up via the map instead."
        )
    if lng == 0.0 and lat == 0.0:
        # Plain Exception is kept so existing callers keep catching the same type.
        raise Exception("Couldn't parse coordinates")
    if data_start_line < 0:
        raise ValueError(
            f"No 'TimeStamp' header line was found in {fpath!r}; "
            "cannot locate the data table."
        )

    ts = pd.read_csv(
        fpath,
        delimiter="\t{2}|\t",
        parse_dates=["TimeStamp"],
        skiprows=range(data_start_line),
        engine="python",
    )

    # parse height from the wind speed column name and normalise column names
    height = None
    renames = {}
    for col in ts.columns:
        if "Mean wind speed" in col:
            # Escaped decimal point with an optional fraction, so "100.00m"
            # and "80m" are both understood.
            match = re.search(r"(\d+(?:\.\d+)?)m", col)
            if match is None:
                raise ValueError(
                    f"Could not parse the measurement height from column {col!r}."
                )
            height = float(match.group(1))
            renames[col] = "ws"
        elif "Wind direction" in col:
            renames[col] = "wd"
    ts = ts.rename(renames, axis="columns")

    if height is None:
        raise ValueError("No 'Mean wind speed' column was found in the data table.")
    if "wd" not in ts.columns:
        raise ValueError("No 'Wind direction' column was found in the data table.")

    # Compare on the string form so the "-" filter also works when pandas
    # already parsed the column as numbers or when it contains missing values.
    ws_text = ts["ws"].astype(str)
    ts = ts[~ws_text.str.contains("-", regex=False)]
    ts = ts[ts["ws"].notna()]
    ts = ts[ts["wd"].notna()]
    ts["ws"] = ts["ws"].astype(float)
    ts["wd"] = ts["wd"].astype(float)

    ts_ds = xr.Dataset(
        {
            "wind_speed": (["time"], ts["ws"]),
            "wind_direction": (["time"], ts["wd"]),
        },
        coords={
            "time": ("time", ts["TimeStamp"]),
            "south_north": lat,
            "west_east": lng,
            "height": height,
            "crs": 0,
        },
    )

    # Use the returned objects, as read_timeseries_from_pandas does, instead of
    # relying on in-place modification.
    ts_ds = add_crs(ts_ds, 4326)
    ts_ds = update_var_attrs(ts_ds, {**_TS_ATTRS})
    # validate the dataset before returning
    ts_validate(ts_ds)
    return ts_ds




# [docs]
def read_timeseries_from_csv(
    csv_filename,
    west_east,
    south_north,
    crs,
    time_col=0,
    height_to_columns=None,
    **kwargs,
):
    """
    Read a csv file into a time series wind climate xarray.Dataset.

    The file must have one time entry per row, a column with a time stamp and at least one
    wind speed and one wind direction column. Several heights can be read into one dataset.
    The file is loaded with `pandas.read_csv` and the resulting dataframe is handed to
    `read_timeseries_from_pandas`.

    Parameters
    ----------
    csv_filename : str
        file path to a csv file with wind speed and wind direction measurements for different
        timestamps.
    west_east: float
        west east location of the measurement
    south_north: float
        south north location of the measurement
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
    time_col: int, str
        column position (integer) or header (str) where the timestamp is located. It can be
        overridden by using `pandas.read_csv` kwargs. Defaults to 0 (first column in the file).
    height_to_columns: dict
        dictionary to map the wind speed and directions to its corresponding height. The key is a
        float with the height, and the value is a tuple (str,str) with the header for the wind speed
        and the header for the wind direction, respectively. If the parameter is `None`, the columns
        are inferred from the column names in the file. The function will find wind speeds for
        different heights and after that will look for wind direction columns, matching them to the
        closest height.
        Examples of autodetected header formats:

            - ws_10, ws_10_mean, ws10, WS10 (wind speed at 10 m)
            - windagl10, windagl_10, windagl_10_mean (wind speed at 10 m)
            - wd_15, wd_15_mean, wd15, WD15 (wind direction at 15 m)
            - wdiragl15, wdiragl_15, wdiragl_15_mean (wind direction at 15 m)

    ``**kwargs``: dict
        Optional arguments that are forwarded to `pandas.read_csv` for customizing its behavior.
        They take precedence over the defaults ``parse_dates=True`` and ``index_col=time_col``.

    Returns
    -------
    da: xarray.Dataset
        Time series wind climate dataset with variables 'wind_speed' and 'wind_direction'
        and with a coordinate and dimension 'time'.

    Raises
    ------
    RuntimeError
        If the time column cannot be parsed or if the wind speed and wind direction columns cannot
        be detected.
    """
    # Caller-supplied options are unpacked last so they override the defaults.
    read_options = {"parse_dates": True, "index_col": time_col, **kwargs}
    frame = pd.read_csv(csv_filename, **read_options)
    return read_timeseries_from_pandas(
        frame, west_east, south_north, crs, height_to_columns
    )




# [docs]
def read_timeseries_from_pandas(
    pd_df,
    west_east,
    south_north,
    crs,
    height_to_columns=None,
):
    """
    Transform a pandas.DataFrame into a time series wind climate xarray.Dataset.

    The dataframe must have an index with time format and at least one wind speed and one wind
    direction column. Several heights can be combined into one dataset.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        pandas dataframe with wind speed and wind direction measurements for different timestamps and
        heights.
    west_east: float
        west east location of the measurement
    south_north: float
        south north location of the measurement
    crs : int, dict, str or pyproj.crs.CRS
        Value to initialize `pyproj.crs.CRS`
    height_to_columns: dict
        dictionary to map the wind speed and directions to its corresponding height. The key is a float
        with the height, and the value is a tuple (str,str) with the header for the wind speed and the
        header for the wind direction, respectively. If the parameter is `None`, the columns are inferred
        from the column names in the dataframe. The function will find wind speeds for different heights
        and after that will look for wind direction columns, matching them to the closest height.
        The detected mapping is printed to standard output.
        Examples of autodetected header formats:

           - ws_10, ws_10_mean, ws10, WS10 (wind speed at 10 m)

           - windagl10, windagl_10, windagl_10_mean (wind speed at 10 m)

           - wd_15, wd_15_mean, wd15, WD15 (wind direction at 15 m)

           - wdiragl15, wdiragl_15, wdiragl_15_mean (wind direction at 15 m)

    Returns
    -------
    da: xarray.Dataset
        Time series wind climate dataset with variables 'wind_speed' and 'wind_direction'
        and with a coordinate and dimension 'time'.

    Raises
    ------
    RuntimeError
        If the dataframe index is not a ``pandas.DatetimeIndex`` or if it fails to autodetect
        the columns.
    ValueError
        If ``height_to_columns`` is given but empty.
    """
    # Check if index is datetime
    if not isinstance(pd_df.index, pd.DatetimeIndex):
        raise RuntimeError(
            "The dataframe index is not of type 'datetime'. Please provide a pandas.DataFrame with the time as index."
        )

    if height_to_columns is None:
        # Only the detection itself is guarded; the report below cannot fail
        # in a way that should be blamed on the column names.
        try:
            height_to_columns = _headers_to_dict(pd_df)
        except Exception as err:
            raise RuntimeError(
                str(err)
                + "\nColumns could not be detected automatically. Provide a height_to_columns dictionary."
            ) from err

        print("Columns detected")
        print("{:<5} {:<12} {:<12}".format("h", "Wind speed", "Wind dir"))
        for k, v in height_to_columns.items():
            print("{:<5} {:<12} {:<12}".format(k, v[0], v[1]))

    if not height_to_columns:
        # Without this check the failure surfaces from xr.concat with a
        # message that does not mention the offending argument.
        raise ValueError(
            "height_to_columns is empty; provide at least one height mapped to a "
            "(wind speed column, wind direction column) pair."
        )

    # One single-height dataset per entry, concatenated along "height" below.
    ds_pieces = []
    for k, v in height_to_columns.items():
        ws = xr.DataArray(pd_df[v[0]], dims=["time"])
        wd = xr.DataArray(pd_df[v[1]], dims=["time"])
        ds_piece = xr.Dataset({"wind_speed": ws, "wind_direction": wd}).assign_coords(
            height=k,
        )
        ds_pieces.append(ds_piece)

    ds = xr.concat(ds_pieces, dim="height")
    ds = ds.assign_coords(
        {
            "west_east": west_east,
            "south_north": south_north,
        }
    )
    ds = ds.transpose("time", ...)
    ds = add_crs(ds, crs)
    ds = update_history(ds)
    return to_stacked_point(update_var_attrs(ds, {**_TS_ATTRS}))



def _headers_to_dict(df):
    """
    Detect the wind speed and wind direction columns of a pandas dataframe
    and pair them up by height.

    Parameters
    ----------
    df : pandas.DataFrame
        dataframe with wind data

    Returns
    -------
    dict: Dictionary where the key is the height (float) and the value is a tuple (string,string)
    with the wind speed column header and the header of the wind direction column whose height is
    closest to it.

    Raises
    ------
    RuntimeError
        If no wind speed column or no wind direction column is detected, or if two wind speed
        columns share the same height.
    """
    speed_cols, direction_cols = _parse_heights_from_headers(df.columns)
    if not speed_cols:
        raise RuntimeError("No wind speed columns were found.")
    if not direction_cols:
        raise RuntimeError("No wind direction columns were found.")

    speed_heights = np.array([entry[0] for entry in speed_cols])
    direction_heights = np.array([entry[0] for entry in direction_cols])

    # Only the wind speed heights must be unique: they become the dict keys.
    height_counts = collections.Counter(speed_heights)
    repeated = [height for height, count in height_counts.items() if count > 1]
    if repeated:
        message = "".join(
            f"There are duplicate entries for height {height}\n" for height in repeated
        )
        raise RuntimeError(message.rstrip())

    nearest = _closest_dir_index(speed_heights, direction_heights)
    return {
        entry[0]: (entry[1], direction_cols[dir_idx][1])
        for entry, dir_idx in zip(speed_cols, nearest)
    }


def _parse_heights_from_headers(header_list):
    """Detect the wind speed and wind direction columns from a header list

    Parameters
    ----------
    header_list : list of string
        list with each header

    Returns
    -------
    list: list with tuples (float,string) with the height and the string header of the wind speed
    list: list with tuples (float,string) with the height and the string header of the wind direction
    """
    response_h = []
    response_d = []
    velocity_patterns_list = [
        r"(?:(?:windagl)|(?:ws))_*(?P<height>\d*\.*\d+).*(?:mean)*",
        r"a(?P<height>\d*\.*\d+)(:?(:?|:?T0deg))_wind_speed_mean",
    ]
    direction_patterns_list = [
        r"(?:(?:wdiragl)|(?:wd))_*(?P<height>\d*\.*\d+).*(?:mean)*",
        r"d(?P<height>\d*\.*\d+)(:?(:?|:?T0deg))_wind_direction_mean",
    ]

    while len(velocity_patterns_list) != 0:
        velocity_pattern = velocity_patterns_list.pop(0)
        direction_pattern = direction_patterns_list.pop(0)
        for val in header_list:
            match_vel = re.match(velocity_pattern, val, re.IGNORECASE)
            match_dir = re.match(direction_pattern, val, re.IGNORECASE)
            if match_vel is not None:
                height_vel = match_vel.group("height")
                response_h.append((float(height_vel), val))
            if match_dir is not None:
                height_dir = match_dir.group("height")
                response_d.append((float(height_dir), val))

    return response_h, response_d


def _closest_dir_index(vel_list, dir_list):
    """
    returns  a list with the indices with the closest value of wind
    direction for a given wind velocity

    Parameters
    ----------
    vel_list : numpy.array
        array with heights where the velocity was measured
    dir_list : numpy.array
        array with height where the direction was measured

    Returns
    -------
    list : list
        list with the indices in dir_list corresponding to vel_list
    """
    resp = []
    for val in vel_list:
        resp.append(np.argmin(abs(val - dir_list)))
    return resp