Source code for woom.iters

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Iteration utilities for date cycles and ensembles
"""

import math
import re

import pandas as pd

from . import WoomError
from . import util as wutil

# %% Cycles


class Cycle:
    """Container for a time cycle

    A Cycle represents either a point in time (single date) or a time
    interval (begin and end dates). Cycles are used to organize workflow
    execution over different time periods.

    Parameters
    ----------
    begin_date : date-like
        The start date of the cycle
    end_date : date-like, optional
        The end date of the cycle. If None, the cycle represents a single
        point in time.
    horizon : timedelta-like, optional
        Forecast horizon. When set on a single-date cycle
        (``end_date=None``), sets ``end_date = begin_date + horizon`` so that
        ``cycle_end_date`` and ``cycle_duration`` are available in templates
        without making ``is_interval`` True. When ``end_date`` is given, the
        horizon is only stored in :attr:`horizon` and does not affect
        :attr:`end_date` or :attr:`duration`.

    Notes
    -----
    Cycles support equality comparison with flexible semantics:

    - Two cycles are equal if they have the same begin_date and end_date
    - A single date cycle equals an interval cycle if they share the same
      begin_date
    - Cycles can be compared with ISO 8601 formatted strings
    - Comparison with an unsupported object is simply False

    Examples
    --------
    >>> from woom.iters import Cycle
    >>> # Single date cycles
    >>> c1 = Cycle("2020-01-01")
    >>> c2 = Cycle("2020-01-01")
    >>> c1 == c2
    True
    >>> # Interval cycles
    >>> i1 = Cycle("2020-01-01", "2020-01-10")
    >>> i2 = Cycle("2020-01-01", "2020-01-10")
    >>> i1 == i2
    True
    >>> # Mixed comparison - compared by begin_date
    >>> c1 == i1
    True
    >>> # String comparison
    >>> c1 == "2020-01-01T00:00:00+00:00"
    True
    """

    def __init__(self, begin_date, end_date=None, horizon=None):
        #: Begin date (:class:`~woom.util.WoomDate`)
        self.begin_date = wutil.WoomDate(begin_date)
        #: Same as :attr:`begin_date`
        self.date = self.begin_date
        #: Whether it is an interval or a single date (:class:`bool`)
        self.is_interval = end_date is not None
        #: Whether it is the first cycle (:class:`bool`)
        self.is_first = False
        #: Whether it is the last cycle (:class:`bool`)
        self.is_last = False
        #: Forecast horizon (:class:`~pandas.Timedelta` or None)
        self.horizon = pd.to_timedelta(horizon) if horizon is not None else None

        if self.is_interval:
            #: End date (:class:`~woom.util.WoomDate` or None)
            self.end_date = wutil.WoomDate(end_date)
            #: Interval duration (:class:`~pandas.Timedelta` or None)
            self.duration = self.end_date - self.begin_date
        elif self.horizon is not None:
            # Single date with a horizon: expose an end date and a duration
            # while still being a single-date cycle (is_interval stays False)
            self.end_date = wutil.WoomDate(self.begin_date + self.horizon)
            self.duration = self.horizon
        else:
            self.end_date = self.duration = None

        #: String used for printing and based on the ISO 8601 format (:class:`str`)
        if self.is_interval:
            self.label = f"{self.begin_date.isoformat()} -> {self.end_date.isoformat()} ({self.duration})"
        elif self.horizon is not None:
            self.label = f"{self.begin_date.isoformat()} +{self.duration}"
        else:
            self.label = self.begin_date.isoformat()

        # The token of an interval embeds both dates; the token of a single
        # date (with or without horizon) is based on the begin date only.
        #: String used in file and directory names and based on the ISO 8601 format (:class:`str`)
        if self.is_interval:
            self.token = f"{self.begin_date.isoformat()}-{self.end_date.isoformat()}"
        else:
            self.token = f"{self.begin_date.isoformat()}"

        #: Next cycle (:class:`Cycle` or None)
        self.next = None
        #: Previous cycle (:class:`Cycle` or None)
        self.prev = None

    def __str__(self):
        """Return the :attr:`token`"""
        return self.token

    def __repr__(self):
        """Return a multi-line description with one attribute per line"""
        header = f"<Cycle({self.begin_date}, {self.end_date})>\n"
        attrs = ("duration", "date", "label", "token", "is_first", "is_last", "prev", "next")
        return header + "".join(" {}: {}\n".format(attr, getattr(self, attr)) for attr in attrs)

    def __eq__(self, other):
        """Compare with another cycle, a date or a string

        Supported operands are strings (a single date or a
        ``<begin>-<end>`` token), :class:`~woom.util.WoomDate` instances and
        objects exposing ``begin_date`` and ``end_date`` attributes.
        Any other operand yields ``NotImplemented`` so that Python falls back
        to its default handling (``==`` evaluates to False) instead of
        raising :class:`AttributeError`.
        """
        # Fast path: same string representation
        if str(self) == str(other):
            return True

        # Convert the other operand to a list of one or two dates
        if isinstance(other, str):
            m = re.match(r"^(\d+-\d+-\d+.*)-(\d+-\d+-\d+.*)$", other)
            parts = m.groups() if m else (other,)
            try:
                dates = [wutil.WoomDate(part) for part in parts]
            except Exception:
                # Not parsable as date(s): cannot be equal
                return False
        elif isinstance(other, wutil.WoomDate):
            dates = [other]
        else:
            try:
                dates = [other.begin_date]
                other_end_date = other.end_date
            except AttributeError:
                # Not a cycle-like object
                return NotImplemented
            if other_end_date is not None:
                dates.append(other_end_date)

        if dates[0] != self.begin_date:
            return False

        # If either is a single date, they're equal (begin_date matches)
        if len(dates) == 1 or self.end_date is None:
            return True

        # Both have an end date: they must match too
        if dates[1] != self.end_date:
            return False
        return True

    def describe(self):
        """Return the same multi-line description as :func:`repr`"""
        return self.__repr__()

    def __hash__(self):
        # NOTE(review): the hash is based on the token while __eq__ also
        # accepts cycles that only share the same begin date, so two "equal"
        # cycles may have different hashes. Confirm this is acceptable where
        # cycles are used as dict keys or set members.
        return hash(self.token)

    def get_params(self, suffix=None):
        """Export a dict of substitution parameters about this cycle

        Parameters
        ----------
        suffix : str, optional
            Suffix appended to all keys. A leading underscore is added when
            missing.

        Returns
        -------
        dict
            Keys are prefixed with ``cycle``. ``cycle_date`` is only
            available for non-interval cycles, and ``cycle_end_date`` and
            ``cycle_duration`` only when an end date is defined.
        """
        if suffix:
            if not suffix.startswith("_"):
                suffix = "_" + suffix
        else:
            suffix = ""

        params = {
            "cycle" + suffix: self,
            "cycle_begin_date" + suffix: self.begin_date,
            "cycle_label" + suffix: self.label,
            "cycle_token" + suffix: self.token,
        }
        if not self.is_interval:
            params["cycle_date" + suffix] = params["cycle_begin_date" + suffix]
        if self.end_date is not None:
            params.update(
                {
                    "cycle_end_date" + suffix: self.end_date,
                    "cycle_duration" + suffix: self.duration,
                }
            )
        params["cycle_is_first" + suffix] = self.is_first
        params["cycle_is_last" + suffix] = self.is_last
        params["cycle_next" + suffix] = self.next
        params["cycle_prev" + suffix] = self.prev
        return params

    def get_env_vars(self, suffix=None):
        """Export a dict of WOOM environment variables about this cycle

        The parameters of :meth:`get_params` are converted with
        :func:`woom.util.dict_to_env_vars`.
        """
        params = self.get_params(suffix=suffix)
        return wutil.dict_to_env_vars(params)
def gen_cycles(
    begin_date, end_date=None, freq=None, ncycles=None, round=None, as_intervals=True, horizon=None
):
    """Get a list of :class:`Cycle` instances given time specifications

    The first cycle has the :attr:`Cycle.is_first` attribute set to True.
    The last cycle has the :attr:`Cycle.is_last` attribute set to True.
    When a single cycle is generated, it is both the first and the last one.
    The cycles are related with one another thanks to the :attr:`Cycle.prev`
    and :attr:`Cycle.next` attributes.

    Parameters
    ----------
    begin_date: date-like
        First date
    end_date: date-like, None
        Last date
    freq: freq-like, None
        Difference of time between two dates
    ncycles: int, None
        Number of cycles. When `end_date` is given, this parameter takes
        precedence over `freq`. Without `end_date`, both `ncycles` and `freq`
        are needed to generate more than a single date.
    round: freq-like, None
        Round dates to this precision
    as_intervals: bool
        Consider dates as independent dates or intervals.
        When set to True, ``[date0, date1, date2]`` becomes
        ``[Cycle(date0, date1), Cycle(date1, date2)]``,
        else ``[Cycle(date0), Cycle(date1), Cycle(date2)]``.
    horizon: timedelta-like, None
        Forecast horizon applied to single-date cycles, i.e. when
        ``as_intervals=False`` or when a single date is generated.
        Each cycle's :attr:`~Cycle.end_date` is set to
        ``begin_date + horizon`` and :attr:`~Cycle.duration` to ``horizon``,
        making ``cycle_end_date`` and ``cycle_duration`` available in
        templates. Ignored when intervals are generated.

    Returns
    -------
    list of Cycle

    Raises
    ------
    WoomError
        When `begin_date` is None or when no cycle can be generated from the
        specifications.
    """
    if begin_date is None:
        raise WoomError("begin_date must not be None to generate cycles")
    begin_date = wutil.WoomDate(begin_date, round)

    # Build the sequence of run dates
    if end_date:
        end_date = wutil.WoomDate(end_date, round)
        if ncycles:
            # ncycles intervals need ncycles + 1 bounds
            rundates = pd.date_range(start=begin_date, end=end_date, periods=ncycles + 1)
        elif freq:
            rundates = pd.date_range(start=begin_date, end=end_date, freq=freq)
        else:
            rundates = [pd.to_datetime(begin_date), pd.to_datetime(end_date)]
    elif ncycles and freq:
        rundates = pd.date_range(start=begin_date, periods=ncycles + 1, freq=freq)
    else:
        rundates = [begin_date]

    # Convert dates to cycles
    if len(rundates) == 1:
        # Single date
        cycles = [Cycle(rundates[0], horizon=horizon)]
    elif as_intervals:
        # A list of time intervals built from consecutive dates
        cycles = [Cycle(date0, date1) for date0, date1 in zip(rundates[:-1], rundates[1:])]
    else:
        cycles = [Cycle(date, horizon=horizon) for date in rundates]

    if not cycles:
        raise WoomError(
            "Unable to generate cycles with these specs: "
            f"begin_date={begin_date}, end_date={end_date}, freq={freq}, ncycle={ncycles}, round={round}"
        )

    # Flag the bounds and chain the cycles
    cycles[0].is_first = True
    cycles[-1].is_last = True
    for previous, following in zip(cycles, cycles[1:]):
        previous.next = following
        following.prev = previous
    return cycles
# %% Ensembles
class Member:
    """Container for an ensemble member

    Parameters
    ----------
    member_id : int
        Member id starting from 1
    nmembers : int
        Total number of members in the ensemble
    """

    def __init__(self, member_id, nmembers):
        #: Member id starting from 1 (:class:`int`)
        self.id = member_id
        #: Total number of members in the ensemble (:class:`int`)
        self.nmembers = nmembers
        # Number of digits needed to zero-pad any member id.
        # Counting characters is exact and, unlike a log10, is defined for 0.
        self._ndigits = len(str(abs(int(nmembers))))
        # Names of the attributes registered with set_prop()
        self._props = set()

    def __str__(self):
        """Return the :attr:`label`"""
        return str(self.label)

    def set_prop(self, name, value):
        """Set a property

        The value is stored as an instance attribute named `name` and the
        name is registered so that it is exported by :attr:`props`.
        """
        setattr(self, name, value)
        self._props.add(name)

    @property
    def props(self):
        """Properties of this member (:class:`dict`)"""
        return {name: getattr(self, name) for name in self._props}

    @property
    def label(self):
        """String like 'member12' (:class:`str`)"""
        return f"member{self.id:0{self._ndigits}}"

    @property
    def rank(self):
        """String like '012/120' (:class:`str`)"""
        return f"{self.id:0{self._ndigits}}/{self.nmembers}"

    @property
    def params(self):
        """Contains this instance, :attr:`nmembers` and all :attr:`properties <props>` (:class:`dict`)

        It is used for string substitutions
        """
        params = {"member": self, "nmembers": self.nmembers}
        params.update(self.props)
        return params

    @property
    def env_vars(self):
        """Conversion of :attr:`params` to a dict of environment variables (:class:`dict`)"""
        return wutil.dict_to_env_vars(self.params)
def gen_ensemble(nmembers, skip=None, **iters):
    """Generate a list of :class:`Member` objects

    Parameters
    ----------
    nmembers : int, None
        Size of the ensemble. When None, it is set to the length of the
        shortest iterator, or to 0 when no iterator is given.
    skip : optional
        Specification of the member ids to skip. It is converted with
        :func:`woom.util.pages2ints`.
    **iters
        Sequences of per-member values. Each keyword becomes a property of
        the members, the member of id ``i`` receiving ``values[i - 1]``.

    Returns
    -------
    list of Member
        The members that are not skipped, ordered by id.

    Raises
    ------
    WoomError
        When an iterator does not have a length of `nmembers`.
    """
    # Infer the ensemble size from the iterators
    if nmembers is None:
        nmembers = min([len(values) for values in iters.values()]) if iters else 0

    # Expand the specification of members to skip
    if skip:
        skip = wutil.pages2ints(skip, nmembers)

    # Ids are 1-based; the filter is lazy so that members are processed one by one
    kept_ids = (mid for mid in range(1, nmembers + 1) if not (skip and mid in skip))

    ensemble = []
    for mid in kept_ids:
        current = Member(mid, nmembers)
        for attr, values in iters.items():
            nvalues = len(values)
            if nvalues != nmembers:
                raise WoomError(
                    f"Ensemble iterator names '{attr}' must have a length of {nmembers}, not {nvalues}!"
                )
            current.set_prop(attr, values[mid - 1])
        ensemble.append(current)
    return ensemble