Source code for woom.util

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Misc utilities
"""

import collections
import json
import logging
import os
import re
import subprocess
import sys

import pandas as pd
from configobj import ConfigObj



[docs]
class WoomDate(pd.Timestamp):
    """Extended pandas Timestamp with custom formatting and time arithmetic

    Instances are always timezone-aware: a date given without time zone
    is localized to UTC.

    On top of the format specifications supported by
    :class:`pandas.Timestamp`, a specification of the form
    ``"<units> since <origin>"`` (like ``"days since 2000-01-01"``) formats
    the date as the elapsed time since ``origin`` expressed in ``units``.

    Parameters
    ----------
    date : str or datetime-like
        Date specification
    round : str, optional
        Frequency string for rounding
    """

    # Matches "<units> since <origin>", whatever the case of the letters
    re_match_since = re.compile(r"^(years|months|days|hours|minutes|seconds)\s+since\s+(\d+.*)$", re.I).match

    def __new__(cls, date, round=None):
        if isinstance(date, str) and date in ["now", "today"]:
            date = pd.to_datetime(date, utc=True)
        else:
            date = pd.to_datetime(date)
            if date.tzinfo is None:
                date = date.tz_localize("utc")
        if round:
            date = date.round(round)
        instance = super().__new__(cls, date)
        # Force the subclass on the new object
        # NOTE(review): presumably needed because pandas may hand back a
        # plain Timestamp from its constructor -- confirm
        instance.__class__ = cls
        return instance

    def __format__(self, spec):
        """Format the date

        Parameters
        ----------
        spec : str
            Either a ``"<units> since <origin>"`` specification or any
            specification supported by :class:`pandas.Timestamp`

        Returns
        -------
        str
        """
        m = self.re_match_since(spec)
        if m is None:
            return super().__format__(spec)

        units, origin = m.groups()
        origin = pd.to_datetime(origin)
        if origin.tzinfo is None:
            # self is timezone-aware, so the origin must be too
            origin = origin.tz_localize("utc")
        # The pattern is case-insensitive: normalize the unit name.
        # NOTE(review): "years" and "months" are accepted by the pattern but
        # are passed as is to pd.to_timedelta, which may reject calendar
        # units -- confirm
        elapsed = (self - origin) / pd.to_timedelta(1, units.lower())
        return "{:g}".format(elapsed)

    def add(self, *args, **kwargs):
        """Add time delta

        Parameters
        ----------
        *args
            Time deltas in a form accepted by :func:`pandas.to_timedelta`,
            like ``"1h"``
        **kwargs
            Time deltas given as ``unit=value``, like ``days=1``

        Returns
        -------
        WoomDate
            A new date. This date is left untouched.
        """
        date = self
        for arg in args:
            date = date + pd.to_timedelta(arg)
        for unit, value in kwargs.items():
            date = date + pd.to_timedelta(value, unit)
        # The arithmetic above may return a plain Timestamp: convert it back
        # so that the custom formatting remains available on the result
        return type(self)(date)





[docs]
def check_dir(filepath, dry=False, logger=None):
    """Make sure that the directory that contains file exists

    Parameters
    ----------
    filepath : str
        File path
    dry : bool
        Fake mode. Do not create the directory
    logger : logging.Logger, optional
        To inform that we create the directory, even in dry mode.
        Defaults to the logger of this module.

    Returns
    -------
    str
        Absolute file path
    """
    if logger is None:
        logger = logging.getLogger(__name__)
    filepath = os.path.abspath(filepath)
    dirname = os.path.dirname(filepath)
    if not os.path.exists(dirname):
        if logger:
            logger.debug("Creating directory: %s", dirname)
        if not dry:
            # exist_ok: another process may have created the directory
            # between the existence check and this call
            os.makedirs(dirname, exist_ok=True)
        if logger:
            logger.info("Created directory: %s", dirname)
    return filepath




[docs]
class WoomJSONEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialize woom objects"""

    def default(self, obj):
        """Convert an object that json cannot serialize natively

        Parameters
        ----------
        obj:
            The object to convert

        Returns
        -------
        object
            A plain dict for :class:`collections.UserDict` instances,
            the ``pid`` of process-like objects, the result of
            ``to_json_entry()`` when available, else what the parent encoder
            returns or, when it raises a TypeError, ``str(obj)``.
        """
        # Dict-like containers
        if isinstance(obj, collections.UserDict):
            return dict(obj)

        # Processes are stored as their pid
        looks_like_process = hasattr(obj, "pid") or isinstance(obj, subprocess.Popen)
        if looks_like_process:
            return obj.pid

        # Objects that provide their own json entry
        if hasattr(obj, "to_json_entry"):
            return obj.to_json_entry()

        # Last resort: the string representation
        try:
            encoded = super().default(obj)
        except TypeError:
            encoded = str(obj)
        return encoded





[docs]
def dict_to_env_vars(items=None, select=None, exclude=None, prefix="WOOM_", **extra_items):
    """Convert a dict to env vars whose name starts with `prefix`

    Supports nested dictionaries, lists, timestamps, and various data types.
    Nested dictionaries are flattened with underscore-separated keys.
    Lists are joined using the OS path separator (`:` on Unix, `;` on Windows).

    Parameters
    ----------
    items : dict, optional
        Dictionary to convert. It is not modified.
    select : list, optional
        Keys to select from items (only these will be included).
        The selection applies to the first level only: the content of a
        selected nested dictionary is fully included.
    exclude : list, optional
        Keys to exclude from items (applied recursively)
    prefix : str, optional
        Prefix for environment variable names (default: "WOOM_")
    **extra_items
        Additional items to include

    Returns
    -------
    dict
        Environment variables dictionary with string values

    Raises
    ------
    TypeError
        If `prefix` is not a string
    ValueError
        If `prefix` is empty

    Examples
    --------
    Simple conversion:

    >>> dict_to_env_vars({'key': 'value', 'count': 42})
    {'WOOM_KEY': 'value', 'WOOM_COUNT': '42'}

    Nested dictionaries:

    >>> dict_to_env_vars({'db': {'host': 'localhost', 'port': 5432}})
    {'WOOM_DB_HOST': 'localhost', 'WOOM_DB_PORT': '5432'}

    Lists are joined with os.pathsep:

    >>> dict_to_env_vars({'paths': ['/usr/bin', '/usr/local/bin']})
    {'WOOM_PATHS': '/usr/bin:/usr/local/bin'}  # Unix

    Boolean values:

    >>> dict_to_env_vars({'debug': True, 'quiet': False})
    {'WOOM_DEBUG': '1', 'WOOM_QUIET': '0'}

    Custom prefix:

    >>> dict_to_env_vars({'key': 'val'}, prefix='MY_APP_')
    {'MY_APP_KEY': 'val'}

    Filtering with select:

    >>> dict_to_env_vars({'a': 1, 'b': 2, 'c': 3}, select=['a', 'b'])
    {'WOOM_A': '1', 'WOOM_B': '2'}

    Filtering with exclude:

    >>> dict_to_env_vars({'keep': 1, 'skip': 2}, exclude=['skip'])
    {'WOOM_KEEP': '1'}
    """
    if not isinstance(prefix, str):
        raise TypeError(f"prefix must be a string, got {type(prefix).__name__}")
    if not prefix:
        raise ValueError("prefix cannot be empty")
    if items is None:
        items = extra_items
    else:
        # Work on a copy to leave the caller's dict untouched
        items = items.copy()
        items.update(extra_items)
    env_vars = {}
    _dict_to_env_vars_(items, env_vars, prefix, select, exclude)
    return env_vars



def _dict_to_env_vars_(dd, env_vars, prefix, select, exclude):
    """Fill `env_vars` in place with the flattened, stringified content of `dd`"""
    for key, value in dd.items():
        if select and key not in select:
            continue
        if exclude and key in exclude:
            continue
        # str() so that non-string keys, like integers, are supported
        name = prefix + str(key).upper()
        # Sections of a ConfigObj are dict subclasses, so they are caught here
        if isinstance(value, dict):
            # The selection only concerns the current level, otherwise the
            # content of a selected nested dict would be entirely dropped
            _dict_to_env_vars_(value, env_vars, name + "_", None, exclude)
            continue
        if isinstance(value, (pd.Timestamp, pd.Timedelta)):
            value = value.isoformat()
        elif value is None:
            value = ""
        elif isinstance(value, list):
            value = os.pathsep.join([str(v) for v in value])
        elif isinstance(value, bool):
            value = str(int(value))
        env_vars[name] = str(value)



[docs]
def pages2ints(pages, n):
    """Expand a mix of page numbers and slices into a list of page numbers

    Integers are taken as they are, as one-based page numbers.
    Any other item is used to index, in a zero-based way, the list of
    page numbers that goes from 1 to `n`.

    Parameters
    ----------
    pages : list
        List of integers or slices
    n : int
        Total number of pages

    Returns
    -------
    list
        List of 1-based integer indices
    """
    # One-based page numbers that the slices refer to
    numbers = list(range(1, n + 1))
    selected = []
    for item in pages:
        if not isinstance(item, int):
            selected.extend(numbers[item])
            continue
        selected.append(item)
    return selected



#: Available colors and text styles: names mapped to their ANSI escape sequence
COLORS = {
    "bold": "\033[1m",  # bold text
    "green": "\033[32m",  # green foreground
    "yellow": "\033[33m",  # yellow foreground
    "red": "\033[31m",  # red foreground
    "reset": "\033[0m",  # back to the default rendering
}



[docs]
def colorize(text, mapping, colorize=True):
    """Wrap text with the color codes of the first pattern that it matches

    The text is returned unchanged when colorization is disabled, when the
    standard output is not a terminal or when no pattern matches.

    Parameters
    ----------
    text: str
        Text to colorize
    mapping: dict
        Keys are regular expressions, matched at the beginning of the text,
        and values are valid :data:`COLORS`.
        Several colors can be combined with underscores, like ``"bold_red"``.
    colorize: bool
        Whether to colorize or not.

    Return
    ------
    str
    """
    enabled = colorize and sys.stdout.isatty()
    if not enabled:
        return text
    for regex, spec in mapping.items():
        if not re.match(regex, text):
            continue
        # Chain the escape sequence of each color name of the spec
        codes = "".join(COLORS[name] for name in spec.split("_"))
        return codes + text + COLORS["reset"]
    return text




[docs]
def flatten(content):
    """Collect the leaves of nested dicts and lists into a single flat list

    The values of dicts and the items of lists are explored recursively.
    Any other object is considered as a leaf.

    Parameters
    ----------
    content: dict, list, any

    Return
    ------
    list

    """
    if isinstance(content, dict):
        children = content.values()
    elif isinstance(content, list):
        children = content
    else:
        # A leaf
        return [content]

    leaves = []
    for child in children:
        leaves.extend(flatten(child))
    return leaves




[docs]
def set_deep_item(dd, value, *keys):
    """Set a value in a nested dict, creating the intermediate dicts on the way

    Nothing is done when no valid key is provided.

    Parameters
    ----------
    dd: dict
        The dict to modify
    value:
        The value to set
    keys: tuple
        The keys. A key is ignored if set to None.
    """
    path = [key for key in keys if key is not None]
    if not path:
        return
    *parents, last = path
    node = dd
    # Walk down to the dict that must hold the value
    for name in parents:
        node = node.setdefault(name, {})
    node[last] = value