Jinja templates#

These templates are filled with the Input context and the Input environment variables that are generated by the workflow for each task.

The job.sh template#

This template is used by woom.tasks.Task.render_content() to create the job script.

Default job.sh jinja template#
{% block header -%}
#!/bin/bash

# Prolog
# - stop at the first failing command or failing pipeline stage
set -eo pipefail
# - handler for termination requests (SIGTERM)
on_sigterm() {
    echo "Received termination signal, cleaning up..."
    # Graceful only when the job.terminating marker is present (requested stop);
    # any other SIGTERM is reported as a failure. on_exit handles the status file.
    if [ -f "{{ task_submission_dir }}/job.terminating" ]; then
        exit 0
    fi
    exit 1
}
# - handler for interruption (SIGINT); SIGKILL itself can never be trapped
on_sigkill() {
    echo "Received kill signal, cleaning up..." >&2
    # Exit with an error, let on_exit handle status
    exit 1
}
# - handler for exit (always called)
on_exit() {
    # Local name: must not clash with a global "status" variable of the script
    local rc=$?
    # With the job.terminating marker present the status file is not written here
    if [ ! -f "{{ task_submission_dir }}/job.terminating" ]; then
        echo $rc > "{{ task_submission_dir }}/job.status"
    fi
    exit $rc
}
trap on_sigterm SIGTERM
trap on_sigkill SIGINT
trap on_exit EXIT
{% endblock %}

{% block env -%}
# Environment setup (content of the included env.sh template)
{% include "env.sh" %}
{% endblock %}

{% block pre_run -%}
{% if task_run_dir %}
# Go to run dir (created if needed); give up if it cannot be created or entered
{ mkdir -p "{{ task_run_dir }}" && cd "{{ task_run_dir }}"; } || exit 1
{% endif %}
{% endblock %}

{% block run -%}
# Run the commandline(s)
# (the text below comes from task.commandline)
{{ task.commandline }}
{% endblock %}

{% block post_run -%}
{% if task.artifacts %}
# Check artifacts
# (every declared artifact must exist as a regular file, otherwise the job fails)
{% for name, path in task.artifacts.items() -%}
{% for path_ in ([path] if path is string else path) -%}
test -f "{{ path_ }}" || { echo "artifact not created: {{ name }}={{ path_ }}" >&2; exit 1; }
{% endfor %}
{% endfor %}
{% endif %}
{% endblock %}

The env.sh template#

This template is used by woom.env.EnvConfig.render() to format the environment declarations that are included in the job.sh template.

Default env.sh jinja template#
{% block raw_text -%}
{% if task_env.raw_text %}
# Raw init env
# (free-form text taken from task_env.raw_text; first block of this template)
{{ task_env.raw_text }}
{% endif %}
{% endblock %}
{% block modules -%}
{% if task_env.module_load %}

# Environment modules
# (only emitted when modules must be loaded: optional setup snippet,
#  optional "module use", then "module load")
{% if task_env.module_setup %}
{{ task_env.module_setup }}
{% endif %}
{% if task_env.module_use %}
module use {{ task_env.module_use }}
{% endif %}
{% if task_env.module_load %}
module load {{ task_env.module_load }}
{% endif %}
{% endif %}
{% endblock %}
{% block uv -%}
{% if workflow_dir is defined %}
{% set venv_activate = os.path.join(workflow_dir, ".venv", "bin", "activate") %}
{% if task_env.uv_venv is true and os.path.exists(venv_activate) %}

# UV virtual environment
# (activation script found under the workflow directory when the template was rendered)
source "{{ venv_activate }}"
{% endif %}
{% endif %}

{% endblock %}
{% block env_vars -%}
{% if task_env.has_vars() %}
# Environment variables
{# forward: copy the value found in the rendering process environment #}
{% for name in task_env.vars_forward %}
export {{ name }}="{{ os.environ[name] }}"
{% endfor %}
{# set #}
{% for name, value in task_env.vars_set.items() %}
export {{ name }}="{{ value|as_str_env }}"
{% endfor %}
{# prepend: renders NAME=value${NAME:+<sep>$NAME} so that no dangling separator is left when NAME is unset or empty #}
{% for name, value in task_env.vars_prepend.items() %}
export {{ name }}={{ value|as_str_env }}${{ '{' }}{{ name }}:+{{ os.pathsep }}${{ name }}}
{% endfor %}
{# append: renders NAME=${NAME:+$NAME<sep>}value, same rule as for prepend #}
{% for name, value in task_env.vars_append.items() %}
export {{ name }}=${{ '{' }}{{ name }}:+${{ name }}{{ os.pathsep }}}{{ value|as_str_env }}
{% endfor %}
{% endif %}
{% endblock %}
{% block conda -%}
{% if task_env.conda_activate %}

# Conda
# (optional setup snippet from task_env.conda_setup, then activation of the
#  environment named by task_env.conda_activate)
{% if task_env.conda_setup %}
{{ task_env.conda_setup }}
{% endif %}
conda activate {{ task_env.conda_activate }}
{% endif %}
{% endblock %}

{% block custom -%}
{# Custom configuration block #}
{% endblock %}

The sentinel.sh template#

This template is used to create the sentinel job script that monitors all workflow jobs when using a scheduler (SLURM, PBS Pro). The sentinel automatically kills jobs on failure and terminates non-blocking jobs when all blocking jobs complete.

Default sentinel.sh jinja template#
{% extends "!job.sh" %}

{% block run %}

{% set jobids = job_blocking_status.keys()|list %}
{% set blocking_jobs = [] %}
{% set non_blocking_jobs = [] %}
{% for jobid, is_blocking in job_blocking_status.items() %}
  {% if is_blocking %}
    {% do blocking_jobs.append(jobid) %}
  {% else %}
    {% do non_blocking_jobs.append(jobid) %}
  {% endif %}
{% endfor %}
{% set num_blocking = blocking_jobs|length %}
{% set num_non_blocking = non_blocking_jobs|length %}
{% set num_total = jobids|length %}

# Pause in seconds between two monitoring passes (10 unless check_interval is given)
CHECK_INTERVAL={{ check_interval|default(10) }}

echo "========== Woom Sentinel =========="
echo "Monitoring: {{ num_total }} jobs ({{ num_blocking }} blocking) | Interval: ${CHECK_INTERVAL}s"
echo ""

# Job ids (every monitored job, blocking or not)
JOBIDS=({{ jobids|join(' ') }})

# Non-blocking jobs (subset of the job ids)
NON_BLOCKING_JOBS=({{ non_blocking_jobs|join(' ') }})

# Blocking status dictionary (job id -> 1 if blocking, 0 otherwise)
declare -A IS_BLOCKING
{% for jobid, is_blocking in job_blocking_status.items() %}
IS_BLOCKING[{{ jobid }}]={{ "1" if is_blocking else "0" }}
{% endfor %}

# Status files (job id -> path of the file from which check_status reads the exit code)
declare -A STATUS_FILES
{% for jobid, path in status_files.items() %}
STATUS_FILES[{{ jobid }}]="{{ path }}"
{% endfor %}

# Set default status to PENDING
# (the monitoring loop later moves entries to RUNNING and SUCCESS)
declare -A status
for j in "${JOBIDS[@]}"; do status[$j]="PENDING"; done

# Array sizes (computed when the template is rendered)
NUM_BLOCKING={{ num_blocking }}
NUM_NON_BLOCKING={{ num_non_blocking }}
NUM_TOTAL={{ num_total }}


# Print the current state of one job on stdout.
#
# Arguments: $1 - job id, used as key of STATUS_FILES and passed to the scheduler
# Globals:   STATUS_FILES (read); trace lines are appended to ./job.out
# Outputs:   SUCCESS or FAILED when the outcome is known, otherwise the raw
#            scheduler state, or UNKNOWN when the scheduler has no answer
# Returns:   always 0, so a failing scheduler query cannot abort a caller
#            that runs under "set -e" / "pipefail"
check_status() {
    local jobid="$1"

    # Priority 1: Check status file (authoritative once it holds an exit code)
    if [ -f "${STATUS_FILES[$jobid]}" ]; then
        local exit_code
        exit_code=$(cat "${STATUS_FILES[$jobid]}" 2>/dev/null) || exit_code=""
        exit_code="${exit_code//[[:space:]]/}"
        # An empty file was created but not filled yet: this is not a failure,
        # fall through to the scheduler query instead
        if [ -n "$exit_code" ]; then
            echo "from status file: $exit_code" >> job.out
            if [ "$exit_code" = "0" ]; then
                echo "SUCCESS"
            else
                echo "FAILED"
            fi
            return 0
        fi
    fi

    # Priority 2: Query scheduler
{% if host.scheduler == 'slurm' -%}
    local state
    # Single squeue call: no window in which the job can leave the queue
    # between an existence test and the actual state query
    state=$(squeue -j "$jobid" -h -o "%T" 2>/dev/null) || state=""
    state="${state// /}"
    if [ -z "$state" ]; then
        # -P disables the fixed-width column that truncates long states
        # (e.g. "CANCELLED+"); the first word drops a trailing " by <uid>"
        state=$(sacct -j "$jobid" -n -X -P -o State 2>/dev/null | awk 'NR==1 {print $1}') || state=""
    fi
    echo "${state:-UNKNOWN}"
{% elif host.scheduler == 'pbspro' -%}
    local qstat_out job_state
    if qstat_out=$(qstat "$jobid" 2>/dev/null); then
        # State is the 5th column of the last line of the default qstat listing
        job_state=$(printf '%s\n' "$qstat_out" | tail -1 | awk '{print $5}') || job_state=""
        echo "$job_state"
        echo "from qstat file: " $job_state >> job.out
    else
        # Job no longer listed: look it up in the history
        local qstat_history
        qstat_history=$(qstat -x -f "$jobid" 2>/dev/null) || true

        if [ -z "$qstat_history" ]; then
            echo "UNKNOWN"
            return 0
        fi

        job_state=$(printf '%s\n' "$qstat_history" | grep "job_state" | cut -d= -f2 | tr -d ' ') || job_state=""
        echo "from qstat history file: $job_state" >> job.out
        if [ "$job_state" = "F" ]; then
            # Finished job: its exit status tells success from failure
            local exit_status
            exit_status=$(printf '%s\n' "$qstat_history" | grep "Exit_status" | cut -d= -f2 | tr -d ' ') || exit_status=""
            if [ "$exit_status" = "0" ]; then
                echo "SUCCESS"
            else
                echo "FAILED"
            fi
            echo "from qstat history file with F: $exit_status" >> job.out
        else
            echo "$job_state"
        fi
    fi
{% endif %}
    return 0
}

# Cancel every monitored job after a failure, then stop the script.
#
# Arguments: $1 - label printed in the failure banner (the caller passes the
#                 id of the job that failed)
# Globals:   JOBIDS, STATUS_FILES, status (read); SLURM_JOB_ID / PBS_JOBID (read)
# Outputs:   progress report on stderr; writes 1 into the status file of each
#            job that was cancelled
# Returns:   does not return, the script exits with code 1
kill_all() {
    echo -e "\n\n========== FAILURE: $1 ==========" >&2
    echo "Killing all jobs..." >&2
    local n=0 j
{% if host.scheduler == 'slurm' -%}
    for j in "${JOBIDS[@]}"; do
        # Never cancel ourselves
        [ "$j" = "$SLURM_JOB_ID" ] && continue
        # Jobs already recorded as SUCCESS are left alone so that their
        # status file is not overwritten with a failure code
        [ "${status[$j]}" = "SUCCESS" ] && continue
        if scancel "$j" 2>/dev/null; then
            echo "  ✗ $j" >&2
            echo 1 > "${STATUS_FILES[$j]}" && n=$((n+1))
        fi
    done
{% elif host.scheduler == 'pbspro' -%}
    # Compare ids without the ".server" suffix
    local my_id="${PBS_JOBID%%.*}"
    for j in "${JOBIDS[@]}"; do
        # Never cancel ourselves
        [ "${j%%.*}" = "$my_id" ] && continue
        # Jobs already recorded as SUCCESS are left alone so that their
        # status file is not overwritten with a failure code
        [ "${status[$j]}" = "SUCCESS" ] && continue
        if qdel -W force "$j" 2>/dev/null; then
            echo "  ✗ $j" >&2
            echo 1 > "${STATUS_FILES[$j]}" && n=$((n+1))
        fi
    done
{% endif %}
    echo "Killed $n jobs" >&2
    echo "===================================" >&2
    exit 1
}

# Ask every non-blocking job that has not succeeded yet to stop.
#
# Globals:   NUM_NON_BLOCKING, NON_BLOCKING_JOBS, STATUS_FILES, status (read);
#            SLURM_JOB_ID / PBS_JOBID (read)
# Outputs:   progress report on stdout; for each job, a ".terminating" marker
#            next to its status file and, when the scheduler accepts the
#            request, a status file containing 0
# Returns:   0 right away when there is no non-blocking job
terminate_non_blocking_jobs() {
    if [ $NUM_NON_BLOCKING -eq 0 ]; then
        return 0
    fi

    printf '\n===================================\n'
    echo "Terminating non-blocking jobs..."
    local terminated=0
{% if host.scheduler == 'slurm' -%}
    for j in "${NON_BLOCKING_JOBS[@]}"; do
        # Nothing to do for finished jobs, and never signal ourselves
        if [ "${status[$j]}" = "SUCCESS" ] || [ "$j" = "$SLURM_JOB_ID" ]; then
            continue
        fi

        # Mark job as intentionally terminating
        touch "${STATUS_FILES[$j]%.status}.terminating"
        # NOTE(review): per the scancel documentation, signals other than KILL
        # are not sent to the batch shell unless --batch/--full is given;
        # confirm that the job script is actually reached
        if scancel --signal=TERM "$j" 2>/dev/null; then
            echo "  ✓ $j"
            echo 0 > "${STATUS_FILES[$j]}"
            terminated=$((terminated+1))
        fi
    done
{% elif host.scheduler == 'pbspro' -%}
    # Ids are compared without their ".server" suffix
    local self_id="${PBS_JOBID%%.*}"
    for j in "${NON_BLOCKING_JOBS[@]}"; do
        # Nothing to do for finished jobs, and never delete ourselves
        if [ "${status[$j]}" = "SUCCESS" ] || [ "${j%%.*}" = "$self_id" ]; then
            continue
        fi

        # Mark job as intentionally terminating
        touch "${STATUS_FILES[$j]%.status}.terminating"
        if qdel "$j" 2>/dev/null; then
            echo "  ✓ $j"
            echo 0 > "${STATUS_FILES[$j]}"
            terminated=$((terminated+1))
        fi
    done
{% endif %}
    echo "Terminated $terminated non-blocking jobs"
    echo "==================================="
}

# Main monitoring loop: poll every job until all blocking jobs have succeeded
# (exit 0 after stopping the non-blocking ones) or one job fails (kill_all
# exits with code 1).
echo "Monitoring started..."
# Start from a fresh trace file; check_status appends its trace lines to it
rm -rf job.out

while true; do
    # Counters are rebuilt from scratch at every pass
    running=0 pending=0 blocking_done=0 total_done=0

    for j in "${JOBIDS[@]}"; do
        # Jobs already recorded as SUCCESS are counted without a new query
        if [ "${status[$j]}" = "SUCCESS" ]; then
            total_done=$((total_done+1))
            [ "${IS_BLOCKING[$j]}" = "1" ] && blocking_done=$((blocking_done+1))
            continue
        fi

        s=$(check_status "$j")
        # NOTE(review): looks like a debugging trace - confirm it is still wanted
        echo after check status $j $s

        case "$s" in
            SUCCESS)
                # Report the transition once, then record and count the job
                [ "${status[$j]}" != "SUCCESS" ] && echo "[$(date +'%H:%M:%S')] ✓ $j"
                status[$j]="SUCCESS"
                total_done=$((total_done+1))
                [ "${IS_BLOCKING[$j]}" = "1" ] && blocking_done=$((blocking_done+1))
                ;;
            FAILED{% if host.scheduler == 'slurm' %}|TIMEOUT|CANCELLED|NODE_FAIL|PREEMPTED|OUT_OF_MEMORY{% endif %})
                # Any failed job, blocking or not, stops everything (kill_all does not return)
                echo "[$(date +'%H:%M:%S')] ✗ $j ($s)"
                kill_all "$j"
                ;;
            {% if host.scheduler == 'slurm' -%}
            RUNNING|COMPLETING)
            {% elif host.scheduler == 'pbspro' -%}
            R)
            {% endif -%}
                # Report the PENDING -> RUNNING transition once
                [ "${status[$j]}" = "PENDING" ] && echo "[$(date +'%H:%M:%S')] → $j"
                status[$j]="RUNNING"
                running=$((running+1))
                ;;
            *)
                # Every other answer (including an empty or UNKNOWN one) is counted as pending
                pending=$((pending+1))
                ;;
        esac
    done

    # One-line progress report, redrawn in place thanks to the leading \r
    echo -ne "\r[$(date +'%H:%M:%S')] Blocking: $blocking_done/$NUM_BLOCKING | Total: $total_done/$NUM_TOTAL"
    [ $running -gt 0 ] && echo -ne " | $running run"
    [ $pending -gt 0 ] && echo -ne " | $pending pend"
    echo -ne "     "

    # Done as soon as every blocking job has succeeded; the remaining
    # non-blocking jobs are then asked to stop
    [ $blocking_done -eq $NUM_BLOCKING ] && {
        echo -e "\n\n========== SUCCESS: All blocking jobs completed =========="
        terminate_non_blocking_jobs
        exit 0
    }

    sleep $CHECK_INTERVAL
done
{% endblock %}

{% block post_run -%}
{% endblock %}