Jinja templates
These templates are rendered with the Input context and the Input environment variables that the workflow generates for each task.
The job.sh template
This template is used by woom.tasks.Task.render_content() to create the job script.
Default
job.sh jinja template
{% block header -%}
#!/bin/bash
# Prolog
# Abort on the first failing command (-e) and make a pipeline fail as soon as
# one of its stages fails (pipefail).
set -eo pipefail
# - graceful-termination handler: report on stdout, then leave with status 0
on_sigterm() {
    printf '%s\n' "Received termination signal, cleaning up..."
    # A plain successful exit is all that is needed here; recording the
    # status is left to the exit handler.
    exit 0
}
# - forced-termination handler: report on stderr, then leave with status 1
on_sigkill() {
    printf '%s\n' "Received kill signal, cleaning up..." 1>&2
    # Nothing else to do before leaving; recording the status is left to the
    # exit handler.
    exit 1
}
# - handler for exit (always called)
# Writes the script's exit status to the job.status file of the task
# submission directory, unless a job.terminating marker exists there, and
# then exits with that same status.
on_exit() {
    # Capture the status first, before any other command overwrites it. The
    # variable is local and deliberately not called "status", so that it
    # cannot clobber a global of that name in scripts built on this template.
    local exit_status=$?
    if [ ! -f "{{ task_submission_dir }}/job.terminating" ]; then
        echo "$exit_status" > "{{ task_submission_dir }}/job.status"
    fi
    exit "$exit_status"
}
# Signal wiring:
# - SIGTERM and SIGINT are the catchable termination requests, so they get
#   the graceful handler, which exits with status 0.
# - SIGKILL can never be caught by a process; its registration is kept only
#   to document the intent of on_sigkill.
# - on_exit runs on every exit path, including the exits triggered by the
#   two handlers above.
trap on_sigterm SIGTERM SIGINT
trap on_sigkill SIGKILL
trap on_exit EXIT
{% endblock %}
{% block env -%}
# Environment setup: the rendered env.sh template is inserted below
{% include "env.sh" %}
{% endblock %}
{% block pre_run -%}
{% if task_run_dir %}
# Go to run dir (created on demand); the job aborts when it cannot be entered.
# The path is double-quoted so that a run dir containing spaces or glob
# characters reaches mkdir and cd as a single word.
{ mkdir -p "{{ task_run_dir }}"; cd "{{ task_run_dir }}"; } || exit 1
{% endif %}
{% endblock %}
{% block run -%}
# Run the commandline(s)
# The task's command line is inserted here when the template is rendered.
{{ task.commandline }}
{% endblock %}
{% block post_run -%}
{% if task.artifacts %}
# Check artifacts
# Every declared artifact (a single path or a list of paths per name) must
# exist as a regular file; the first missing one is reported on stderr and
# makes the job exit with status 1.
{% for name, path in task.artifacts.items() -%}
{% if path is string %}
test -f "{{ path }}" || { echo "artifact not created: {{ name }}={{ path }}" >&2; exit 1; }
{% else %}
{% for path_ in path -%}
test -f "{{ path_ }}" || { echo "artifact not created: {{ name }}={{ path_ }}" >&2; exit 1; }
{% endfor %}
{% endif %}
{% endfor %}
{% endif %}
{% endblock %}
The env.sh template
This template is used by woom.env.EnvConfig.render() to render the environment declarations that are included in the job.sh template.
Default
env.sh jinja template
{% block raw_text -%}
{% if task_env.raw_text %}
# Raw init env
# Free-form text taken from the task environment, inserted at this point
{{ task_env.raw_text }}
{% endif %}
{% endblock %}
{% block modules -%}
{% if task_env.module_load %}
# Environment modules
# This whole section is only emitted when modules are requested for loading;
# the optional setup text and the "module use" line are skipped otherwise.
{% if task_env.module_setup %}
{{ task_env.module_setup }}
{% endif %}
{% if task_env.module_use %}
module use {{ task_env.module_use }}
{% endif %}
{% if task_env.module_load %}
module load {{ task_env.module_load }}
{% endif %}
{% endif %}
{% endblock %}
{% block uv -%}
{% if workflow_dir is defined %}
{% set venv_activate = os.path.join(workflow_dir, ".venv", "bin", "activate") %}
{% if task_env.uv_venv is true and os.path.exists(venv_activate) %}
# UV virtual environment
# Activate the .venv of the workflow directory; this line is only emitted
# when the option is enabled and the activate script existed at render time.
source {{ venv_activate }}
{% endif %}
{% endif %}
{% endblock %}
{% block env_vars -%}
{% if task_env.has_vars() %}
# Environment variables
# - forwarded variables carry the value found in the environment of the
#   process that renders this script
# - set variables are assigned as given
# - prepended/appended values are joined to the variable's current value
#   with the path separator
# All values are double-quoted, so that a value containing spaces stays a
# single assignment.
{# forward #}
{% for name in task_env.vars_forward %}
export {{ name }}="{{ os.environ[name] }}"
{% endfor %}
{# set #}
{% for name, value in task_env.vars_set.items() %}
export {{ name }}="{{ value|as_str_env }}"
{% endfor %}
{# prepend #}
{% for name, value in task_env.vars_prepend.items() %}
export {{ name }}="{{ value|as_str_env }}{{ os.pathsep }}${{ name }}"
{% endfor %}
{# append #}
{% for name, value in task_env.vars_append.items() %}
export {{ name }}="${{ name }}{{ os.pathsep }}{{ value|as_str_env }}"
{% endfor %}
{% endif %}
{% endblock %}
{% block conda -%}
{% if task_env.conda_activate %}
# Conda
# Optional setup text comes first, then the requested environment is
# activated; nothing is emitted when no environment is requested.
{% if task_env.conda_setup %}
{{ task_env.conda_setup }}
{% endif %}
conda activate {{ task_env.conda_activate }}
{% endif %}
{% endblock %}
{% block custom -%}
{# Custom configuration block #}
{% endblock %}
The sentinel.sh template
This template is used to create the sentinel job script that monitors all workflow jobs when using a scheduler (SLURM, PBS Pro). The sentinel automatically kills all jobs as soon as one of them fails, and terminates the remaining non-blocking jobs once all blocking jobs have completed successfully.
Default
sentinel.sh jinja template
{% extends "!job.sh" %}
{% block run %}
{% set jobids = job_blocking_status.keys()|list %}
{% set blocking_jobs = [] %}
{% set non_blocking_jobs = [] %}
{% for jobid, is_blocking in job_blocking_status.items() %}
{% if is_blocking %}
{% do blocking_jobs.append(jobid) %}
{% else %}
{% do non_blocking_jobs.append(jobid) %}
{% endif %}
{% endfor %}
{% set num_blocking = blocking_jobs|length %}
{% set num_non_blocking = non_blocking_jobs|length %}
{% set num_total = jobids|length %}
# Value passed to sleep between two monitoring passes (10 when the template
# context does not define a check interval)
CHECK_INTERVAL={{ check_interval|default(10) }}
echo "========== Woom Sentinel =========="
echo "Monitoring: {{ num_total }} jobs ({{ num_blocking }} blocking) | Interval: ${CHECK_INTERVAL}s"
echo ""
# Job ids
# (all monitored jobs, blocking or not)
JOBIDS=({{ jobids|join(' ') }})
# Non-blocking jobs
NON_BLOCKING_JOBS=({{ non_blocking_jobs|join(' ') }})
# Blocking status dictionary
# (job id -> 1 when the job is blocking, 0 otherwise)
declare -A IS_BLOCKING
{% for jobid, is_blocking in job_blocking_status.items() %}
IS_BLOCKING[{{ jobid }}]={{ "1" if is_blocking else "0" }}
{% endfor %}
# Status files
# (job id -> path of the file in which the job's exit status is looked up)
declare -A STATUS_FILES
{% for jobid, path in status_files.items() %}
STATUS_FILES[{{ jobid }}]="{{ path }}"
{% endfor %}
# Set default status to PENDING
# (job id -> last state recorded by the monitoring loop)
declare -A status
for j in "${JOBIDS[@]}"; do status[$j]="PENDING"; done
# Array sizes
NUM_BLOCKING={{ num_blocking }}
NUM_NON_BLOCKING={{ num_non_blocking }}
NUM_TOTAL={{ num_total }}
# Print the current state of one job on stdout.
# Globals:   STATUS_FILES (read)
# Arguments: $1 - job id
# Outputs:   SUCCESS or FAILED when the job's status file exists (or, with
#            PBS Pro, when the job history reports a finished job);
#            otherwise the state word reported by the scheduler, or UNKNOWN
#            when the scheduler reports nothing.
#            Diagnostic lines are appended to job.out in the current directory.
check_status() {
    local jobid="$1"
    # Priority 1: Check status file (authoritative if exists)
    if [ -f "${STATUS_FILES[$jobid]}" ]; then
        local exit_code=$(cat "${STATUS_FILES[$jobid]}")
        echo "from status file: $exit_code" >> job.out
        if [ "$exit_code" = "0" ]; then
            echo "SUCCESS"
        else
            echo "FAILED"
        fi
        return 0
    fi
    # Priority 2: Query scheduler
{% if host.scheduler == 'slurm' -%}
    if squeue -j "$jobid" -h &>/dev/null; then
        squeue -j "$jobid" -h -o "%T" 2>/dev/null | tr -d ' '
    else
        # -P requests parsable output, which is not cut to the default column
        # width (that cut mangles long states such as OUT_OF_MEMORY). Keeping
        # only the first word turns "CANCELLED by <uid>" into plain CANCELLED,
        # so that the state can be matched exactly by the caller.
        local status=$(sacct -j "$jobid" -n -X -P -o State 2>/dev/null | head -1 | awk '{print $1}')
        [ -n "$status" ] && echo "$status" || echo "UNKNOWN"
    fi
{% elif host.scheduler == 'pbspro' -%}
    # Query qstat once and parse the captured listing: repeated calls could
    # disagree when the job leaves the queue between two of them.
    local qstat_out
    if qstat_out=$(qstat "$jobid" 2>/dev/null); then
        # The state letter is the 5th column of the last line of the listing
        local state=$(echo "$qstat_out" | tail -1 | awk '{print $5}')
        echo "$state"
        echo "from qstat file:  $state" >> job.out
    else
        # The job is no longer listed: fall back to the job history
        local qstat_history=$(qstat -x -f "$jobid" 2>/dev/null)
        if [ -z "$qstat_history" ]; then
            echo "UNKNOWN"
            return 0
        fi
        local job_state=$(echo "$qstat_history" | grep "job_state" | cut -d= -f2 | tr -d ' ')
        echo "from qstat history file: $job_state" >> job.out
        if [ "$job_state" = "F" ]; then
            # Finished job: success is decided by its recorded exit status
            local exit_status=$(echo "$qstat_history" | grep "Exit_status" | cut -d= -f2 | tr -d ' ')
            [ "$exit_status" = "0" ] && echo "SUCCESS" || echo "FAILED"
            echo "from qstat history file with F: $exit_status" >> job.out
        else
            echo "$job_state"
        fi
    fi
{% endif %}
}
# Cancel every monitored job except the sentinel itself, then exit 1.
# Globals:   JOBIDS, STATUS_FILES (read); the scheduler's own job id variable
# Arguments: $1 - label shown in the failure banner (the caller passes the id
#                 of the job that failed)
# Outputs:   progress and summary on stderr; writes 1 to the status file of
#            each job whose cancellation command succeeded
# Returns:   never returns, the script exits with status 1
kill_all() {
echo -e "\n\n========== FAILURE: $1 ==========" >&2
echo "Killing all jobs..." >&2
# Number of jobs actually cancelled
local n=0
{% if host.scheduler == 'slurm' -%}
for j in "${JOBIDS[@]}"; do
# Each step of the chain runs only if the previous one succeeded, so a job
# is marked as failed and counted only when scancel accepted the request
[ "$j" != "$SLURM_JOB_ID" ] && scancel "$j" 2>/dev/null && echo " ✗ $j" >&2 && echo 1 > "${STATUS_FILES[$j]}" && n=$((n+1))
done
{% elif host.scheduler == 'pbspro' -%}
# Ids are compared on the part before the first dot
local my_id=$(echo $PBS_JOBID | cut -d. -f1)
for j in "${JOBIDS[@]}"; do
local jid=$(echo $j | cut -d. -f1)
# Same chaining as above: mark and count only when qdel succeeded
[ "$jid" != "$my_id" ] && qdel -W force "$j" 2>/dev/null && echo " ✗ $j" >&2 && echo 1 > "${STATUS_FILES[$j]}" && n=$((n+1))
done
{% endif %}
echo "Killed $n jobs" >&2
echo "===================================" >&2
exit 1
}
# Stop the non-blocking jobs that have not been recorded as successful.
# Globals:   NUM_NON_BLOCKING, NON_BLOCKING_JOBS, STATUS_FILES, status (read)
# Outputs:   progress and summary on stdout; for each job, a marker file
#            named like its status file but ending in .terminating, and,
#            when the scheduler command succeeded, 0 in its status file
# Returns:   0 immediately when there is no non-blocking job
terminate_non_blocking_jobs() {
[ $NUM_NON_BLOCKING -eq 0 ] && return 0
echo -e "\n==================================="
echo "Terminating non-blocking jobs..."
# Number of jobs for which the scheduler command succeeded
local n=0
{% if host.scheduler == 'slurm' -%}
for j in "${NON_BLOCKING_JOBS[@]}"; do
# Nothing to stop for a job already recorded as successful
[ "${status[$j]}" = "SUCCESS" ] && continue
if [ "$j" != "$SLURM_JOB_ID" ]; then
# Mark job as intentionally terminating
# (the marker is created before the signal is sent)
touch "${STATUS_FILES[$j]%.status}.terminating"
scancel --signal=TERM "$j" 2>/dev/null && {
echo " ✓ $j"
echo 0 > "${STATUS_FILES[$j]}"
n=$((n+1))
}
fi
done
{% elif host.scheduler == 'pbspro' -%}
# Ids are compared on the part before the first dot
local my_id=$(echo $PBS_JOBID | cut -d. -f1)
for j in "${NON_BLOCKING_JOBS[@]}"; do
# Nothing to stop for a job already recorded as successful
[ "${status[$j]}" = "SUCCESS" ] && continue
local jid=$(echo $j | cut -d. -f1)
if [ "$jid" != "$my_id" ]; then
# Mark job as intentionally terminating
# (the marker is created before the job is deleted)
touch "${STATUS_FILES[$j]%.status}.terminating"
qdel "$j" 2>/dev/null && {
echo " ✓ $j"
echo 0 > "${STATUS_FILES[$j]}"
n=$((n+1))
}
fi
done
{% endif %}
echo "Terminated $n non-blocking jobs"
echo "==================================="
}
# Monitoring loop: poll every job, kill everything on the first failure and
# stop with status 0 once all blocking jobs are recorded as successful.
echo "Monitoring started..."
# Start from a fresh diagnostic log (check_status appends to job.out)
rm -rf job.out
while true; do
# Counters of the current pass
running=0 pending=0 blocking_done=0 total_done=0
for j in "${JOBIDS[@]}"; do
# A job already recorded as successful is only counted, not polled again
if [ "${status[$j]}" = "SUCCESS" ]; then
total_done=$((total_done+1))
[ "${IS_BLOCKING[$j]}" = "1" ] && blocking_done=$((blocking_done+1))
continue
fi
s=$(check_status "$j")
echo after check status $j $s
case "$s" in
# Newly successful job: announce it once, record it and count it
SUCCESS)
[ "${status[$j]}" != "SUCCESS" ] && echo "[$(date +'%H:%M:%S')] ✓ $j"
status[$j]="SUCCESS"
total_done=$((total_done+1))
[ "${IS_BLOCKING[$j]}" = "1" ] && blocking_done=$((blocking_done+1))
;;
# Any failure state ends the whole workflow: kill_all does not return
FAILED{% if host.scheduler == 'slurm' %}|TIMEOUT|CANCELLED|NODE_FAIL|PREEMPTED|OUT_OF_MEMORY{% endif %})
echo "[$(date +'%H:%M:%S')] ✗ $j ($s)"
kill_all "$j"
;;
# Running job: the arrow line is printed only on the PENDING -> RUNNING change
{% if host.scheduler == 'slurm' -%}
RUNNING|COMPLETING)
{% elif host.scheduler == 'pbspro' -%}
R)
{% endif -%}
[ "${status[$j]}" = "PENDING" ] && echo "[$(date +'%H:%M:%S')] → $j"
status[$j]="RUNNING"
running=$((running+1))
;;
# Every other state, including an empty or unknown one, counts as pending
*)
pending=$((pending+1))
;;
esac
done
# Progress line, redrawn in place thanks to the leading carriage return
echo -ne "\r[$(date +'%H:%M:%S')] Blocking: $blocking_done/$NUM_BLOCKING | Total: $total_done/$NUM_TOTAL"
[ $running -gt 0 ] && echo -ne " | $running run"
[ $pending -gt 0 ] && echo -ne " | $pending pend"
echo -ne " "
# All blocking jobs succeeded: stop the remaining non-blocking jobs and leave
[ $blocking_done -eq $NUM_BLOCKING ] && {
echo -e "\n\n========== SUCCESS: All blocking jobs completed =========="
terminate_non_blocking_jobs
exit 0
}
sleep $CHECK_INTERVAL
done
{% endblock %}
# The post_run block is left empty on purpose, so that this template emits
# nothing after the monitoring loop.
{% block post_run -%}
{% endblock %}