# flame-roles/slurm_qos/tasks/main.yml
# (file-viewer metadata at capture time: 171 lines, 7.2 KiB, YAML)

---
# Create every QoS listed in `slurm_qos_settings` and apply its settings.
# Each entry must provide `name`; the optional keys read below are
# max_tres, max_submit_jobs, max_jobs_per_user, grace_time and preempt.
# The whole block is skipped when the list is undefined or empty.
- name: Ensure Slurm QoS exist
  when: slurm_qos_settings | default([]) | length > 0
  block:
    # Read-only query: one QoS name per line (-P parsable, -n no header).
    # failed_when is disabled, so a failed query yields an empty list below.
    - name: Check Slurm QoS list
      ansible.builtin.command: "sacctmgr -Pn show qos format=name"
      register: qos_list
      changed_when: false
      failed_when: false

    # Exact line match instead of a regex search over the raw output, so a
    # QoS called "debug" is not considered present just because "debug_qos" is.
    - name: Add Slurm QoS (if missing)
      ansible.builtin.command: "sacctmgr -i add qos {{ item.name }}"
      loop: "{{ slurm_qos_settings }}"
      loop_control:
        label: "{{ item.name }}"
      when: item.name not in (qos_list.stdout_lines | default([]) | map('trim') | list)
      changed_when: true

    # Runs after the add task, so every listed QoS exists at this point; the
    # settings are therefore applied to freshly created QoS as well, not only
    # to the ones that existed when the list was queried.
    # Entries without any setting are skipped, because "modify ... set" with
    # nothing after it is not a valid sacctmgr command.
    - name: Modify Slurm QoS settings
      ansible.builtin.command: "sacctmgr -i modify qos where name={{ item.name }} set {{ qos_set_args | trim }}"
      vars:
        # Space-separated Key=Value pairs for every setting that is defined
        # and not null on the current item.
        qos_set_args: >-
          {% if item.max_tres is defined and item.max_tres is not none %}MaxTRES={{ item.max_tres }}{% endif %}
          {% if item.max_submit_jobs is defined and item.max_submit_jobs is not none %}MaxSubmitJobs={{ item.max_submit_jobs }}{% endif %}
          {% if item.max_jobs_per_user is defined and item.max_jobs_per_user is not none %}MaxJobsPerUser={{ item.max_jobs_per_user }}{% endif %}
          {% if item.grace_time is defined and item.grace_time is not none %}GraceTime={{ item.grace_time }}{% endif %}
          {% if item.preempt is defined and item.preempt is not none %}Preempt={{ item.preempt }}{% endif %}
      loop: "{{ slurm_qos_settings }}"
      loop_control:
        label: "{{ item.name }}"
      when: qos_set_args | trim | length > 0
      changed_when: true
# Create every account listed in `slurm_qos_accounts` that sacctmgr does not
# already report. Each entry must provide `name`; `organization`,
# `description` and `parent` are optional.
# The whole block is skipped when the list is undefined or empty.
- name: Ensure Slurm accounts exist
  when: slurm_qos_accounts | default([]) | length > 0
  block:
    - name: Get Slurm cluster list
      ansible.builtin.command: "sacctmgr -Pn show cluster format=cluster"
      register: cluster_list
      changed_when: false
      failed_when: false

    # Only auto-detect when the answer is unambiguous. With zero or several
    # clusters, `slurm_cluster_name` has to be supplied from elsewhere.
    - name: Set cluster fact if only one cluster is found
      ansible.builtin.set_fact:
        slurm_cluster_name: "{{ cluster_list.stdout_lines[0] }}"
      when: cluster_list.stdout_lines | default([]) | length == 1

    # Read-only per-account query; results are consumed by the next task
    # through `account_check.results`.
    - name: Check if Slurm account exists
      ansible.builtin.command: "sacctmgr -Pn list account name={{ item.name }} format=account"
      register: account_check
      changed_when: false
      failed_when: false
      loop: "{{ slurm_qos_accounts }}"
      loop_control:
        label: "{{ item.name }}"

    # `item` is one result of the check above; `item.item` is the original
    # account entry. The account is added when its name is not one of the
    # output lines (exact comparison, so names are not treated as regexes).
    # Optional arguments are emitted only when set to a non-null value.
    - name: Add Slurm account
      ansible.builtin.command: >-
        sacctmgr -i add account name={{ item.item.name }}
        cluster={{ slurm_cluster_name }}
        {% if item.item.organization is defined and item.item.organization is not none %}Organization={{ item.item.organization }}{% endif %}
        {% if item.item.description is defined and item.item.description is not none %}Description="{{ item.item.description }}"{% endif %}
        {% if item.item.parent is defined and item.item.parent is not none %}Parent={{ item.item.parent }}{% endif %}
      loop: "{{ account_check.results }}"
      loop_control:
        label: "{{ item.item.name }}"
      when: item.item.name not in (item.stdout_lines | default([]) | map('trim') | list)
      changed_when: true
# Create the users listed in `slurm_qos_users`, make sure each user has an
# association for every partition listed on the entry, and apply per-partition
# GrpTRES limits.
# Keys read from each entry: `username` (required), `sponsor` (account,
# default 'orcd'), `partitions` (list of mappings with `name` and optional
# `grptres` / `max_nodes`), `qos_list`, `qos_default`, `comment`.
# The whole block is skipped when the list is undefined or empty.
- name: Ensure Slurm users exist with partition limits
  when: slurm_qos_users | default([]) | length > 0
  block:
    # The commands below need `slurm_cluster_name`. Detect it here when it has
    # not been provided or set earlier, so this block does not depend on any
    # other block having run first.
    - name: Get Slurm cluster list (cluster name not yet known)
      ansible.builtin.command: "sacctmgr -Pn show cluster format=cluster"
      register: user_cluster_list
      changed_when: false
      failed_when: false
      when: slurm_cluster_name is not defined

    - name: Set cluster fact if only one cluster is found
      ansible.builtin.set_fact:
        slurm_cluster_name: "{{ user_cluster_list.stdout_lines[0] }}"
      when:
        - slurm_cluster_name is not defined
        - user_cluster_list.stdout_lines | default([]) | length == 1

    # Snapshot of the users known before this run changes anything.
    - name: Check existing Slurm users
      ansible.builtin.command: "sacctmgr -Pn list users format=user"
      register: user_list
      changed_when: false
      failed_when: false

    # New users get all their partitions in one command. Missing or empty
    # `partitions` / `qos_list` fall back to the literal defaults below.
    # Presence is an exact match against the output lines, so "bob" is not
    # treated as existing because "bobby" does.
    - name: Add Slurm user if not already present
      ansible.builtin.command: >-
        sacctmgr -i add user name={{ item.username }}
        cluster={{ slurm_cluster_name }}
        account={{ item.sponsor | default('orcd') }}
        partition={{ item.partitions | default([], true) | map(attribute='name') | join(',') | default('debug,preempt', true) }}
        QOS={{ item.qos_list | default([], true) | join(',') | default('debug_qos,preempt_qos', true) }}
        DefaultQOS={{ item.qos_default | default('preempt_qos') }}
        {% if item.comment is defined and item.comment is not none %}Comment="{{ item.comment }}"{% endif %}
      loop: "{{ slurm_qos_users }}"
      loop_control:
        label: "{{ item.username }}"
      when: item.username not in (user_list.stdout_lines | default([]) | map('trim') | list)
      changed_when: true

    # Only for users that existed before this run; output is one
    # "Partition|GrpTRES|MaxNodes" line per association.
    - name: Check existing partition associations
      ansible.builtin.command: >-
        sacctmgr -Pn show assoc where user={{ item.username }}
        cluster={{ slurm_cluster_name }}
        account={{ item.sponsor | default('orcd') }}
        format=Partition,GrpTRES,MaxNodes
      register: assoc_check
      loop: "{{ slurm_qos_users }}"
      loop_control:
        label: "{{ item.username }}"
      when: item.username in (user_list.stdout_lines | default([]) | map('trim') | list)
      changed_when: false
      failed_when: false

    # One iteration per (user, partition) pair; users without a `partitions`
    # key are skipped rather than failing the loop.
    # The second condition keeps only the first column (partition name) of the
    # user's association lines and compares it exactly, so a partition name is
    # never matched inside another name or inside the GrpTRES column.
    - name: Ensure partition associations exist for existing users
      ansible.builtin.command: >-
        sacctmgr -i add user name={{ item.0.username }}
        cluster={{ slurm_cluster_name }}
        account={{ item.0.sponsor | default('orcd') }}
        partition={{ item.1.name }}
        {% if item.1.name == 'admin' %}
        QOS=admin_qos DefaultQOS=admin_qos
        {% else %}
        QOS={{ item.0.qos_list | default([], true) | join(',') | default('debug_qos,preempt_qos', true) }}
        DefaultQOS={{ item.0.qos_default | default('preempt_qos') }}
        {% endif %}
      loop: "{{ slurm_qos_users | subelements('partitions', skip_missing=True) }}"
      loop_control:
        label: "{{ item.0.username }} - {{ item.1.name }}"
      when:
        - item.0.username in (user_list.stdout_lines | default([]) | map('trim') | list)
        - >-
          item.1.name not in (assoc_check.results
          | selectattr('item.username', 'equalto', item.0.username)
          | selectattr('stdout_lines', 'defined')
          | map(attribute='stdout_lines') | flatten
          | map('regex_replace', '[|].*$', '')
          | list)
      changed_when: true

    # Informational only. Values come from the partition entry first, then
    # from slurm_qos_partition_defaults.partitions[<partition name>]; a
    # partition that has no defaults entry resolves to 'unset' instead of
    # raising an undefined-variable error.
    - name: Debug partition-specific limits
      ansible.builtin.debug:
        msg: >
          For {{ item.0.username }} - {{ item.1.name }}:
          grptres={{ grptres | default('unset', true) }},
          max_nodes={{ max_nodes | default('unset', true) }}
      vars:
        partition_defaults: "{{ ((slurm_qos_partition_defaults | default({})).partitions | default({})).get(item.1.name, {}) }}"
        grptres: "{{ item.1.grptres | default(partition_defaults.grptres | default('')) }}"
        max_nodes: "{{ item.1.max_nodes | default(partition_defaults.max_nodes | default('')) }}"
      loop: "{{ slurm_qos_users | subelements('partitions', skip_missing=True) }}"
      loop_control:
        label: "{{ item.0.username }} - {{ item.1.name }}"

    # Applies GrpTRES only (max_nodes is reported above but not set here).
    # Runs for every (user, partition) pair, including users created earlier
    # in this run, and is skipped when no GrpTRES value resolves.
    # Failures are deliberately tolerated (failed_when: false), but a failed
    # command is no longer reported as a change.
    - name: Update partition-specific limits (GrpTres only)
      ansible.builtin.command: >-
        sacctmgr -i modify user where
        name={{ item.0.username }}
        cluster={{ slurm_cluster_name }}
        account={{ item.0.sponsor | default('orcd') }}
        partition={{ item.1.name }}
        set
        GrpTRES={{ grptres }}
      vars:
        partition_defaults: "{{ ((slurm_qos_partition_defaults | default({})).partitions | default({})).get(item.1.name, {}) }}"
        grptres: "{{ item.1.grptres | default(partition_defaults.grptres | default('')) }}"
      loop: "{{ slurm_qos_users | subelements('partitions', skip_missing=True) }}"
      loop_control:
        label: "{{ item.0.username }} - {{ item.1.name }}"
      when: grptres | default('', true) | string | length > 0
      register: limit_update
      changed_when: (limit_update.rc | default(1)) == 0
      failed_when: false