---
# Slurm accounting setup via sacctmgr: QoS definitions, accounts, and user associations.
# Create every QoS listed in `slurm_qos_settings` and apply its limits.
# Each entry needs a `name`. The keys MaxTRES, MaxTRESPU, MaxSubmitJobs,
# MaxJobsPerUser, GraceTime and Preempt are optional and are passed to
# sacctmgr unchanged. The whole block is skipped when the list is empty/undefined.
- name: Ensure Slurm QoS exist
  when: slurm_qos_settings | default([]) | length > 0
  block:
    - name: Check Slurm QoS list
      ansible.builtin.command: "sacctmgr -Pn show qos format=name"
      register: qos_list
      changed_when: false
      # Best effort: if the query fails, stdout is empty and every QoS is
      # treated as missing by the next task.
      failed_when: false

    - name: Add Slurm QoS (if missing)
      ansible.builtin.command: >-
        sacctmgr -i add qos {{ item.name }}
      loop: "{{ slurm_qos_settings }}"
      loop_control:
        label: "{{ item.name }}"
      # Compare against whole output lines. A regex/substring search would
      # consider "debug" present when only "debug_qos" exists.
      when: item.name not in (qos_list.stdout_lines | default([]))
      changed_when: true

    # Runs for every QoS, including the ones created just above, so that a
    # new QoS receives its limits on the first run instead of the second.
    - name: Modify Slurm QoS settings
      vars:
        # Optional per-QoS keys that are forwarded to `sacctmgr modify qos ... set`.
        qos_limit_keys:
          - MaxTRES
          - MaxTRESPU
          - MaxSubmitJobs
          - MaxJobsPerUser
          - GraceTime
          - Preempt
      ansible.builtin.command: >-
        sacctmgr -i modify qos where name={{ item.name }} set
        {% for key in qos_limit_keys if item[key] is defined %}
        {{ key }}={{ item[key] }}
        {% endfor %}
      loop: "{{ slurm_qos_settings }}"
      loop_control:
        label: "{{ item.name }}"
      # Skip entries without any setting: a bare "set" with nothing after it
      # is not a valid modify request.
      when: qos_limit_keys | select('in', item) | list | length > 0
      changed_when: true

# Create every account listed in `slurm_qos_accounts`.
# Each entry needs a `name`; `organization`, `description` and `parent` are
# optional. The cluster is auto-detected when sacctmgr reports exactly one
# cluster; otherwise `slurm_cluster_name` must be provided by the caller.
- name: Ensure Slurm accounts exist
  when: slurm_qos_accounts | default([]) | length > 0
  block:
    - name: Get Slurm cluster list
      ansible.builtin.command: "sacctmgr -Pn show cluster format=cluster"
      register: cluster_list
      changed_when: false
      # Best effort: a failed query simply leaves the cluster fact unset here.
      failed_when: false

    - name: Set cluster fact if only one cluster is found
      ansible.builtin.set_fact:
        slurm_cluster_name: "{{ cluster_list.stdout_lines[0] }}"
      when: cluster_list.stdout_lines | default([]) | length == 1

    # Without this check, zero or several detected clusters surface later as
    # an undefined-variable templating error inside the add command.
    - name: Verify that a Slurm cluster name is available
      ansible.builtin.assert:
        that:
          - slurm_cluster_name is defined
          - slurm_cluster_name | string | length > 0
        fail_msg: >-
          Could not determine a single Slurm cluster from sacctmgr;
          set slurm_cluster_name explicitly.
        quiet: true

    - name: Check if Slurm account exists
      ansible.builtin.command: "sacctmgr -Pn list account name={{ item.name }} format=account"
      register: account_check
      changed_when: false
      failed_when: false
      loop: "{{ slurm_qos_accounts }}"
      loop_control:
        label: "{{ item.name }}"

    # Loops over the per-account check results, so the account definition is
    # `item.item` and the query output is `item.stdout_lines`.
    - name: Add Slurm account
      ansible.builtin.command: >-
        sacctmgr -i add account name={{ item.item.name }}
        cluster={{ slurm_cluster_name }}
        {% if item.item.organization is defined %}Organization={{ item.item.organization | quote }}{% endif %}
        {% if item.item.description is defined %}Description={{ item.item.description | quote }}{% endif %}
        {% if item.item.parent is defined %}Parent={{ item.item.parent }}{% endif %}
      loop: "{{ account_check.results }}"
      loop_control:
        label: "{{ item.item.name }}"
      # Exact line comparison; the previous regex search misbehaved on names
      # containing regex metacharacters.
      when: item.item.name not in (item.stdout_lines | default([]))
      changed_when: true

# Create the users in `slurm_qos_users`, make sure each has an association per
# listed partition, and apply per-partition GrpTRES limits.
#
# Keys read from each user entry:
#   username     (required)
#   sponsor      account name, defaults to 'orcd'
#   partitions   optional list of {name, max_nodes, grptres}
#   qos_list     optional list, defaults to debug_qos,preempt_qos
#   qos_default  optional, defaults to preempt_qos
#   comment      optional
#
# GrpTRES resolution order for a partition entry:
#   1. max_nodes == -1            -> node=-1 (sacctmgr's documented "clear" value)
#   2. max_nodes >= 0             -> node=<max_nodes>
#   3. grptres on the entry
#   4. slurm_qos_partition_defaults[<name>].grptres
#   5. slurm_qos_partition_defaults[<name>].max_nodes
#   6. built-in: preempt -> 4, debug -> 2
#   otherwise the partition is left untouched.
- name: Ensure Slurm users exist with partition limits
  when: slurm_qos_users | default([]) | length > 0
  block:
    # This block must not rely on another block having detected the cluster:
    # that other block is skipped when it has nothing to do.
    - name: Get Slurm cluster list (cluster name not known yet)
      ansible.builtin.command: "sacctmgr -Pn show cluster format=cluster"
      register: user_cluster_list
      changed_when: false
      failed_when: false
      when: slurm_cluster_name is not defined

    - name: Set cluster fact if only one cluster is found
      ansible.builtin.set_fact:
        slurm_cluster_name: "{{ user_cluster_list.stdout_lines[0] }}"
      when:
        - slurm_cluster_name is not defined
        - user_cluster_list.stdout_lines | default([]) | length == 1

    - name: Verify that a Slurm cluster name is available
      ansible.builtin.assert:
        that:
          - slurm_cluster_name is defined
          - slurm_cluster_name | string | length > 0
        fail_msg: >-
          Could not determine a single Slurm cluster from sacctmgr;
          set slurm_cluster_name explicitly.
        quiet: true

    - name: Check existing Slurm users
      ansible.builtin.command: "sacctmgr -Pn list users format=user"
      register: user_list
      changed_when: false
      # Best effort: a failed query makes every user look missing.
      failed_when: false

    - name: Add Slurm user if not already present
      ansible.builtin.command: >-
        sacctmgr -i add user name={{ item.username }}
        cluster={{ slurm_cluster_name }}
        account={{ item.sponsor | default('orcd') }}
        {% if item.partitions is defined and item.partitions | length > 0 %}
        partition={{ item.partitions | map(attribute='name') | join(',') }}
        {% else %}
        partition=debug,preempt
        {% endif %}
        {% if item.qos_list is defined and item.qos_list | length > 0 %}
        QOS={{ item.qos_list | join(',') }}
        {% else %}
        QOS=debug_qos,preempt_qos
        {% endif %}
        {% if item.qos_default is defined %}
        DefaultQOS={{ item.qos_default }}
        {% else %}
        DefaultQOS=preempt_qos
        {% endif %}
        {% if item.comment is defined %}Comment={{ item.comment | quote }}{% endif %}
      loop: "{{ slurm_qos_users }}"
      loop_control:
        label: "{{ item.username }}"
      # Exact line match: a substring search treats "bob" as existing when
      # only "bobby" is present.
      when: item.username not in (user_list.stdout_lines | default([]))
      changed_when: true
      register: user_creation_result
      failed_when: user_creation_result.rc != 0

    # Only users that existed before this run are queried; users created
    # above already received their partitions in the add command.
    - name: Check existing partition associations
      ansible.builtin.command: >-
        sacctmgr -Pn show assoc where
        user={{ item.username }}
        cluster={{ slurm_cluster_name }}
        account={{ item.sponsor | default('orcd') }}
        format=Partition,QOS,DefaultQOS
      register: assoc_check
      loop: "{{ slurm_qos_users }}"
      loop_control:
        label: "{{ item.username }}"
      when: item.username in (user_list.stdout_lines | default([]))
      changed_when: false
      failed_when: false

    - name: Ensure partition associations exist for existing users
      vars:
        # First column (Partition) of every association line for this user.
        # Matching on the whole line would let partition "debug" match the
        # QOS column value "debug_qos".
        existing_partitions: >-
          {{ assoc_check.results
          | selectattr('stdout_lines', 'defined')
          | selectattr('item.username', 'equalto', item.0.username)
          | map(attribute='stdout_lines')
          | first | default([])
          | map('regex_replace', '[|].*$', '')
          | list }}
      ansible.builtin.command: >-
        sacctmgr -i add user name={{ item.0.username }}
        cluster={{ slurm_cluster_name }}
        account={{ item.0.sponsor | default('orcd') }}
        partition={{ item.1.name }}
        {% if item.1.name == 'admin' %}
        QOS=admin_qos DefaultQOS=admin_qos
        {% endif %}
      # skip_missing: users without a `partitions` list are valid input (the
      # add task gives them default partitions) and must not abort the loop.
      loop: "{{ slurm_qos_users | subelements('partitions', skip_missing=True) }}"
      loop_control:
        label: "{{ item.0.username }} - {{ item.1.name }}"
      when:
        - item.0.username in (user_list.stdout_lines | default([]))
        - item.1.name not in existing_partitions
      changed_when: true

    - name: Update QoS for existing admin partition associations
      vars:
        # Partition column of the associations recorded before this run.
        existing_partitions: >-
          {{ assoc_check.results
          | selectattr('stdout_lines', 'defined')
          | selectattr('item.username', 'equalto', item.0.username)
          | map(attribute='stdout_lines')
          | first | default([])
          | map('regex_replace', '[|].*$', '')
          | list }}
      ansible.builtin.command: >-
        sacctmgr -i modify user where
        name={{ item.0.username }}
        cluster={{ slurm_cluster_name }}
        account={{ item.0.sponsor | default('orcd') }}
        partition={{ item.1.name }}
        set QOS=admin_qos DefaultQOS=admin_qos
      loop: "{{ slurm_qos_users | subelements('partitions', skip_missing=True) }}"
      loop_control:
        label: "{{ item.0.username }} - {{ item.1.name }}"
      when:
        - item.0.username in (user_list.stdout_lines | default([]))
        - item.1.name == 'admin'
        - item.1.name in existing_partitions
      changed_when: true

    - name: Set partition-specific node limits
      vars:
        # Defaults for this partition; empty when none are configured or when
        # slurm_qos_partition_defaults itself is undefined.
        partition_defaults: "{{ (slurm_qos_partition_defaults | default({})).get(item.1.name, {}) }}"
        # Node limits used when neither the entry nor the defaults say anything.
        builtin_node_limits:
          preempt: 4
          debug: 2
        # Resolved GrpTRES value; renders empty when no limit applies.
        resolved_grptres: >-
          {%- if item.1.max_nodes is defined and item.1.max_nodes | int == -1 -%}
          node=-1
          {%- elif item.1.max_nodes is defined and item.1.max_nodes | int >= 0 -%}
          node={{ item.1.max_nodes }}
          {%- elif item.1.grptres is defined -%}
          {{ item.1.grptres }}
          {%- elif partition_defaults.grptres is defined -%}
          {{ partition_defaults.grptres }}
          {%- elif partition_defaults.max_nodes is defined -%}
          node={{ partition_defaults.max_nodes }}
          {%- elif item.1.name in builtin_node_limits -%}
          node={{ builtin_node_limits[item.1.name] }}
          {%- endif -%}
      ansible.builtin.command: >-
        sacctmgr -i modify user where
        name={{ item.0.username }}
        cluster={{ slurm_cluster_name }}
        account={{ item.0.sponsor | default('orcd') }}
        partition={{ item.1.name }}
        set GrpTRES={{ resolved_grptres | trim }}
      loop: "{{ slurm_qos_users | subelements('partitions', skip_missing=True) }}"
      loop_control:
        label: "{{ item.0.username }} - {{ item.1.name }}"
      # Never send a value-less "GrpTRES=node=": partitions with no resolved
      # limit (e.g. admin without defaults) are left as they are.
      when: resolved_grptres | trim | length > 0
      changed_when: true