major update
This commit is contained in:
parent
792105e03b
commit
ad0ffa4a4e
|
@ -1,25 +1,50 @@
|
||||||
---
|
---
|
||||||
- name: Ensure Slurm QoS exist
|
- name: Validate QoS settings
|
||||||
|
ansible.builtin.assert:
|
||||||
|
that:
|
||||||
|
- item.name is defined
|
||||||
|
fail_msg: "QoS entry missing 'name': {{ item }}"
|
||||||
|
loop: "{{ slurm_qos_settings }}"
|
||||||
|
when: slurm_qos_assert_vars | bool
|
||||||
|
|
||||||
|
- name: Validate account settings
|
||||||
|
ansible.builtin.assert:
|
||||||
|
that:
|
||||||
|
- item.name is defined
|
||||||
|
fail_msg: "Account entry missing 'name': {{ item }}"
|
||||||
|
loop: "{{ slurm_qos_accounts }}"
|
||||||
|
when: slurm_qos_assert_vars | bool
|
||||||
|
|
||||||
|
- name: Validate user settings
|
||||||
|
ansible.builtin.assert:
|
||||||
|
that:
|
||||||
|
- item.username is defined
|
||||||
|
fail_msg: "User entry missing 'username': {{ item }}"
|
||||||
|
loop: "{{ slurm_qos_users }}"
|
||||||
|
when: slurm_qos_assert_vars | bool
|
||||||
|
|
||||||
|
- name: Ensure Slurm QoS configured
|
||||||
when: slurm_qos_settings | default([]) | length > 0
|
when: slurm_qos_settings | default([]) | length > 0
|
||||||
block:
|
block:
|
||||||
- name: Check Slurm QoS list
|
- name: Check existing Slurm QoS
|
||||||
ansible.builtin.command: "sacctmgr -Pn show qos format=name"
|
ansible.builtin.command: "sacctmgr -Pn show qos format=name"
|
||||||
register: qos_list
|
register: qos_list
|
||||||
changed_when: false
|
changed_when: false
|
||||||
failed_when: false
|
failed_when: false
|
||||||
|
|
||||||
- name: Add Slurm QoS (if missing)
|
- name: Add missing Slurm QoS
|
||||||
ansible.builtin.command: "sacctmgr -i add qos {{ item.name }}"
|
ansible.builtin.command: "sacctmgr -i add qos {{ item.name }}"
|
||||||
loop: "{{ slurm_qos_settings }}"
|
loop: "{{ slurm_qos_settings }}"
|
||||||
loop_control:
|
loop_control:
|
||||||
label: "{{ item.name }}"
|
label: "{{ item.name }}"
|
||||||
when: qos_list.stdout is not search(item.name)
|
when: item.name not in qos_list.stdout_lines  # exact per-line match; a substring/regex search treats 'debug' as present when only 'debug_qos' exists
|
||||||
changed_when: true
|
register: qos_add
|
||||||
|
changed_when: qos_add.rc == 0
|
||||||
|
|
||||||
- name: Modify Slurm QoS (if already exists)
|
- name: Configure Slurm QoS
|
||||||
ansible.builtin.command: >
|
ansible.builtin.command: >
|
||||||
sacctmgr -i modify qos where name={{ item.name }} set
|
sacctmgr -i modify qos where name={{ item.name }} set
|
||||||
{% if item.max_tres is defined and item.max_tres is not none %}MaxTRES={{ item.max_tres }}{% endif %}
|
{% if item.max_tres_per_user is defined and item.max_tres_per_user is not none %}MaxTRESPU={{ item.max_tres_per_user }}{% endif %}
|
||||||
{% if item.max_submit_jobs is defined and item.max_submit_jobs is not none %}MaxSubmitJobs={{ item.max_submit_jobs }}{% endif %}
|
{% if item.max_submit_jobs is defined and item.max_submit_jobs is not none %}MaxSubmitJobs={{ item.max_submit_jobs }}{% endif %}
|
||||||
{% if item.max_jobs_per_user is defined and item.max_jobs_per_user is not none %}MaxJobsPerUser={{ item.max_jobs_per_user }}{% endif %}
|
{% if item.max_jobs_per_user is defined and item.max_jobs_per_user is not none %}MaxJobsPerUser={{ item.max_jobs_per_user }}{% endif %}
|
||||||
{% if item.grace_time is defined and item.grace_time is not none %}GraceTime={{ item.grace_time }}{% endif %}
|
{% if item.grace_time is defined and item.grace_time is not none %}GraceTime={{ item.grace_time }}{% endif %}
|
||||||
|
@ -27,8 +52,9 @@
|
||||||
loop: "{{ slurm_qos_settings }}"
|
loop: "{{ slurm_qos_settings }}"
|
||||||
loop_control:
|
loop_control:
|
||||||
label: "{{ item.name }}"
|
label: "{{ item.name }}"
|
||||||
when: qos_list.stdout is search(item.name)
|
register: qos_modify
|
||||||
changed_when: true
|
changed_when: qos_modify.rc == 0 and "Nothing modified" not in qos_modify.stdout  # a no-op modify must not report a change
|
||||||
|
failed_when: qos_modify.rc != 0 and "Nothing modified" not in qos_modify.stdout
|
||||||
|
|
||||||
- name: Ensure Slurm accounts exist
|
- name: Ensure Slurm accounts exist
|
||||||
when: slurm_qos_accounts | default([]) | length > 0
|
when: slurm_qos_accounts | default([]) | length > 0
|
||||||
|
@ -44,6 +70,11 @@
|
||||||
slurm_cluster_name: "{{ cluster_list.stdout_lines[0] }}"
|
slurm_cluster_name: "{{ cluster_list.stdout_lines[0] }}"
|
||||||
when: cluster_list.stdout_lines | length == 1
|
when: cluster_list.stdout_lines | length == 1
|
||||||
|
|
||||||
|
- name: Fail if multiple clusters detected without explicit config
|
||||||
|
ansible.builtin.fail:
|
||||||
|
msg: "Multiple clusters found. Set 'slurm_cluster_name' explicitly."
|
||||||
|
when: cluster_list.stdout_lines | length > 1 and slurm_cluster_name is not defined
|
||||||
|
|
||||||
- name: Check if Slurm account exists
|
- name: Check if Slurm account exists
|
||||||
ansible.builtin.command: "sacctmgr -Pn list account name={{ item.name }} format=account"
|
ansible.builtin.command: "sacctmgr -Pn list account name={{ item.name }} format=account"
|
||||||
register: account_check
|
register: account_check
|
||||||
|
@ -66,7 +97,7 @@
|
||||||
when: item.stdout is not search(item.item.name)
|
when: item.stdout is not search(item.item.name)
|
||||||
changed_when: true
|
changed_when: true
|
||||||
|
|
||||||
- name: Ensure Slurm users exist with partition limits
|
- name: Ensure Slurm users and partitions configured
|
||||||
when: slurm_qos_users | default([]) | length > 0
|
when: slurm_qos_users | default([]) | length > 0
|
||||||
block:
|
block:
|
||||||
- name: Check existing Slurm users
|
- name: Check existing Slurm users
|
||||||
|
@ -75,96 +106,103 @@
|
||||||
changed_when: false
|
changed_when: false
|
||||||
failed_when: false
|
failed_when: false
|
||||||
|
|
||||||
- name: Add Slurm user if not already present
|
- name: Add new Slurm users if missing with default preempt partition
|
||||||
ansible.builtin.command: >
|
ansible.builtin.command: >
|
||||||
sacctmgr -i add user name={{ item.username }}
|
sacctmgr -i add user name={{ item.username }}
|
||||||
cluster={{ slurm_cluster_name }}
|
cluster={{ slurm_cluster_name }}
|
||||||
account={{ item.sponsor | default('orcd') }}
|
account={{ item.sponsor | default('orcd') }}
|
||||||
{% if item.partitions is defined and item.partitions | length > 0 %}
|
partition=preempt
|
||||||
partition={{ item.partitions | map(attribute='name') | join(',') }}
|
QOS=preempt_qos
|
||||||
{% else %}
|
DefaultQOS={{ item.qos_default | default('preempt_qos') }}
|
||||||
partition=debug,preempt
|
{% if item.comment is defined %}Comment="{{ item.comment }}"{% endif %}
|
||||||
{% endif %}
|
|
||||||
{% if item.qos_list is defined and item.qos_list | length > 0 %}
|
|
||||||
QOS={{ item.qos_list | join(',') }}
|
|
||||||
{% else %}
|
|
||||||
QOS=debug_qos,preempt_qos
|
|
||||||
{% endif %}
|
|
||||||
{% if item.qos_default is defined %}
|
|
||||||
DefaultQOS={{ item.qos_default }}
|
|
||||||
{% else %}
|
|
||||||
DefaultQOS=preempt_qos
|
|
||||||
{% endif %}
|
|
||||||
{% if itm.comment is defined %}Comment="{{ item.comment }}"{% endif %}
|
|
||||||
loop: "{{ slurm_qos_users }}"
|
loop: "{{ slurm_qos_users }}"
|
||||||
loop_control:
|
loop_control:
|
||||||
label: "{{ item.username }}"
|
label: "{{ item.username }}"
|
||||||
when: user_list.stdout is not search(item.username)
|
when: user_list.stdout is not search(item.username)
|
||||||
changed_when: true
|
register: user_add
|
||||||
|
changed_when: user_add.rc == 0
|
||||||
|
|
||||||
- name: Check existing partition associations
|
- name: Configure Slurm user attributes
|
||||||
ansible.builtin.command: |
|
ansible.builtin.command: >
|
||||||
sacctmgr -Pn show assoc where user={{ item.username }} cluster={{ slurm_cluster_name }} account={{ item.sponsor | default('orcd') }} format=Partition,GrpTRES,MaxNodes
|
sacctmgr -i modify user where name={{ item.username }}
|
||||||
register: assoc_check
|
cluster={{ slurm_cluster_name }}
|
||||||
|
account={{ item.sponsor | default('orcd') }} set
|
||||||
|
{% if item.comment is defined %}Comment="{{ item.comment }}"{% endif %}
|
||||||
loop: "{{ slurm_qos_users }}"
|
loop: "{{ slurm_qos_users }}"
|
||||||
loop_control:
|
loop_control:
|
||||||
label: "{{ item.username }}"
|
label: "{{ item.username }}"
|
||||||
when: user_list.stdout is search(item.username)
|
register: user_modify
|
||||||
|
when: item.comment is defined  # Comment is the only attribute set here; a bare 'set' clause makes sacctmgr fail
|
||||||
|
changed_when: user_modify.rc == 0 and "Nothing modified" not in user_modify.stdout  # a no-op modify must not report a change
|
||||||
|
failed_when: user_modify.rc != 0 and "Nothing modified" not in user_modify.stdout
|
||||||
|
|
||||||
|
- name: Check existing partition associations for users
|
||||||
|
ansible.builtin.command: >
|
||||||
|
sacctmgr -Pn show assoc where user={{ item.username }}
|
||||||
|
cluster={{ slurm_cluster_name }}
|
||||||
|
account={{ item.sponsor | default('orcd') }}
|
||||||
|
format=partition
|
||||||
|
loop: "{{ slurm_qos_users }}"
|
||||||
|
loop_control:
|
||||||
|
label: "{{ item.username }}"
|
||||||
|
register: assoc_check
|
||||||
changed_when: false
|
changed_when: false
|
||||||
failed_when: false
|
failed_when: false
|
||||||
|
|
||||||
- name: Ensure partition associations exist for existing users
|
- name: Add missing partition associations for users
|
||||||
ansible.builtin.command: >
|
ansible.builtin.command: >
|
||||||
sacctmgr -i add user name={{ item.0.username }}
|
sacctmgr -i add user name={{ item.0.username }}
|
||||||
cluster={{ slurm_cluster_name }}
|
cluster={{ slurm_cluster_name }}
|
||||||
account={{ item.0.sponsor | default('orcd') }}
|
account={{ item.0.sponsor | default('orcd') }}
|
||||||
partition={{ item.1.name }}
|
partition={{ item.1.name }}
|
||||||
{% if item.1.name == 'admin' %}
|
{% if item.1.qos is defined %}
|
||||||
QOS=admin_qos DefaultQOS=admin_qos
|
QOS={{ item.1.qos }}
|
||||||
|
DefaultQOS={{ item.1.qos }}
|
||||||
{% else %}
|
{% else %}
|
||||||
QOS={{ item.0.qos_list | join(',') }} DefaultQOS={{ item.0.qos_default | default('preempt_qos') }}
|
QOS={{ slurm_partition_qos_defaults[item.1.name] | default(item.0.qos_default | default('preempt_qos')) }}
|
||||||
|
DefaultQOS={{ slurm_partition_qos_defaults[item.1.name] | default(item.0.qos_default | default('preempt_qos')) }}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
loop: "{{ slurm_qos_users | subelements('partitions') }}"
|
loop: "{{ slurm_qos_users | subelements('partitions', skip_missing=True) }}"
|
||||||
loop_control:
|
loop_control:
|
||||||
label: "{{ item.0.username }} - {{ item.1.name }}"
|
label: "{{ item.0.username }} - {{ item.1.name }}"
|
||||||
when:
|
when: >
|
||||||
- user_list.stdout is search(item.0.username)
|
item.1.name not in (assoc_check.results | selectattr('item.username', 'equalto', item.0.username) | map(attribute='stdout_lines') | flatten)
|
||||||
- assoc_check.results | selectattr('item.username', 'equalto', item.0.username) | map(attribute='stdout_lines') | flatten | join(',') is not search(item.1.name)
|
register: assoc_add
|
||||||
changed_when: true
|
changed_when: assoc_add.rc == 0
|
||||||
|
|
||||||
- name: Debug partition-specific limits
|
- name: Ensure partition associations for users (defined partitions)
|
||||||
ansible.builtin.debug:
|
|
||||||
msg: >
|
|
||||||
For {{ item.0.username }} - {{ item.1.name }}:
|
|
||||||
grptres={{ grptres | default('unset') }},
|
|
||||||
max_nodes={{ max_nodes | default('unset') }}
|
|
||||||
vars:
|
|
||||||
partition_defaults: "{{ slurm_qos_partition_defaults.partitions[item.1.name] }}"
|
|
||||||
grptres: "{{ item.1.grptres | default(partition_defaults.grptres) }}"
|
|
||||||
max_nodes: "{{ item.1.max_nodes | default(partition_defaults.max_nodes) }}"
|
|
||||||
loop: "{{ slurm_qos_users | subelements('partitions') }}"
|
|
||||||
loop_control:
|
|
||||||
label: "{{ item.0.username }} - {{ item.1.name }}"
|
|
||||||
when: user_list.stdout is search(item.0.username)
|
|
||||||
|
|
||||||
- name: Update partition-specific limits (GrpTres only)
|
|
||||||
ansible.builtin.command: >
|
ansible.builtin.command: >
|
||||||
sacctmgr -i modify user where
|
sacctmgr -i modify user where
|
||||||
name={{ item.0.username }}
|
name={{ item.0.username }}
|
||||||
cluster={{ slurm_cluster_name }}
|
cluster={{ slurm_cluster_name }}
|
||||||
account={{ item.0.sponsor | default('orcd') }}
|
account={{ item.0.sponsor | default('orcd') }}
|
||||||
partition={{ item.1.name }}
|
partition={{ item.1.name }} set
|
||||||
set
|
{% if item.1.qos is defined %}
|
||||||
{% if grptres is defined and grptres is not none %} GrpTRES={{ grptres }}{% endif %}
|
QOS={{ item.1.qos }}
|
||||||
vars:
|
DefaultQOS={{ item.1.qos }}
|
||||||
partition_defaults: "{{ slurm_qos_partition_defaults.partitions[item.1.name] }}"
|
{% else %}
|
||||||
grptres: "{{ item.1.grptres | default(partition_defaults.grptres) }}"
|
QOS={{ slurm_partition_qos_defaults[item.1.name] | default(item.0.qos_default | default('preempt_qos')) }}
|
||||||
max_nodes: "{{ item.1.max_nodes | default(partition_defaults.max_nodes) }}"
|
DefaultQOS={{ slurm_partition_qos_defaults[item.1.name] | default(item.0.qos_default | default('preempt_qos')) }}
|
||||||
loop: "{{ slurm_qos_users | subelements('partitions') }}"
|
{% endif %}
|
||||||
|
loop: "{{ slurm_qos_users | subelements('partitions', skip_missing=True) }}"
|
||||||
loop_control:
|
loop_control:
|
||||||
label: "{{ item.0.username }} - {{ item.1.name }}"
|
label: "{{ item.0.username }} - {{ item.1.name }}"
|
||||||
when:
|
register: assoc_modify
|
||||||
- user_list.stdout is search(item.0.username)
|
changed_when: assoc_modify.rc == 0 and "Nothing modified" not in assoc_modify.stdout  # a no-op modify must not report a change
|
||||||
- grptres is defined and grptres is not none
|
failed_when: assoc_modify.rc != 0 and "Nothing modified" not in assoc_modify.stdout
|
||||||
changed_when: true
|
|
||||||
failed_when: false
|
- name: Ensure default preempt partition association
|
||||||
|
ansible.builtin.command: >
|
||||||
|
sacctmgr -i modify user where
|
||||||
|
name={{ item.username }}
|
||||||
|
cluster={{ slurm_cluster_name }}
|
||||||
|
account={{ item.sponsor | default('orcd') }}
|
||||||
|
partition=preempt set
|
||||||
|
QOS=preempt_qos
|
||||||
|
DefaultQOS={{ item.qos_default | default('preempt_qos') }}
|
||||||
|
loop: "{{ slurm_qos_users }}"
|
||||||
|
loop_control:
|
||||||
|
label: "{{ item.username }}"
|
||||||
|
when: item.partitions is not defined or item.partitions | length == 0
|
||||||
|
register: default_assoc_modify
|
||||||
|
changed_when: default_assoc_modify.rc == 0 and "Nothing modified" not in default_assoc_modify.stdout  # a no-op modify must not report a change
|
||||||
|
failed_when: default_assoc_modify.rc != 0 and "Nothing modified" not in default_assoc_modify.stdout
|
||||||
|
|
Loading…
Reference in New Issue