maxnode fix and allow for new partitions

This commit is contained in:
dvosler 2025-03-01 05:46:38 -05:00
parent 80e7a64579
commit 792105e03b
1 changed file with 33 additions and 46 deletions

View File

@ -9,8 +9,7 @@
failed_when: false failed_when: false
- name: Add Slurm QoS (if missing) - name: Add Slurm QoS (if missing)
ansible.builtin.command: > ansible.builtin.command: "sacctmgr -i add qos {{ item.name }}"
sacctmgr -i add qos {{ item.name }}
loop: "{{ slurm_qos_settings }}" loop: "{{ slurm_qos_settings }}"
loop_control: loop_control:
label: "{{ item.name }}" label: "{{ item.name }}"
@ -20,12 +19,11 @@
- name: Modify Slurm QoS (if already exists) - name: Modify Slurm QoS (if already exists)
ansible.builtin.command: > ansible.builtin.command: >
sacctmgr -i modify qos where name={{ item.name }} set sacctmgr -i modify qos where name={{ item.name }} set
{% if item.MaxTRES is defined %}MaxTRES={{ item.MaxTRES }}{% endif %} {% if item.max_tres is defined and item.max_tres is not none %}MaxTRES={{ item.max_tres }}{% endif %}
{% if item.MaxTRESPU is defined %}MaxTRESPU={{ item.MaxTRESPU }}{% endif %} {% if item.max_submit_jobs is defined and item.max_submit_jobs is not none %}MaxSubmitJobs={{ item.max_submit_jobs }}{% endif %}
{% if item.MaxSubmitJobs is defined %}MaxSubmitJobs={{ item.MaxSubmitJobs }}{% endif %} {% if item.max_jobs_per_user is defined and item.max_jobs_per_user is not none %}MaxJobsPerUser={{ item.max_jobs_per_user }}{% endif %}
{% if item.MaxJobsPerUser is defined %}MaxJobsPerUser={{ item.MaxJobsPerUser }}{% endif %} {% if item.grace_time is defined and item.grace_time is not none %}GraceTime={{ item.grace_time }}{% endif %}
{% if item.GraceTime is defined %}GraceTime={{ item.GraceTime }}{% endif %} {% if item.preempt is defined and item.preempt is not none %}Preempt={{ item.preempt }}{% endif %}
{% if item.Preempt is defined %}Preempt={{ item.Preempt }}{% endif %}
loop: "{{ slurm_qos_settings }}" loop: "{{ slurm_qos_settings }}"
loop_control: loop_control:
label: "{{ item.name }}" label: "{{ item.name }}"
@ -97,18 +95,16 @@
{% else %} {% else %}
DefaultQOS=preempt_qos DefaultQOS=preempt_qos
{% endif %} {% endif %}
{% if item.comment is defined %}Comment="{{ item.comment }}"{% endif %} {% if item.comment is defined %}Comment="{{ item.comment }}"{% endif %}
loop: "{{ slurm_qos_users }}" loop: "{{ slurm_qos_users }}"
loop_control: loop_control:
label: "{{ item.username }}" label: "{{ item.username }}"
when: user_list.stdout is not search(item.username) when: user_list.stdout is not search(item.username)
changed_when: true changed_when: true
register: user_creation_result
failed_when: user_creation_result.rc != 0
- name: Check existing partition associations - name: Check existing partition associations
ansible.builtin.command: > ansible.builtin.command: |
sacctmgr -Pn show assoc where user={{ item.username }} cluster={{ slurm_cluster_name }} account={{ item.sponsor | default('orcd') }} format=Partition,QOS,DefaultQOS sacctmgr -Pn show assoc where user={{ item.username }} cluster={{ slurm_cluster_name }} account={{ item.sponsor | default('orcd') }} format=Partition,GrpTRES,MaxNodes
register: assoc_check register: assoc_check
loop: "{{ slurm_qos_users }}" loop: "{{ slurm_qos_users }}"
loop_control: loop_control:
@ -125,33 +121,33 @@
partition={{ item.1.name }} partition={{ item.1.name }}
{% if item.1.name == 'admin' %} {% if item.1.name == 'admin' %}
QOS=admin_qos DefaultQOS=admin_qos QOS=admin_qos DefaultQOS=admin_qos
{% else %}
QOS={{ item.0.qos_list | join(',') }} DefaultQOS={{ item.0.qos_default | default('preempt_qos') }}
{% endif %} {% endif %}
loop: "{{ slurm_qos_users | subelements('partitions') }}" loop: "{{ slurm_qos_users | subelements('partitions') }}"
loop_control: loop_control:
label: "{{ item.0.username }} - {{ item.1.name }}" label: "{{ item.0.username }} - {{ item.1.name }}"
when: when:
- user_list.stdout is search(item.0.username) - user_list.stdout is search(item.0.username)
- assoc_check.results | selectattr('item.username', 'equalto', item.0.username) | map(attribute='stdout') | first | default('') is not search(item.1.name) - assoc_check.results | selectattr('item.username', 'equalto', item.0.username) | map(attribute='stdout_lines') | flatten | join(',') is not search(item.1.name)
changed_when: true changed_when: true
- name: Update QoS for existing admin partition associations - name: Debug partition-specific limits
ansible.builtin.command: > ansible.builtin.debug:
sacctmgr -i modify user where msg: >
name={{ item.0.username }} For {{ item.0.username }} - {{ item.1.name }}:
cluster={{ slurm_cluster_name }} grptres={{ grptres | default('unset') }},
account={{ item.0.sponsor | default('orcd') }} max_nodes={{ max_nodes | default('unset') }}
partition={{ item.1.name }} vars:
set QOS=admin_qos DefaultQOS=admin_qos partition_defaults: "{{ slurm_qos_partition_defaults.partitions[item.1.name] }}"
grptres: "{{ item.1.grptres | default(partition_defaults.grptres) }}"
max_nodes: "{{ item.1.max_nodes | default(partition_defaults.max_nodes) }}"
loop: "{{ slurm_qos_users | subelements('partitions') }}" loop: "{{ slurm_qos_users | subelements('partitions') }}"
loop_control: loop_control:
label: "{{ item.0.username }} - {{ item.1.name }}" label: "{{ item.0.username }} - {{ item.1.name }}"
when: when: user_list.stdout is search(item.0.username)
- user_list.stdout is search(item.0.username)
- item.1.name == 'admin'
- assoc_check.results | selectattr('item.username', 'equalto', item.0.username) | map(attribute='stdout') | first | default('') is search(item.1.name)
changed_when: true
- name: Set partition-specific node limits - name: Update partition-specific limits (GrpTres only)
ansible.builtin.command: > ansible.builtin.command: >
sacctmgr -i modify user where sacctmgr -i modify user where
name={{ item.0.username }} name={{ item.0.username }}
@ -159,25 +155,16 @@
account={{ item.0.sponsor | default('orcd') }} account={{ item.0.sponsor | default('orcd') }}
partition={{ item.1.name }} partition={{ item.1.name }}
set set
{% if item.1.max_nodes is defined and item.1.max_nodes | int == -1 %} {% if grptres is defined and grptres is not none %} GrpTRES={{ grptres }}{% endif %}
GrpTRES= vars:
{% elif item.1.max_nodes is defined and item.1.max_nodes | int >= 0 %} partition_defaults: "{{ slurm_qos_partition_defaults.partitions[item.1.name] }}"
GrpTRES=node={{ item.1.max_nodes }} grptres: "{{ item.1.grptres | default(partition_defaults.grptres) }}"
{% else %} max_nodes: "{{ item.1.max_nodes | default(partition_defaults.max_nodes) }}"
GrpTRES={{ item.1.grptres | default(
slurm_qos_partition_defaults[item.1.name].grptres if item.1.name in slurm_qos_partition_defaults else
'node=' + (
item.1.max_nodes | default(
slurm_qos_partition_defaults[item.1.name].max_nodes if item.1.name in slurm_qos_partition_defaults else
'4' if item.1.name == 'preempt' else
'2' if item.1.name == 'debug' else
'' if item.1.name == 'admin' else
''
)
) | string
) }}
{% endif %}
loop: "{{ slurm_qos_users | subelements('partitions') }}" loop: "{{ slurm_qos_users | subelements('partitions') }}"
loop_control: loop_control:
label: "{{ item.0.username }} - {{ item.1.name }}" label: "{{ item.0.username }} - {{ item.1.name }}"
when:
- user_list.stdout is search(item.0.username)
- grptres is defined and grptres is not none
changed_when: true changed_when: true
failed_when: false