From 792105e03be6983a8b236372c8050ea605bf7be2 Mon Sep 17 00:00:00 2001
From: dvosler
Date: Sat, 1 Mar 2025 05:46:38 -0500
Subject: [PATCH] maxnode fix and allow for new partitions

---
Review notes (commentary area; not part of the commit message):

 * This copy was re-broken into one patch line per physical line from a
   whitespace-collapsed original. Line boundaries were checked against
   every hunk's old/new counts and the diffstat (33 insertions, 46
   deletions). Leading indentation inside the hunks is reconstructed
   from Ansible conventions -- TODO confirm against the target file
   before applying.
 * Hunk -97,18 +95,16: the added Comment= guard tested `itm.comment`.
   The loop variable of that task is `item` (the same line renders
   `{{ item.comment }}`), so the guard has been corrected to
   `item.comment`. The -/+ pair is kept so hunk counts and the diffstat
   stay as generated.
 * The `index` line still carries the blob ids of the original commit.
 * NOTE(review): the last task defines `max_nodes` under `vars` but
   never references it, and its new `failed_when: false` makes the task
   ignore sacctmgr failures -- confirm both are intended.

 slurm_qos/tasks/main.yml | 79 +++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 46 deletions(-)

diff --git a/slurm_qos/tasks/main.yml b/slurm_qos/tasks/main.yml
index 4b2b262..a909786 100644
--- a/slurm_qos/tasks/main.yml
+++ b/slurm_qos/tasks/main.yml
@@ -9,8 +9,7 @@
   failed_when: false
 
 - name: Add Slurm QoS (if missing)
-  ansible.builtin.command: >
-    sacctmgr -i add qos {{ item.name }}
+  ansible.builtin.command: "sacctmgr -i add qos {{ item.name }}"
   loop: "{{ slurm_qos_settings }}"
   loop_control:
     label: "{{ item.name }}"
@@ -20,12 +19,11 @@
 - name: Modify Slurm QoS (if already exists)
   ansible.builtin.command: >
     sacctmgr -i modify qos where name={{ item.name }} set
-    {% if item.MaxTRES is defined %}MaxTRES={{ item.MaxTRES }}{% endif %}
-    {% if item.MaxTRESPU is defined %}MaxTRESPU={{ item.MaxTRESPU }}{% endif %}
-    {% if item.MaxSubmitJobs is defined %}MaxSubmitJobs={{ item.MaxSubmitJobs }}{% endif %}
-    {% if item.MaxJobsPerUser is defined %}MaxJobsPerUser={{ item.MaxJobsPerUser }}{% endif %}
-    {% if item.GraceTime is defined %}GraceTime={{ item.GraceTime }}{% endif %}
-    {% if item.Preempt is defined %}Preempt={{ item.Preempt }}{% endif %}
+    {% if item.max_tres is defined and item.max_tres is not none %}MaxTRES={{ item.max_tres }}{% endif %}
+    {% if item.max_submit_jobs is defined and item.max_submit_jobs is not none %}MaxSubmitJobs={{ item.max_submit_jobs }}{% endif %}
+    {% if item.max_jobs_per_user is defined and item.max_jobs_per_user is not none %}MaxJobsPerUser={{ item.max_jobs_per_user }}{% endif %}
+    {% if item.grace_time is defined and item.grace_time is not none %}GraceTime={{ item.grace_time }}{% endif %}
+    {% if item.preempt is defined and item.preempt is not none %}Preempt={{ item.preempt }}{% endif %}
   loop: "{{ slurm_qos_settings }}"
   loop_control:
     label: "{{ item.name }}"
@@ -97,18 +95,16 @@
     {% else %}
       DefaultQOS=preempt_qos
     {% endif %}
-    {% if item.comment is defined %}Comment="{{ item.comment }}"{% endif %}
+    {% if item.comment is defined %}Comment="{{ item.comment }}"{% endif %}
   loop: "{{ slurm_qos_users }}"
   loop_control:
     label: "{{ item.username }}"
   when: user_list.stdout is not search(item.username)
   changed_when: true
-  register: user_creation_result
-  failed_when: user_creation_result.rc != 0
 
 - name: Check existing partition associations
-  ansible.builtin.command: >
-    sacctmgr -Pn show assoc where user={{ item.username }} cluster={{ slurm_cluster_name }} account={{ item.sponsor | default('orcd') }} format=Partition,QOS,DefaultQOS
+  ansible.builtin.command: |
+    sacctmgr -Pn show assoc where user={{ item.username }} cluster={{ slurm_cluster_name }} account={{ item.sponsor | default('orcd') }} format=Partition,GrpTRES,MaxNodes
   register: assoc_check
   loop: "{{ slurm_qos_users }}"
   loop_control:
@@ -125,33 +121,33 @@
     partition={{ item.1.name }}
     {% if item.1.name == 'admin' %}
       QOS=admin_qos DefaultQOS=admin_qos
+    {% else %}
+      QOS={{ item.0.qos_list | join(',') }} DefaultQOS={{ item.0.qos_default | default('preempt_qos') }}
     {% endif %}
   loop: "{{ slurm_qos_users | subelements('partitions') }}"
   loop_control:
     label: "{{ item.0.username }} - {{ item.1.name }}"
   when:
     - user_list.stdout is search(item.0.username)
-    - assoc_check.results | selectattr('item.username', 'equalto', item.0.username) | map(attribute='stdout') | first | default('') is not search(item.1.name)
+    - assoc_check.results | selectattr('item.username', 'equalto', item.0.username) | map(attribute='stdout_lines') | flatten | join(',') is not search(item.1.name)
   changed_when: true
 
-- name: Update QoS for existing admin partition associations
-  ansible.builtin.command: >
-    sacctmgr -i modify user where
-    name={{ item.0.username }}
-    cluster={{ slurm_cluster_name }}
-    account={{ item.0.sponsor | default('orcd') }}
-    partition={{ item.1.name }}
-    set QOS=admin_qos DefaultQOS=admin_qos
+- name: Debug partition-specific limits
+  ansible.builtin.debug:
+    msg: >
+      For {{ item.0.username }} - {{ item.1.name }}:
+      grptres={{ grptres | default('unset') }},
+      max_nodes={{ max_nodes | default('unset') }}
+  vars:
+    partition_defaults: "{{ slurm_qos_partition_defaults.partitions[item.1.name] }}"
+    grptres: "{{ item.1.grptres | default(partition_defaults.grptres) }}"
+    max_nodes: "{{ item.1.max_nodes | default(partition_defaults.max_nodes) }}"
   loop: "{{ slurm_qos_users | subelements('partitions') }}"
   loop_control:
     label: "{{ item.0.username }} - {{ item.1.name }}"
-  when:
-    - user_list.stdout is search(item.0.username)
-    - item.1.name == 'admin'
-    - assoc_check.results | selectattr('item.username', 'equalto', item.0.username) | map(attribute='stdout') | first | default('') is search(item.1.name)
-  changed_when: true
+  when: user_list.stdout is search(item.0.username)
 
-- name: Set partition-specific node limits
+- name: Update partition-specific limits (GrpTres only)
   ansible.builtin.command: >
     sacctmgr -i modify user where
     name={{ item.0.username }}
@@ -159,25 +155,16 @@
     account={{ item.0.sponsor | default('orcd') }}
     partition={{ item.1.name }}
     set
-    {% if item.1.max_nodes is defined and item.1.max_nodes | int == -1 %}
-      GrpTRES=
-    {% elif item.1.max_nodes is defined and item.1.max_nodes | int >= 0 %}
-      GrpTRES=node={{ item.1.max_nodes }}
-    {% else %}
-      GrpTRES={{ item.1.grptres | default(
-        slurm_qos_partition_defaults[item.1.name].grptres if item.1.name in slurm_qos_partition_defaults else
-        'node=' + (
-          item.1.max_nodes | default(
-            slurm_qos_partition_defaults[item.1.name].max_nodes if item.1.name in slurm_qos_partition_defaults else
-            '4' if item.1.name == 'preempt' else
-            '2' if item.1.name == 'debug' else
-            '' if item.1.name == 'admin' else
-            ''
-          )
-        ) | string
-      ) }}
-    {% endif %}
+    {% if grptres is defined and grptres is not none %} GrpTRES={{ grptres }}{% endif %}
+  vars:
+    partition_defaults: "{{ slurm_qos_partition_defaults.partitions[item.1.name] }}"
+    grptres: "{{ item.1.grptres | default(partition_defaults.grptres) }}"
+    max_nodes: "{{ item.1.max_nodes | default(partition_defaults.max_nodes) }}"
   loop: "{{ slurm_qos_users | subelements('partitions') }}"
   loop_control:
     label: "{{ item.0.username }} - {{ item.1.name }}"
+  when:
+    - user_list.stdout is search(item.0.username)
+    - grptres is defined and grptres is not none
   changed_when: true
+  failed_when: false