#!/bin/bash
# dvosler -- 2024-11-01
# http://www.apache.org/licenses/LICENSE-2.0
#
# gpu_power_limit.sh -- set, reset, or print the power limit of every GPU
# that nvidia-smi reports.
#
# Provenance: unwrapped from the patch "Add gpu_power_limit.sh"
# (commit fbb1e289ad956cfd708138d941fc7349713b30b7, daniel vosler, 2025-01-21).
#
# Usage:
#   gpu_power_limit.sh set <watts>      e.g. 300 -> request 300 W on every GPU
#   gpu_power_limit.sh set <percent>%   e.g. 15% -> default limit reduced by 15 %
#   gpu_power_limit.sh reset            restore each GPU's default limit
#   gpu_power_limit.sh print            show set/default/max/min limits
#
# Requires nvidia-smi on PATH; "set" additionally requires bc.
# Exit status is non-zero on bad arguments or if any GPU could not be handled.

set -u

#######################################
# Print a message on stderr.
# Arguments: $* - message text
#######################################
_err() {
  printf '%s\n' "$*" >&2
}

#######################################
# Print the usage line on stderr.
#######################################
_usage() {
  _err "Usage: $0 {set <watts|percent%> | reset | print}"
}

#######################################
# Verify that every named command is available on PATH.
# Arguments: $@ - command names
# Returns:   0 if all are found, 1 (with a message) otherwise
#######################################
_require() {
  local tool
  for tool in "$@"; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      _err "Error: required command '$tool' not found in PATH."
      return 1
    fi
  done
}

#######################################
# Query a single property of a single GPU and print it with all whitespace
# removed.
# Arguments: $1 - GPU index, $2 - nvidia-smi --query-gpu field name
# Outputs:   the raw value on stdout (may be non-numeric, e.g. "[N/A]")
# Returns:   nvidia-smi's failure status if the query itself fails
#######################################
_gpu_query() {
  local gpu=$1 field=$2 value
  value=$(nvidia-smi -i "$gpu" --query-gpu="$field" \
    --format=csv,noheader,nounits) || return
  printf '%s\n' "${value//[[:space:]]/}"
}

#######################################
# Like _gpu_query, but only succeeds when the value is a plain non-negative
# number, so it is safe to hand to bc and to "nvidia-smi -pl".
# Arguments: $1 - GPU index, $2 - field name
# Outputs:   the numeric value on stdout
# Returns:   1 (with a message) if the value is missing or not numeric
#######################################
_gpu_watts() {
  local gpu=$1 field=$2 value
  if ! value=$(_gpu_query "$gpu" "$field") \
    || ! [[ "$value" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    _err "Error: could not read $field for GPU $gpu (got '${value:-}')."
    return 1
  fi
  printf '%s\n' "$value"
}

#######################################
# Run a function once per GPU index reported by nvidia-smi.
# Arguments: $1 - function to call; remaining args are passed after the index
# Returns:   0 if the GPU list was read and every call succeeded, else 1
#######################################
_for_each_gpu() {
  local action=$1
  shift
  local listing gpu status=0

  _require nvidia-smi || return 1
  if ! listing=$(nvidia-smi --query-gpu=index --format=csv,noheader); then
    _err "Error: could not list GPUs with nvidia-smi."
    return 1
  fi

  # The list is fed through fd 3 so that commands run inside the loop cannot
  # accidentally consume it from stdin.
  while IFS= read -r gpu <&3; do
    gpu=${gpu//[[:space:]]/}
    [[ -n "$gpu" ]] || continue
    "$action" "$gpu" "$@" || status=1
  done 3<<<"$listing"

  return "$status"
}

#######################################
# Apply a power limit to one GPU, raising it to the GPU's minimum if the
# requested value is lower. The clamped value is local to this call, so one
# GPU's minimum never affects the value used for another GPU.
# Arguments: $1 - GPU index, $2 - requested limit in watts
# Returns:   0 if nvidia-smi accepted the limit, 1 otherwise
#######################################
_apply_limit() {
  local gpu=$1 watts=$2 min_limit

  min_limit=$(_gpu_watts "$gpu" power.min_limit) || return 1

  # bc prints 1 when the comparison holds, 0 otherwise; values may be decimals.
  if (( $(bc -l <<<"$watts < $min_limit") )); then
    watts=$min_limit
    echo "New power limit for GPU $gpu is below minimum. Setting to minimum power limit: $min_limit W"
  else
    echo "Setting power limit to $watts W for GPU $gpu"
  fi

  if nvidia-smi -i "$gpu" -pl "$watts"; then
    echo "Set power limit to $watts W for GPU $gpu"
  else
    _err "Error: failed to set power limit to $watts W for GPU $gpu."
    return 1
  fi
}

#######################################
# Apply "default limit minus N percent" to one GPU.
# Arguments: $1 - GPU index, $2 - percentage to subtract (digits only)
# Returns:   0 on success, 1 otherwise
#######################################
_apply_reduction() {
  local gpu=$1 percent=$2 default_limit target

  default_limit=$(_gpu_watts "$gpu" power.default_limit) || return 1
  # bc without -l uses scale 0 for the division, so the result is truncated
  # to whole watts.
  target=$(bc <<<"$default_limit * (100 - $percent) / 100")
  _apply_limit "$gpu" "$target"
}

#######################################
# Set the power limit of all GPUs.
# Arguments: $1 - either whole watts (e.g. 300) or a percentage reduction
#                 relative to each GPU's default limit (e.g. 15%)
# Returns:   0 if every GPU was updated, 1 on invalid input or any failure
#######################################
set_power_limit() {
  local limit=$1

  # LIMIT must be a number or a percentage (e.g., 300 or 15%)
  if ! [[ "$limit" =~ ^[0-9]+%?$ ]]; then
    _err "Error: Invalid power limit '$limit'. Please provide a numeric value (e.g., 300) or a percentage (e.g., 15%)."
    return 1
  fi
  _require bc || return 1

  if [[ "$limit" == *% ]]; then
    local percent=${limit%\%}
    echo "Reducing power limit by $percent% for all GPUs"
    _for_each_gpu _apply_reduction "$percent"
  else
    echo "Setting power limit to $limit W for all GPUs"
    _for_each_gpu _apply_limit "$limit"
  fi
}

#######################################
# Restore one GPU's power limit to its default value.
# Arguments: $1 - GPU index
# Returns:   0 if nvidia-smi accepted the limit, 1 otherwise
#######################################
_reset_one_gpu() {
  local gpu=$1 default_limit

  default_limit=$(_gpu_watts "$gpu" power.default_limit) || return 1
  if nvidia-smi -i "$gpu" -pl "$default_limit"; then
    echo "Reset power limit to $default_limit W for GPU $gpu"
  else
    _err "Error: failed to reset power limit for GPU $gpu."
    return 1
  fi
}

#######################################
# Restore the default power limit on all GPUs.
# Returns: 0 if every GPU was reset, 1 otherwise
#######################################
reset_power_limit() {
  echo "Resetting power limit to default for all GPUs"

  if _for_each_gpu _reset_one_gpu; then
    echo "Power limit reset to default for all GPUs."
  else
    _err "Error: power limit could not be reset on every GPU."
    return 1
  fi
}

#######################################
# Print the power limits of one GPU. Values are shown exactly as nvidia-smi
# reports them, including non-numeric placeholders.
# Arguments: $1 - GPU index
# Returns:   0 if all queries succeeded, 1 otherwise
#######################################
_print_one_gpu() {
  local gpu=$1 status=0
  local set_limit default_limit max_limit min_limit

  set_limit=$(_gpu_query "$gpu" power.limit) || status=1
  default_limit=$(_gpu_query "$gpu" power.default_limit) || status=1
  max_limit=$(_gpu_query "$gpu" power.max_limit) || status=1
  min_limit=$(_gpu_query "$gpu" power.min_limit) || status=1

  echo "GPU $gpu:"
  echo " Set Power Limit: ${set_limit} W"
  echo " Default Power Limit: ${default_limit} W"
  echo " Max Power Limit: ${max_limit} W"
  echo " Min Power Limit: ${min_limit} W"
  echo ""

  return "$status"
}

#######################################
# Print the set, default, max, and min power limits of all GPUs.
# Returns: 0 if every GPU could be queried, 1 otherwise
#######################################
print_power_limits() {
  echo "Fetching set, default, max, and min power limits for all GPUs"
  _for_each_gpu _print_one_gpu
}

#######################################
# Dispatch on the sub-command given on the command line.
# Arguments: $1 - set | reset | print; $2 - limit (required for "set")
# Returns:   the sub-command's status, or 1 after printing usage
#######################################
main() {
  local action=${1:-}

  case "$action" in
    set)
      if [[ -z "${2:-}" ]]; then
        _usage
        return 1
      fi
      set_power_limit "$2"
      ;;
    reset)
      reset_power_limit
      ;;
    print)
      print_power_limits
      ;;
    *)
      _usage
      return 1
      ;;
  esac
}

main "$@"