[kubernetes]-pod启动报错too many open files

导语:ulimit 已经设置过,但 pod 启动时仍然报错 too many open files

image-20240725164857302

记录一个容器内很方便查看inotify相关配置的脚本

#!/bin/bash

# Get the procs sorted by the number of inotify watches
# @author Carl-Erik Kopseng
# @latest https://github.com/fatso83/dotfiles/blob/master/utils/scripts/inotify-consumers
# Discussion leading up to answer: https://unix.stackexchange.com/questions/15509/whos-consuming-my-inotify-resources
#
#
########################################## Notice ##########################################
###        Since Fall 2022 you should prefer using the following C++ version             ###
###                    https://github.com/mikesart/inotify-info                          ###
############################################################################################
#
#
# The fastest version of this script is here: https://github.com/fatso83/dotfiles/commit/inotify-consumers-v1-fastest
# Later PRs traded some speed for better output, but the slowdown is insignificant on most machines
# See this for details: https://github.com/fatso83/dotfiles/pull/10#issuecomment-1122374716

# Print the three-line table header followed by a separator rule, then hand
# over to generateData to emit the table body.
# Globals: WLEN (read) - width of the two right-aligned leading columns.
main() {
    # Both leading columns share the same right-aligned width.
    local two_col="%${WLEN}s  %${WLEN}s"

    printf "\n${two_col}\n" "INOTIFY" "INSTANCES"
    printf "${two_col}\n" "WATCHES" "PER   "
    printf "${two_col}  %s\n" " COUNT " "PROCESS " "PID USER         COMMAND"
    printf '%s\n' "------------------------------------------------------------"
    generateData
}

# Print the command-line help text to stdout.
# The synopsis line uses $0, so it shows the name the script was invoked with.
# Lists every option the script accepts, including -f/--full.
usage(){
    cat << EOF
Usage: $0 [--help|--limits|--full]
    -l, --limits    Will print the current related limits and how to change them
    -f, --full      Show the full process listing instead of truncating it to the terminal width
    -h, --help      Show this help

FYI:  Check out Michael Sartain's C++ take on this script. The resulting native executable 
      is much faster, modern and feature rich. It can be found at 
      https://github.com/mikesart/inotify-info
      
      Requires building, but is well worth the few seconds :)
EOF
}

# Show the current inotify instance/watch limits as reported by sysctl,
# followed by a short recipe for raising the watch limit permanently.
limits() {
    printf '\n%s\n%s\n' "Current limits" "-------------"
    sysctl fs.inotify.max_user_instances fs.inotify.max_user_watches

    # The recipe is printed verbatim; nothing here is executed.
    printf '%s\n' \
        "Changing settings permanently" \
        "-----------------------------" \
        "echo fs.inotify.max_user_watches=524288 | sudo tee -a /etc/sysctl.conf" \
        "sudo sysctl -p # re-read config"
}

# Build and print the body of the report: one row per process that holds
# inotify watches (watch count, inotify instance count, matching ps line),
# the total watch count, and a per-user table of inotify instances.
# Globals: COLSTRING (read, optional extra ps arguments), WLEN (read, column width).
# Outputs: writes the report to stdout.
# NOTE: ends with `exit 0`, so when called in the main shell the script
# terminates here and control never returns to the caller.
generateData() {
    # Integer-typed working variables for the read loops below.
    local -i PROC
    local -i PID
    local -i CNT
    local -i INSTANCES
    local -i TOT
    local -i TOTINSTANCES
    # read process list into cache
    # $COLSTRING is intentionally unquoted: it either splits into "--columns N"
    # or, when unset, disappears from the command line entirely.
    # user=WIDE-COLUMN renames the user column header (presumably to widen the
    # column so user names are not truncated - TODO confirm).
    local PSLIST="$(ps ax -o pid,user=WIDE-COLUMN,command $COLSTRING)"
    # Every line starting with "inotify" in any /proc/<pid>/fdinfo/<fd> file,
    # prefixed by grep with the file path; errors are discarded.
    local INOTIFY="$(find /proc/[0-9]*/fdinfo -type f 2>/dev/null | xargs grep ^inotify 2>/dev/null)"
    # "<count> <pid>" pairs: number of matching lines per PID (path field 3).
    local INOTIFYCNT="$(echo "$INOTIFY" | cut -d "/" -s --output-delimiter=" "  -f 3 |uniq -c | sed -e 's/:.*//')"
    # unique instances per process is denoted by number of inotify FDs
    local INOTIFYINSTANCES="$(echo "$INOTIFY" | cut -d "/" -s --output-delimiter=" "   -f 3,5 | sed -e 's/:.*//'| uniq |awk '{print $1}' |uniq -c)"
    local INOTIFYUSERINSTANCES="$(echo "$INOTIFY" | cut -d "/" -s --output-delimiter=" "   -f 3,5 | sed -e 's/:.*//' | uniq |
    	     while read PID FD; do echo $PID $FD $(grep -e "^ *${PID} " <<< "$PSLIST"|awk '{print $2}'); done | cut -d" "  -f 3 | sort | uniq -c |sort -nr)"
    # From here on, any unhandled failing command aborts the script.
    set -e

    # Stage 1 emits "pid,watches,instances" records; later stages filter, sort
    # and format them.
    cat <<< "$INOTIFYCNT" |
        {
            while read -rs CNT PROC; do   # count watches of processes found
                echo "${PROC},${CNT},$(echo "$INOTIFYINSTANCES" | grep " ${PROC}$" |awk '{print $1}')"
            done
        } |
        grep -v ",0," |                  # remove entries without watches
        sort -n -t "," -k 2,3 -r |         # sort to begin with highest numbers
        {                                # group commands so that $TOT is visible in the printf
	    IFS=","
            while read -rs PID CNT INSTANCES; do   # show watches and corresponding process info
                printf "%$(( WLEN - 2 ))d  %$(( WLEN - 2 ))d     %s\n" "$CNT" "$INSTANCES" "$(grep -e "^ *${PID} " <<< "$PSLIST")"
                TOT=$(( TOT + CNT ))
		TOTINSTANCES=$(( TOTINSTANCES + INSTANCES))
            done
	    # These stats should be per-user as well, since inotify limits are per-user..
            printf "\n%$(( WLEN - 2 ))d  %s\n" "$TOT" "WATCHES TOTAL COUNT"
# the total across different users is somewhat meaningless, not printing for now.
#            printf "\n%$(( WLEN - 2 ))d  %s\n" "$TOTINSTANCES" "TOTAL INSTANCES COUNT"
        }
    echo ""
    echo "INotify instances per user (e.g. limits specified by fs.inotify.max_user_instances): "
    echo ""
    # Header plus the per-user counts, aligned into columns by `column -t`.
    (
      echo "INSTANCES    USER"
      echo "-----------  ------------------"
      echo "$INOTIFYUSERINSTANCES"
    ) | column -t
    echo ""
    # Ends the script here with a success status.
    exit 0
}

# Terminal width; falls back to 80 columns when tput cannot report one.
declare -i COLS=$(tput cols 2>/dev/null || echo 80)
# Width of the right-aligned leading columns of the report.
declare -i WLEN=10
# Extra ps arguments limiting the listing to the terminal width. It is kept as a
# plain string because generateData expands it unquoted; --full unsets it.
declare COLSTRING="--columns $(( COLS - WLEN ))"

# Dispatch on the first argument. A case statement replaces the deprecated
# `[ ... -o ... ]` tests, and every branch terminates explicitly so that no
# option can fall through to the "Unknown parameter" handling.
case "${1-}" in
    -l|--limits)
        limits
        exit 0
        ;;
    -h|--help)
        usage
        exit 0
        ;;
    -f|--full)
        # Drop the column limit to allow for the full display instead of a
        # truncated version.
        unset COLSTRING
        main
        exit 0
        ;;
    '')
        main
        ;;
    *)
        # Pass the argument as data, never as part of the printf format string.
        printf "\nUnknown parameter '%s'\n" "$1" >&2
        usage
        exit 1
        ;;
esac

执行脚本查看容器内外相关限制

./inotify-consumers

image-20240725164435960

最终通过调整 pod 调度的节点解决了该问题。

# Edit the algorithm workload's config file: raise shared memory to 5G, remove
# the resource limits, and pin the pod to the GPU machine.
# Only the node-pinning part of that change is shown below.
      affinity:
        nodeAffinity:
          # Hard requirement evaluated at scheduling time.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              # Match on the node's hostname label so the pod lands on one specific node.
              - key: kubernetes.io/hostname
                operator: In
                values:
                - idc-k8sgpu-3



# Raise the inotify limits at runtime (needs root).
# sysctl -w takes each setting as a single name=value argument, so there must
# be no spaces around '='.
sysctl -w fs.inotify.max_user_watches=2099999999
sysctl -w fs.inotify.max_user_instances=2099999999
sysctl -w fs.inotify.max_queued_events=2099999999
# More moderate values; when run after the lines above, these two override them.
sysctl -w fs.inotify.max_user_watches=1048576
sysctl -w fs.inotify.max_user_instances=12800

# Show the resulting inotify settings.
sysctl fs.inotify


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

爷来辣

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值