http
script
# Walk-through of Chaosd fault injection over its HTTP API (port 31767).
# Each experiment creates an attack, leaves it active for a while so it can be
# observed, then deletes it using the uid returned in the creation response.
# Requires: curl, jq, ping, ssh.

# Address of the physical machine under test; the Chaosd service must be running on it.
export CHAOSD_SERVER=127.0.0.1
# Name of the NIC on the machine under test; used by the network fault experiments.
export CHAOSD_NODE_DEVICE=eth1

# CPU stress test: start 4 workers, each with a 60% CPU load.
echo '开始CPU压力测试'
CHAOSD_RESPONSE=$(curl -X POST "$CHAOSD_SERVER:31767/api/attack/stress" -H "Content-Type:application/json" -d '{"load":60, "action":"cpu","workers":4}')
echo "$CHAOSD_RESPONSE"
echo "此时可在服务端使用top进行验证"
sleep 60
# Remove the fault.
curl -X DELETE "$CHAOSD_SERVER:31767/api/attack/$(echo "$CHAOSD_RESPONSE" | jq -r '.uid')"

# Memory stress test: occupy 2GiB of memory.
CHAOSD_RESPONSE=$(curl -X POST "$CHAOSD_SERVER:31767/api/attack/stress" -H "Content-Type:application/json" -d '{ "action":"mem","size":"2GiB"}')
echo "$CHAOSD_RESPONSE"
echo "此时可在服务端使用top进行验证"
sleep 60
# Remove the fault.
curl -X DELETE "$CHAOSD_SERVER:31767/api/attack/$(echo "$CHAOSD_RESPONSE" | jq -r '.uid')"

# Record the current network state first, as a baseline for comparison.
ping -c 20 "$CHAOSD_SERVER"

# Network packet-loss test, set to 30% here.
# The payload is built with jq so that the device name is actually substituted
# (a single-quoted JSON literal would send the text "$CHAOSD_NODE_DEVICE").
CHAOSD_PAYLOAD=$(jq -nc --arg device "$CHAOSD_NODE_DEVICE" '{action:"loss",percent:"30",device:$device}')
CHAOSD_RESPONSE=$(curl -X POST "$CHAOSD_SERVER:31767/api/attack/network" -H "Content-Type:application/json" -d "$CHAOSD_PAYLOAD")
echo "$CHAOSD_RESPONSE"
ping -c 20 "$CHAOSD_SERVER"
echo "此时可在服务端使用ping进行验证"
sleep 30
# Remove the fault.
curl -X DELETE "$CHAOSD_SERVER:31767/api/attack/$(echo "$CHAOSD_RESPONSE" | jq -r '.uid')"

# Network latency test, set to 1 second here.
CHAOSD_PAYLOAD=$(jq -nc --arg device "$CHAOSD_NODE_DEVICE" '{action:"delay",latency:"1s",device:$device}')
CHAOSD_RESPONSE=$(curl -X POST "$CHAOSD_SERVER:31767/api/attack/network" -H "Content-Type:application/json" -d "$CHAOSD_PAYLOAD")
echo "$CHAOSD_RESPONSE"
ping -c 20 "$CHAOSD_SERVER"
echo "此时可在服务端使用ping进行验证"
sleep 30
# Remove the fault.
curl -X DELETE "$CHAOSD_SERVER:31767/api/attack/$(echo "$CHAOSD_RESPONSE" | jq -r '.uid')"

# Node-down test: shuts the target host down; this is issued over ssh rather
# than through the HTTP API.
ssh "$CHAOSD_SERVER" "chaosd attack host shutdown"
补充说明
CPU压力测试
当施加的CPU压力超过物理资源时,实验仍然能继续进行,worker数量会和请求保持一致,只是load可能无法达到请求值
内存压力测试
当进行内存压力测试时,CPU资源也会被占用.有一个核的占用率会达到近100%
当施加的内存压力超过物理资源时,实验会失败,但是返回的结果仍然是成功,即使在服务端查看状态也是成功
当同时有多个压力测试时,总的内存占用量超过物理资源,实验仍然能继续进行,但是服务端进行压测的进程的pid会变,导致后面结束实验时可能会报错
网络丢包测试
在上面的实验参数情况下,不管从外到内,还是从内到外的ping都会出现丢包
根据我的实验,丢包都会大于设置值(不同于CPU是在设置值上下波动)
网络延迟测试
在上面的实验参数情况下,不管从外到内,还是从内到外的ping都会出现延迟
节点挂了测试
这个实验无法通过HTTP进行(这也意味着无法通过dashboard进行),只能本地执行
操作系统关了后当然不能自己启动,所以不同于前面的测试,这个测试只能手工恢复
CLI
#! /bin/bash
# Walk-through of Chaosd fault injection using the local chaosd CLI.
# Every attack is created with an explicit --uid so that the matching
# `chaosd recover <uid>` call can undo it after the observation window.

# NIC name used by the network experiments.
CHAOSD_NODE_DEVICE=ens192

# Inject CPU stress.
chaosd attack stress cpu --load 80 -w 40 --uid cpu-cli
sleep 30s
chaosd recover cpu-cli

# Inject memory stress.
# The long option must be spelled --uid (two dashes), like the other attacks,
# otherwise the attack is not registered under the uid that recover uses.
chaosd attack stress mem -s 64GiB --uid mem-cli
sleep 30s
chaosd recover mem-cli

# Inject network latency.
chaosd attack network delay --latency 2s --device "$CHAOSD_NODE_DEVICE" --uid delay-cli
sleep 30s
chaosd recover delay-cli

# Inject network packet loss.
chaosd attack network loss --percent 30 --device "$CHAOSD_NODE_DEVICE" --uid loss-cli
sleep 30s
chaosd recover loss-cli

# Inject disk fill.
chaosd attack disk fill --percent 99 --uid fill-cli
sleep 30s
chaosd recover fill-cli
yaml
# Walk-through of Chaosd fault injection driven from Chaos Mesh: each experiment
# is written out as a PhysicalMachineChaos manifest and applied with kubectl.
# The manifests are generated with here-docs so that the YAML nesting
# (metadata / spec / per-action settings) is preserved exactly as written.

# Address of the physical machine under test; the Chaosd service must be running on it.
# (It should not be the local machine; the value below is only illustrative.)
export CHAOSD_SERVER=127.0.0.1
# Name of the NIC on the machine under test; used by the network fault experiments.
export CHAOSD_NODE_DEVICE=eth0
# Address of the network traffic that the network experiments should affect.
export NETWORKCHAOS_TARGET=127.0.0.1
# Kubernetes namespace in which chaos-mesh is deployed.
export CHAOS_NAMESPACE=chaos-testing
# Timestamp suffix (month, day, hour, minute) appended to each object name.
name_postfix=$(date "+%m%d%H%M")

# CPU stress test: start 4 workers, each with a 60% CPU load.
cat > physical-stress-cpu.yaml <<EOF
apiVersion: chaos-mesh.org/v1alpha1
kind: PhysicalMachineChaos
metadata:
  name: physical-stress-cpu-${name_postfix}
  namespace: ${CHAOS_NAMESPACE}
spec:
  action: stress-cpu
  address:
    - ${CHAOSD_SERVER}:31767
  stress-cpu:
    load: 60
    workers: 4
  duration: '30s'
EOF
kubectl apply -f physical-stress-cpu.yaml

# Memory stress test: occupy 2GiB of memory.
cat > physical-stress-mem.yaml <<EOF
apiVersion: chaos-mesh.org/v1alpha1
kind: PhysicalMachineChaos
metadata:
  name: physical-stress-mem-${name_postfix}
  namespace: ${CHAOS_NAMESPACE}
spec:
  action: stress-mem
  address:
    - ${CHAOSD_SERVER}:31767
  stress-mem:
    size: 2GiB
  duration: '30s'
EOF
kubectl apply -f physical-stress-mem.yaml

# Network packet-loss test, set to 30% here.
cat > physical-network-loss.yaml <<EOF
apiVersion: chaos-mesh.org/v1alpha1
kind: PhysicalMachineChaos
metadata:
  name: physical-network-loss-${name_postfix}
  namespace: ${CHAOS_NAMESPACE}
spec:
  action: network-loss
  address:
    - ${CHAOSD_SERVER}:31767
  network-loss:
    device: ${CHAOSD_NODE_DEVICE}
    ip-address: ${NETWORKCHAOS_TARGET}
    percent: '30'
  duration: '30s'
EOF
kubectl apply -f physical-network-loss.yaml

# Network latency test, set to 1 second here.
cat > physical-network-delay.yaml <<EOF
apiVersion: chaos-mesh.org/v1alpha1
kind: PhysicalMachineChaos
metadata:
  name: physical-network-delay-${name_postfix}
  namespace: ${CHAOS_NAMESPACE}
spec:
  action: network-delay
  address:
    - ${CHAOSD_SERVER}:31767
  network-delay:
    device: ${CHAOSD_NODE_DEVICE}
    ip-address: ${NETWORKCHAOS_TARGET}
    latency: '1000ms'
  duration: '30s'
EOF
kubectl apply -f physical-network-delay.yaml

# Node-down test (left disabled: it shuts the target host down).
#ssh "$CHAOSD_SERVER" "chaosd attack host shutdown"