Log Collection
Distributed log data is collected in a unified way to enable centralized querying and management. Log collection supports log search, troubleshooting, failure recovery, self-healing, application log analysis, error alerting, performance analysis, and user behavior analysis.
Log collection workflow
Logs produced in the K8S cluster are collected by tools such as filebeat and written to kafka; the data in kafka is then consumed (by logstash) and written to es, and finally displayed in kibana.
Log collection approaches in a K8S cluster
- Node-level collection: deploy the log collection process as a daemonset to collect json-file type logs (standard output /dev/stdout, error output /dev/stderr).
- Sidecar container (multiple containers in one pod): collect the logs of one or more business containers in the same pod (log files are usually shared between the business containers and the sidecar via an emptyDir volume).
- A log collection agent built into the application container.
Log collection example 1: collecting json-file logs with a daemonset
Run the log collection service as a daemonset. It mainly collects the following types of logs: json-file logs collected at the node level by the daemonset-deployed collection process, plus host system logs and other logs stored as files on disk.
K8S pods and the system write logs as json-file; the daemonset reads and collects them and ships them to kafka.
Deploying kafka and zookeeper
1. Deploy zookeeper
Download the zookeeper package
wget https://dlcdn.apache.org/zookeeper/zookeeper-3.7.1/apache-zookeeper-3.7.1-bin.tar.gz
Generate the configuration file
cp zoo_sample.cfg zoo.cfg
Edit the configuration
vi zoo.cfg
tickTime=2000 #heartbeat interval; one tick every 2000ms
initLimit=10 #ticks allowed for the initial sync; the zookeeper cluster may take up to initLimit*tickTime during the first initialization
syncLimit=5 #ticks allowed for follower sync; after 5 failed probes the node is considered down
dataDir=/data/zookeeper #data directory; place the myid file here, e.g. echo 2 > /data/zookeeper/myid
server.1=192.168.226.144:2888:3888
server.2=192.168.226.145:2888:3888
server.3=192.168.226.146:2888:3888
Start and verify
[root@node2 logs]# /apps/zookeeper/bin/zkServer.sh start
/usr/bin/java
ZooKeeper JMX enabled by default
Using config: /apps/zookeeper/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[root@node2 logs]# /apps/zookeeper/bin/zkServer.sh status
/usr/bin/java
ZooKeeper JMX enabled by default
Using config: /apps/zookeeper/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost. Client SSL: false.
Mode: leader
2. Deploy kafka
Download kafka
wget https://dlcdn.apache.org/kafka/3.2.0/kafka_2.12-3.2.0.tgz
Edit the configuration
vim server.properties
broker.id=144 #must be changed; every node needs a unique value
listeners=PLAINTEXT://192.168.226.144:9092 #listen address
log.dirs=/data/kafka-log #data directory
zookeeper.connect=192.168.226.144:2181,192.168.226.145:2181,192.168.226.146:2181 #zookeeper addresses
zookeeper.connection.timeout.ms=60000 #zookeeper connection timeout (the default is 6s)
Start kafka
/apps/kafka/bin/kafka-server-start.sh -daemon /apps/kafka/config/server.properties
Stop kafka
/apps/kafka/bin/kafka-server-stop.sh
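To confirm the brokers are up, a test topic can be created and listed with the bundled kafka-topics.sh tool (the topic name test-topic below is just an example):
/apps/kafka/bin/kafka-topics.sh --bootstrap-server 192.168.226.144:9092 --create --topic test-topic --partitions 3 --replication-factor 2
/apps/kafka/bin/kafka-topics.sh --bootstrap-server 192.168.226.144:9092 --list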
Deploying Elasticsearch
Preparation
adduser es #create the user; elasticsearch must be started as a non-root user
chown -R es:es /apps/elasticsearch-7.12.1 #grant ownership
Edit the configuration file
cluster.name: my-es #cluster name
node.name: node-2 #node name; must be unique within the cluster
path.data: /path/to/data
path.logs: /path/to/logs #data and log directories; must be owned by the non-root user
network.host: 192.168.226.152 #bind address for external access
http.port: 9200 #listening port
discovery.seed_hosts: ["192.168.226.151", "192.168.226.152"] #node IPs for discovery
cluster.initial_master_nodes: ["node-1", "node-2"] #master-eligible node names; only nodes listed here can be elected master
action.destructive_requires_name: true #safety setting: indices cannot be deleted with wildcard matching
vim /apps/elasticsearch-7.12.1/config/jvm.options
-Xms256m
-Xmx256m #adjust the heap size; keep minimum and maximum identical
Issues encountered during startup
vim /etc/security/limits.conf
* soft nofile 65536
* hard nofile 65536
echo "vm.max_map_count=262144" > /etc/sysctl.conf
sysctl -p | grep max_map_count
Verify
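A quick check that the cluster formed correctly (addresses taken from the configuration above):
curl "http://192.168.226.152:9200"
curl "http://192.168.226.152:9200/_cat/nodes?v"
curl "http://192.168.226.152:9200/_cluster/health?pretty"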
Deploying kibana
Edit the configuration file
vim /etc/kibana/kibana.yml
server.port: 5601
server.host: "192.168.226.151"
elasticsearch.hosts: ["http://192.168.226.151:9200"]
i18n.locale: "zh-CN"
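Start kibana and confirm that port 5601 is listening (assuming the package install provides a systemd unit):
systemctl enable --now kibana
ss -tnlp | grep 5601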
Deploying the log collection process as a daemonset
Build the image from a Dockerfile
[root@k8s-master1 1.logstash-image-Dockerfile]# vim Dockerfile
FROM logstash:7.12.1
USER root
WORKDIR /usr/share/logstash
#RUN rm -rf config/logstash-sample.conf
ADD logstash.yml /usr/share/logstash/config/logstash.yml
ADD app1.conf /usr/share/logstash/pipeline/logstash.conf
[root@k8s-master1 1.logstash-image-Dockerfile]# vim logstash.yml #edit the configuration, because by default it points to elasticsearch:9200
http.host: "0.0.0.0"
#xpack.monitoring.elasticsearch.hosts: [ "http://elasticsearch:9200" ]
[root@k8s-master1 1.logstash-image-Dockerfile]# cat app1.conf #pipeline configuration file
input {
file {
path => "/var/lib/docker/containers/*/*-json.log"
start_position => "beginning"
type => "jsonfile-daemonset-applog"
}
file {
path => "/var/log/*.log"
start_position => "beginning"
type => "jsonfile-daemonset-syslog"
}
}
output {
if [type] == "jsonfile-daemonset-applog" {
kafka {
bootstrap_servers => "${KAFKA_SERVER}"
topic_id => "${TOPIC_ID}"
batch_size => 16384 #amount of data logstash sends to kafka per batch, in bytes
codec => "${CODEC}"
}}
if [type] == "jsonfile-daemonset-syslog" {
kafka {
bootstrap_servers => "${KAFKA_SERVER}"
topic_id => "${TOPIC_ID}"
batch_size => 16384
codec => "${CODEC}" #system logs are not in json format
}}
}
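Build and push the image that the DaemonSet below references (the tag must match the image field in the yaml; the harbor address is the one used throughout these notes):
docker build -t k8s-harbor.com/public/logstash:v7.12.1-json-file-log-v4 .
docker push k8s-harbor.com/public/logstash:v7.12.1-json-file-log-v4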
Write the yaml file and start the pod
[root@k8s-master1 1.daemonset-logstash]# cat 2.DaemonSet-logstash.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: logstash-elasticsearch
namespace: kube-system
labels:
k8s-app: logstash-logging
spec:
selector:
matchLabels:
name: logstash-elasticsearch
template:
metadata:
labels:
name: logstash-elasticsearch
spec:
tolerations:
# this toleration is to have the daemonset runnable on master nodes
# remove it if your masters can't run pods
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
containers:
- name: logstash-elasticsearch
image: k8s-harbor.com/public/logstash:v7.12.1-json-file-log-v4
env:
- name: "KAFKA_SERVER"
value: "192.168.226.144:9092,192.168.226.145:9092,192.168.226.146:9092"
- name: "TOPIC_ID"
value: "jsonfile-log-topic"
- name: "CODEC"
value: "json"
# resources:
# limits:
# cpu: 1000m
# memory: 1024Mi
# requests:
# cpu: 500m
# memory: 1024Mi
volumeMounts:
- name: varlog
mountPath: /var/log
- name: varlibdockercontainers
mountPath: /var/lib/docker/containers
readOnly: false
terminationGracePeriodSeconds: 30
volumes:
- name: varlog
hostPath:
path: /var/log
- name: varlibdockercontainers
hostPath:
path: /var/lib/docker/containers
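Apply the DaemonSet and confirm that a logstash pod is running on every node:
kubectl apply -f 2.DaemonSet-logstash.yaml
kubectl get pods -n kube-system -o wide | grep logstash-elasticsearch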
Deploying logstash
Note: the logstash running in the pods collects pod logs; the logstash deployed in this step consumes logs from the kafka topic and sends them to es, which are then displayed in kibana.
Install logstash
rpm -i logstash-7.12.1-x86_64.rpm
Add the conf configuration file
[root@lvs-master apps]# cat /etc/logstash/conf.d/daemonset-log-to-es.conf
input {
kafka {
bootstrap_servers => "192.168.226.144:9092,192.168.226.145:9092,192.168.226.146:9092"
topics => ["jsonfile-log-topic"]
codec => "json"
}
}
output {
#if [fields][type] == "app1-access-log" {
if [type] == "jsonfile-daemonset-applog" {
elasticsearch {
hosts => ["192.168.226.151:9200"]
index => "jsonfile-daemonset-applog-%{+YYYY.MM.dd}"
}}
if [type] == "jsonfile-daemonset-syslog" {
elasticsearch {
hosts => ["192.168.226.151:9200"]
index => "jsonfile-daemonset-syslog-%{+YYYY.MM.dd}"
}}
}
systemctl restart logstash.service #restart
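After the restart, the daily indices should appear in es within a minute or two; a quick check (es address taken from the conf above):
curl "http://192.168.226.151:9200/_cat/indices?v" | grep jsonfile-daemonset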
Add the index patterns in kibana
Log collection example 2: collecting logs with a sidecar container
Collect the logs of one or more business containers in the pod with a sidecar container (multiple containers per pod). The difference from the daemonset approach: if a node runs 10 pods, the daemonset approach needs 10 business containers plus 1 logstash, i.e. 11 containers in total, while the sidecar approach needs 10 business containers plus 10 logstash containers, i.e. 20 in total, so the resource overhead is considerably higher.
Build the image from a Dockerfile
[root@k8s-master1 2.sidecar-logstash]# cat 1.logstash-image-Dockerfile/Dockerfile
FROM logstash:7.12.1
USER root
WORKDIR /usr/share/logstash
#RUN rm -rf config/logstash-sample.conf
ADD logstash.yml /usr/share/logstash/config/logstash.yml
ADD app1.conf /usr/share/logstash/pipeline/logstash.conf
[root@k8s-master1 2.sidecar-logstash]# cat 1.logstash-image-Dockerfile/app1.conf
input {
file {
path => "/var/log/applog/catalina.out"
start_position => "beginning"
type => "app1-sidecar-catalina-log"
}
file {
path => "/var/log/applog/localhost_access_log.*.txt"
start_position => "beginning"
type => "app1-sidecar-access-log"
}
}
output {
if [type] == "app1-sidecar-catalina-log" {
kafka {
bootstrap_servers => "${KAFKA_SERVER}"
topic_id => "${TOPIC_ID}"
batch_size => 16384 #amount of data logstash sends to kafka per batch, in bytes
codec => "${CODEC}"
} }
if [type] == "app1-sidecar-access-log" {
kafka {
bootstrap_servers => "${KAFKA_SERVER}"
topic_id => "${TOPIC_ID}"
batch_size => 16384
codec => "${CODEC}"
}}
}
Create the pod
[root@k8s-master1 2.sidecar-logstash]# cat 2.tomcat-app1.yaml
kind: Deployment
#apiVersion: extensions/v1beta1
apiVersion: apps/v1
metadata:
labels:
app: magedu-tomcat-app1-deployment-label
name: magedu-tomcat-app1-deployment #name of this deployment
namespace: magedu
spec:
replicas: 1
selector:
matchLabels:
app: magedu-tomcat-app1-selector
template:
metadata:
labels:
app: magedu-tomcat-app1-selector
spec:
containers:
- name: sidecar-container
image: k8s-harbor.com/public/logstash:v7.12.1-sidecar
imagePullPolicy: Always
env:
- name: "KAFKA_SERVER"
value: "192.168.226.144:9092,192.168.226.145:9092,192.168.226.146:9092"
- name: "TOPIC_ID"
value: "tomcat-app2-topic"
- name: "CODEC"
value: "json"
volumeMounts:
- name: applogs
mountPath: /var/log/applog
- name: magedu-tomcat-app1-container
image: k8s-harbor.com/project/tomcat-app1:2022-06-01_21_45_38
imagePullPolicy: IfNotPresent
#imagePullPolicy: Always
ports:
- containerPort: 8080
protocol: TCP
name: http
env:
- name: "password"
value: "123456"
- name: "age"
value: "18"
resources:
limits:
cpu: 200m
memory: "200Mi"
requests:
cpu: 200m
memory: "200Mi"
volumeMounts:
- name: applogs
mountPath: /apps/tomcat/logs
# startupProbe:
# httpGet:
# path: /myapp/index.html
# port: 8080
# initialDelaySeconds: 5 #delay before the first probe
# failureThreshold: 3 #number of failures before switching from success to failure
# periodSeconds: 3 #probe interval
# readinessProbe:
# httpGet:
#path: /monitor/monitor.html
# path: /myapp/index.html
# port: 8080
# initialDelaySeconds: 5
# periodSeconds: 3
# timeoutSeconds: 5
# successThreshold: 1
# failureThreshold: 3
# livenessProbe:
# httpGet:
#path: /monitor/monitor.html
# path: /myapp/index.html
# port: 8080
# initialDelaySeconds: 5
# periodSeconds: 3
# timeoutSeconds: 5
# successThreshold: 1
# failureThreshold: 3
volumes:
- name: applogs
emptyDir: {} #ephemeral volume: tomcat writes its logs to /apps/tomcat/logs, and the sidecar logstash reads them from /var/log/applog and ships them to kafka
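Apply the deployment and confirm that both containers in the pod are running (READY 2/2):
kubectl apply -f 2.tomcat-app1.yaml
kubectl get pods -n magedu | grep magedu-tomcat-app1-deployment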
Verify
Both types of logs are written to kafka.
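This can be confirmed with the console consumer on any kafka node (topic name taken from the TOPIC_ID in the deployment):
/apps/kafka/bin/kafka-console-consumer.sh --bootstrap-server 192.168.226.144:9092 --topic tomcat-app2-topic --from-beginning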
Configure logstash to output to es
[root@lvs-master conf.d]# cat test.conf
input {
kafka {
bootstrap_servers => "192.168.226.144:9092,192.168.226.145:9092,192.168.226.146:9092"
topics => ["tomcat-app2-topic"]
codec => "json"
}
}
output {
#if [fields][type] == "app1-sidecar-access-log" {
if [type] == "app1-sidecar-access-log" {
elasticsearch {
hosts => ["192.168.226.151:9200"]
index => "sidecar-app1-accesslog-%{+YYYY.MM.dd}"
}}
if [type] == "app1-sidecar-catalina-log" {
elasticsearch {
hosts => ["192.168.226.151:9200"]
index => "sidecar-app1-catalinalog-%{+YYYY.MM.dd}"
}}
}
The corresponding indices now exist in es.
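For example:
curl "http://192.168.226.151:9200/_cat/indices?v" | grep sidecar-app1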
Add the index patterns in kibana and visualize the data
Log collection example 3: a log collection agent built into the container
Each container ships its own logs with a built-in collection process such as filebeat, which is more lightweight than logstash; if a pod has 10 containers, there will also be 10 log collection processes.
Data flow: the application writes logs, filebeat collects them and sends them to kafka, logstash consumes the kafka data and sends it to es (creating the corresponding indices), and index patterns are created in kibana for visualization.
Build the image containing filebeat and the application
#tomcat web1
FROM k8s-harbor.com/public/tomcat-base:v8.5.43
ADD catalina.sh /apps/tomcat/bin/catalina.sh
ADD server.xml /apps/tomcat/conf/server.xml
#ADD myapp/* /data/tomcat/webapps/myapp/
ADD app1.tar.gz /data/tomcat/webapps/myapp/
ADD run_tomcat.sh /apps/tomcat/bin/run_tomcat.sh
ADD filebeat.yml /etc/filebeat/filebeat.yml
RUN chown -R tomcat.tomcat /data/ /apps/
#ADD filebeat-7.5.1-x86_64.rpm /tmp/
#RUN cd /tmp && yum localinstall -y filebeat-7.5.1-x86_64.rpm
EXPOSE 8080 8443
CMD ["/apps/tomcat/bin/run_tomcat.sh"]
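The entrypoint run_tomcat.sh is not listed here; a minimal sketch of what it needs to do (start filebeat in the background and keep the container alive after starting tomcat) could look like the following, assuming filebeat was installed from the rpm and tomcat lives under /apps/tomcat:
#!/bin/bash
# start filebeat in the background, using the filebeat.yml added by the Dockerfile
/usr/bin/filebeat -c /etc/filebeat/filebeat.yml &
# start tomcat as the tomcat user
su - tomcat -c "/apps/tomcat/bin/catalina.sh start"
# keep PID 1 alive so the container does not exit
tail -f /dev/null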
[root@k8s-master1 1.webapp-filebeat-image-Dockerfile]# cat filebeat.yml
filebeat.inputs:
- type: log
enabled: true
paths:
- /apps/tomcat/logs/catalina.out
fields:
type: filebeat-tomcat-catalina #filebeat supports adding custom fields
- type: log
enabled: true
paths:
- /apps/tomcat/logs/localhost_access_log.*.txt
fields:
type: filebeat-tomcat-accesslog
filebeat.config.modules:
path: ${path.config}/modules.d/*.yml
reload.enabled: false
setup.template.settings:
index.number_of_shards: 1
setup.kibana:
output.kafka:
hosts: ["192.168.226.144:9092","192.168.226.145:9092","192.168.226.146:9092"]
required_acks: 1 #number of acknowledgements required from the kafka brokers
topic: "filebeat-magedu-app1"
compression: gzip
max_message_bytes: 1000000
#output.redis:
# hosts: ["172.31.2.105:6379"] #filebeat can also write to redis
# key: "k8s-magedu-app1"
# db: 1
# timeout: 5
# password: "123456"
Create the pod
[root@k8s-master1 3.container-filebeat-process]# cat 3.tomcat-app1.yaml
kind: Deployment
#apiVersion: extensions/v1beta1
apiVersion: apps/v1
metadata:
labels:
app: magedu-tomcat-app1-filebeat-deployment-label
name: magedu-tomcat-app1-filebeat-deployment
namespace: magedu
spec:
replicas: 1
selector:
matchLabels:
app: magedu-tomcat-app1-filebeat-selector
template:
metadata:
labels:
app: magedu-tomcat-app1-filebeat-selector
spec:
containers:
- name: magedu-tomcat-app1-filebeat-container
image: k8s-harbor.com/public/tomcat-app1:20220529
#imagePullPolicy: IfNotPresent
imagePullPolicy: Always
ports:
- containerPort: 8080
protocol: TCP
name: http
env:
- name: "password"
value: "123456"
- name: "age"
value: "18"
resources:
limits:
cpu: 200m
memory: "200Mi"
requests:
cpu: 200m
memory: "200Mi"
[root@k8s-master1 3.container-filebeat-process]# cat 4.tomcat-service.yaml
---
kind: Service
apiVersion: v1
metadata:
labels:
app: magedu-tomcat-app1-filebeat-service-label
name: magedu-tomcat-app1-filebeat-service
namespace: magedu
spec:
type: NodePort
ports:
- name: http
port: 80
protocol: TCP
targetPort: 8080
nodePort: 30092
selector:
app: magedu-tomcat-app1-filebeat-selector
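Apply both files and generate some access-log traffic through the NodePort (any node IP works; 192.168.226.144 is used here as an example):
kubectl apply -f 3.tomcat-app1.yaml -f 4.tomcat-service.yaml
curl http://192.168.226.144:30092/myapp/index.html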
Verify the data in kafka
Write the data to es via logstash
[root@k8s-master1 3.container-filebeat-process]# cat logstash-filebeat-process-kafka-to-es.conf
input {
kafka {
bootstrap_servers => "192.168.226.144:9092,192.168.226.145:9092,192.168.226.146:9092"
topics => ["filebeat-magedu-app1"]
codec => "json"
}
}
output {
if [fields][type] == "filebeat-tomcat-catalina" { #custom field added by filebeat
elasticsearch {
hosts => ["192.168.226.151:9200"]
index => "filebeat-tomcat-catalina-%{+YYYY.MM.dd}"
}}
if [fields][type] == "filebeat-tomcat-accesslog" {
elasticsearch {
hosts => ["192.168.226.151:9200"]
index => "filebeat-tomcat-accesslog-%{+YYYY.MM.dd}"
}}
}
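Place the file under /etc/logstash/conf.d/ on the logstash host, restart the service, and check that the new indices appear:
systemctl restart logstash
curl "http://192.168.226.151:9200/_cat/indices?v" | grep filebeat-tomcat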
Display in kibana
K8S network component: flannel
A network service for k8s open-sourced by CoreOS. Its purpose is to solve pod-to-pod communication across hosts in K8S; it relies on etcd to maintain network address allocation and assigns each node a distinct IP address range.
Flannel network models (backends): Flannel currently has three implementations, UDP, VXLAN and host-gw.
UDP: early versions of Flannel used UDP encapsulation to forward packets across hosts; its security and performance are somewhat limited.
VXLAN: the Linux kernel added VXLAN protocol support in v3.7.0 at the end of 2012, so newer Flannel versions switched from UDP to VXLAN. VXLAN is essentially a tunnel protocol that builds a virtual layer-2 network on top of a layer-3 network. Flannel's network model is now a VXLAN-based overlay network, and vxlan is the recommended backend.
Host-gw: i.e. Host Gateway. Packets are forwarded by creating routes on each node to the target container subnets, which requires all nodes to be in the same layer-2 network; it is therefore not suitable for frequently changing or very large networks, but its performance is the best of the three.
Flannel components:
cni0: a bridge device. For every pod created, a veth pair is created: one end is eth0 inside the pod, the other end is a port on the cni0 bridge. All traffic sent from the pod's eth0 goes to that cni0 bridge port, and the cni0 device itself holds the first IP address of the subnet assigned to the node.
flannel.1 (layer-2 device): the overlay network device that handles vxlan packets (encapsulation and decapsulation); pod traffic between different nodes is tunneled to the peer node through this overlay device.
Data flow
pod (eth0) -> veth peer -> cni0 -> flannel.1 -> vxlan (overlay encapsulation in the kernel) -> eth0 (source host; source port is a random UDP port, destination port is UDP 8472 on the destination host) -> destination host eth0 (UDP 8472) -> flannel.1 -> cni0 -> veth peer -> eth0 (pod)
Inside the overlay the hosts communicate by MAC address; the pods' source and destination IPs do not change.
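On a node running flannel, the pieces described above can be inspected directly (paths and names assume a standard flannel deployment; exact output depends on the cluster):
cat /run/flannel/subnet.env   # pod subnet assigned to this node
ip -d link show flannel.1     # vxlan device details (VNI, local IP, port)
ip addr show cni0             # bridge holding the pod veth ports
route -n                      # routes to other nodes' pod subnets via flannel.1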
K8S network component: Calico
Compared with flannel, calico routes traffic directly by pod IP. Public clouds generally do not support the BGP protocol, so flannel with vxlan is commonly used there, while calico is used in private clouds.
calico is a pure layer-3 network solution that provides cross-node communication for containers. calico treats every node as a router; the nodes learn routes from each other via BGP (Border Gateway Protocol) and generate routing rules on each node, connecting the pods on different nodes so they can communicate.
[root@k8s-node1 ~]# calicoctl node status #view the BGP peers learned by calico
Calico process is running.
IPv4 BGP status
+-----------------+-------------------+-------+----------+-------------+
| PEER ADDRESS | PEER TYPE | STATE | SINCE | INFO |
+-----------------+-------------------+-------+----------+-------------+
| 192.168.226.144 | node-to-node mesh | up | 06:26:25 | Established |
| 192.168.226.146 | node-to-node mesh | up | 06:26:25 | Established |
+-----------------+-------------------+-------+----------+-------------+
IPv6 BGP status
No IPv6 peers found.
node-to-node mesh: every host has routes to every other node.
calico encapsulation types:
calico supports two types of encapsulation: VXLAN and IP-in-IP. VXLAN is supported in some environments where IP-in-IP is not (for example Azure). VXLAN has slightly higher per-packet overhead because of its larger header, but unless you run network-intensive workloads you will usually not notice the difference. Another small difference is that Calico's VXLAN implementation does not use BGP, whereas calico's IP-in-IP uses BGP between calico nodes to reach across subnets.
BGP is a decentralized protocol that keeps the network reachable by automatically learning and maintaining routing tables, but not every network supports BGP. To manage larger networks across segments, calico also supports the IP-in-IP overlay model, IPIP for short. IPIP can establish routed communication across different subnets, but it has some security concerns; it is built into the kernel and can be enabled or disabled in Calico's configuration file. If the k8s nodes do not span multiple subnets, it is recommended to disable IPIP.
IPIP builds a tunnel between the nodes' routes and connects the two networks. When IPIP mode is enabled, calico creates a virtual network interface named "tunl0" on each node (visible with ifconfig).
In BGP mode the physical host itself acts as the virtual router (vRouter) and no extra tunnel is created.
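Whether IPIP (or VXLAN) is enabled can be read from the IP pool definition, and the tunl0 routes it produces can be seen on any node (calicoctl assumed to be installed as above):
calicoctl get ippool -o wide   # shows NAT, IPIPMODE and VXLANMODE for the pool
route -n | grep tunl0          # routes to other nodes' pod subnets via tunl0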
calico core components
- Felix (routes visible with route -n): calico's agent, running on every node; it mainly maintains routing rules and reports node status to ensure cross-host pod communication.
- BGP Client: runs on every node; it watches the routing information generated by felix on that node and advertises it to the remaining nodes via BGP, so the nodes learn routes from each other and pods can communicate.
- Route Reflector: a centralized route reflector, supported since calico v3.3. When a calico BGP client advertises routes from its FIB (Forwarding Information Base) to the Route Reflector, the Route Reflector advertises those routes to the other nodes in the cluster. The Route Reflector only manages BGP routing rules and does not carry pod traffic.
Packet capture verification
Flow: eth0 (container) -> calic7e289c0440 -> tunl0 (forwarding) -> eth0 (IPinIP, source host) -> eth0 (destination host) -> tunl0 -> calif8b001c7f6c -> eth0 (container)
Source pod veth pair
[root@magedu-tomcat-app1-filebeat-deployment-8554c8454d-6rlwv /]# ethtool -S eth0
NIC statistics:
peer_ifindex: 9
Capture on calic7e289c0440@if4 (ifindex 9, the host-side peer of the source pod's eth0)
[root@node2 ~]# ip link show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether 00:0c:29:77:1d:3c brd ff:ff:ff:ff:ff:ff
3: docker0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN mode DEFAULT group default
link/ether 02:42:99:35:7d:2d brd ff:ff:ff:ff:ff:ff
4: cali245ee849431@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether ee:ee:ee:ee:ee:ee brd ff:ff:ff:ff:ff:ff link-netnsid 0
5: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether e6:b1:8b:14:8e:c3 brd ff:ff:ff:ff:ff:ff
6: kube-ipvs0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default
link/ether 4a:93:d0:49:92:ab brd ff:ff:ff:ff:ff:ff
7: tunl0@NONE: <NOARP,UP,LOWER_UP> mtu 1440 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/ipip 0.0.0.0 brd 0.0.0.0
9: calic7e289c0440@if4: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether ee:ee:ee:ee:ee:ee brd ff:ff:ff:ff:ff:ff link-netnsid 1
Identify the destination pod's veth pair
[root@magedu-tomcat-app1-filebeat-deployment-8554c8454d-7tr9z /]# ethtool -S eth0
NIC statistics:
peer_ifindex: 11
[root@k8s-node1 ~]# ip link show
11: calif8b001c7f6c@if4: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether ee:ee:ee:ee:ee:ee brd ff:ff:ff:ff:ff:ff link-netnsid 4
The source pod sends a request so that the packet reaches the host-side veth interface of the pod
curl 10.200.36.88:8080/myapp/index.html
Capture the packet on the host-side veth interface of the source pod
tcpdump -nn -vvv -i calic7e289c0440 -vvv -nn ! port 22 and ! port 2379 and ! port 6443 and ! port 10250 and ! arp and ! port 53 and ! port 9092
At this point the packet's source IP is the pod's eth0 IP and the source MAC is the pod's eth0 MAC; the destination IP is 10.200.36.88, the next hop is the gateway 169.254.1.1, and the destination MAC is ee:ee:ee:ee:ee:ee (the MAC of the default gateway 169.254.1.1). The source port is random and the destination port is 8080.
[root@magedu-tomcat-app1-filebeat-deployment-8554c8454d-6rlwv /]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 169.254.1.1 0.0.0.0 UG 0 0 0 eth0
169.254.1.1 0.0.0.0 255.255.255.255 UH 0 0 0 eth0
[root@magedu-tomcat-app1-filebeat-deployment-8554c8454d-6rlwv /]# arp -a
gateway (169.254.1.1) at ee:ee:ee:ee:ee:ee [ether] on eth0
[root@magedu-tomcat-app1-filebeat-deployment-8554c8454d-6rlwv /]#
From 169.254.1.1 the traffic goes straight out to the host; the host then performs source address translation, replacing the source with the host's own address.
The packet reaches tunl0; capture there
tcpdump -nn -vvv -i tunl0 -vvv -nn ! port 22 and ! port 2379 and ! port 6443 and ! port 10250 and ! arp and ! port 53 and ! port 9092 -w tunl0.cap
At this point the packet's source IP is the source pod IP and the destination IP is the destination pod IP; there is no MAC address.
The packet reaches the host's ens33; capture there
tcpdump -nn -vvv -i ens33 -vvv -nn ! port 22 and ! port 2379 and ! port 6443 and ! port 10250 and ! arp and ! port 53 and ! port 9092 and ! host 192.168.226.144 and host 192.168.226.146 -w eth0.cap
At this point the packet is in IPinIP format: the outer layer carries the source/destination MACs and IPs of the source and destination hosts, while the inner layer carries the source pod IP and destination pod IP without MAC addresses.
The packet reaches the destination host's ens33
The packet arrives at the destination host's eth0 as the source host's IPinIP packet: the outer layer carries the hosts' source/destination MACs and IPs, the inner layer the source and destination pod IPs without MAC addresses; after decapsulation it turns out to be destined for the target pod.
The packet reaches the destination host's tunl0
tcpdump -nn -vvv -i tunl0 -vvv -nn ! port 22 and ! port 2379 and ! port 6443 and ! port 10250 and ! arp and ! port 53 and ! port 9092 and ! host 192.168.226.144 -w dsc-tunl0.cap
At the destination host's tunl0 the source IP is the source pod IP and the destination IP is the destination pod IP; there is no MAC address.
Reaching the destination pod
tcpdump -nn -vvv -i calif8b001c7f6c -vvv -nn ! port 22 and ! port 2379 and ! port 6443 and ! port 10250 and ! arp and ! port 53 and ! port 9092 and ! host 192.168.226.144 -w dsc-pod.cap
The packet reaches the destination host's calif8b001c7f6c interface. At this point the source IP is the source pod IP, the source MAC is the tunl0 MAC, the destination IP is the destination pod IP, and the destination MAC is the destination pod MAC; the packet is then forwarded to that destination MAC (the destination pod).
The packet reaches the destination pod
tcpdump -i ens33 -vvv -nn
The packet reaches the destination pod; the destination pod accepts the request, builds the response, and returns it to the source pod along the same path.
K8S network policies
Which pods a Pod may communicate with is identified by combinations of the following three identifiers:
- Allow or deny specific pods access to all or specified ports of the destination pods in the destination namespace.
- Allow or deny specific namespaces access to all or specific pods in the destination namespace, on all or specified ports.
- Allow or deny specific source IP ranges or IP addresses access to all or specific ports of the destination pods.
Case 1: access restricted by namespace and label
[root@k8s-master1 python-ns2]# cat case1-ingress-podSelector.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: tomcat-access--networkpolicy
namespace: python
spec:
policyTypes:
- Ingress
podSelector:
matchLabels:
app: python-tomcat-app1-selector #apply the following rules to the matched destination pods
ingress: #ingress rules; if no destination port is specified, all ports and protocols (TCP, UDP, or SCTP) are matched
- from:
- podSelector:
matchLabels:
#app: python-nginx-selector #if there are multiple matchLabels conditions they are ANDed, i.e. conditions A, B, ... X must all be satisfied
project: "python1"
[root@k8s-master1 python-ns2]# calicoctl get networkpolicy -n python
NAMESPACE NAME
python knp.default.tomcat-access--networkpolicy
[root@k8s-master1 python-ns2]# calicoctl get networkpolicy knp.default.tomcat-access--networkpolicy -n python -o yaml
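A quick way to verify the policy is to curl the tomcat pod from a pod that carries the allowed label and from one that does not (pod names and the target pod IP below are placeholders; curl is assumed to be available in the test pods):
kubectl -n python exec <pod-with-project-python1-label> -- curl -m 3 http://<tomcat-pod-ip>:8080/   # should succeed
kubectl -n python exec <pod-without-label> -- curl -m 3 http://<tomcat-pod-ip>:8080/   # should time out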
Case 2: specify ports
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: tomcat-access--networkpolicy
namespace: python
spec:
policyTypes:
- Ingress
podSelector:
matchLabels:
app: python-tomcat-app1-selector
ingress:
- from:
- podSelector:
matchLabels:
#app: python-nginx-selector #matching conditions for the access source; if there are multiple matchLabels conditions they are ANDed, i.e. conditions A, B, ... X must all be satisfied
project: "python"
ports: #ingress rules; if no destination port is specified, all ports and protocols (TCP, UDP, or SCTP) are matched
- protocol: TCP
#port: 8080 #allow TCP access to port 8080 of the destination pod; any port not explicitly allowed is denied
port: 80
Case 3: allow the current namespace to access specified ports
If ports is omitted, all ports are open by default.
[root@k8s-master1 python-ns2]# cat case3-ingress-podSelector-ns-MultiPort.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: tomcat-access-networkpolicy
namespace: python
spec:
policyTypes:
- Ingress #once this restriction is added, only sources in the same namespace can access
podSelector: #destination pods
matchLabels:
app: python-tomcat-app1-selector
ingress:
- from:
- podSelector: #match the source pods; matchLabels: {} means the source pods are unrestricted, i.e. all pods are allowed (as with resources, omitting it means no restriction)
matchLabels: {}
ports: #ingress rules; if no destination port is specified, all ports and protocols (TCP, UDP, or SCTP) are matched
- protocol: TCP
#port: 8080 #allow TCP access to port 8080 of the destination pod; any port not explicitly allowed is denied
port: 80
- protocol: TCP
port: 3306
- protocol: TCP
port: 6379
Case 4: restrict by namespace
Only pods in the same namespace can access.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: tomcat-access--networkpolicy
namespace: python
spec:
policyTypes:
- Ingress
podSelector: #destination pods
matchLabels: {} #match all destination pods
ingress:
- from:
- podSelector: #match the source pods; matchLabels: {} means the source pods are unrestricted, i.e. all pods are allowed (as with resources, omitting it means no restriction)
matchLabels: {}
#ports: #ingress rules; if no destination port is specified, all ports and protocols (TCP, UDP, or SCTP) are matched
#- protocol: TCP
# port: {} #allow TCP access to the specified port of the destination pod; any port not explicitly allowed is denied
#port: 80
#- protocol: TCP
# port: 3306
#- protocol: TCP
# port: 6379
Case 5: restrict by IP address
[root@k8s-master1 python-ns2]# cat case5-ingress-ipBlock.yaml.bak
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: tomcat-access--networkpolicy
namespace: python
spec:
policyTypes:
- Ingress
podSelector: #destination pods
matchLabels:
app: python-tomcat-app1-selector
ingress:
- from:
# - podSelector: #match the source pods; matchLabels: {} means the source pods are unrestricted, i.e. all pods are allowed (as with resources, omitting it means no restriction)
# matchLabels: {}
- ipBlock:
cidr: 10.200.0.0/16 #whitelist: source address range allowed to access; anything not allowed is denied access to the destination pods
except:
- 10.200.219.0/24 #source addresses within the above range that are denied
- 10.200.229.0/24 #source addresses within the above range that are denied
- 10.200.104.42/32 #source addresses within the above range that are denied
ports: #ingress rules; if no destination port is specified, all ports and protocols (TCP, UDP, or SCTP) are matched
- protocol: TCP
port: 8080 #allow TCP access to port 8080 of the destination pod; any port not explicitly allowed is denied
#port: 80
- protocol: TCP
port: 3306
- protocol: TCP
port: 6379
Case 6: restrict by namespace selector
[root@k8s-master1 python-ns2]# cat case6-ingress-namespaceSelector.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: tomcat-access--networkpolicy
namespace: python
spec:
policyTypes:
- Ingress
podSelector: #destination pods
matchLabels: {} #allow access to all pods in the python namespace
# app: python-tomcat-app1-selector #or allow access only to the specified pods in the python namespace
ingress:
- from:
# - podSelector: #match the source pods; matchLabels: {} means the source pods are unrestricted, i.e. all pods are allowed (as with resources, omitting it means no restriction)
# matchLabels: {}
# - ipBlock:
# cidr: 10.200.0.0/16 #source network range to match
# except:
# - 10.200.218.0/24 #source addresses within the above range that are denied
- namespaceSelector:
# matchLabels: {} #allow all namespaces to access the specified destination ports, or the specified pods plus ports, in the python namespace
matchLabels:
nsname: linux #only allow the specified namespace to access
- namespaceSelector:
matchLabels:
nsname: python #only allow the specified namespace to access
ports: #ingress rules; if no destination port is specified, all ports and protocols (TCP, UDP, or SCTP) are matched
- protocol: TCP
port: 8080 #allow TCP access to port 8080 of the destination pod; any port not explicitly allowed is denied
#port: 80
- protocol: TCP
port: 3306
- protocol: TCP
port: 6379
Case 7: restrict egress
1. Based on an Egress whitelist: pods in the namespace that match the selector may access the addresses specified by ipBlock and the ports specified by ports.
2. Requests from matched pods to other IPs not explicitly whitelisted in Egress are denied.
3. Egress requests initiated by source pods that do not match the selector are not affected.
[root@k8s-master1 python-ns2]# cat case7-Egress-ipBlock.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: egress-access-networkpolicy
namespace: python
spec:
policyTypes:
- Egress
podSelector: #selector for the pods the policy applies to
matchLabels: #match the target pods by label
app: python-tomcat-app1-selector #match pods in the python namespace whose app label is python-tomcat-app1-selector, then apply the egress rules below to restrict their outbound traffic
egress:
- to:
- ipBlock:
cidr: 10.200.0.0/16 #destination CIDR range the matched pods are allowed to reach
- ipBlock:
cidr: 192.168.226.151/32 #destination host the matched pods are allowed to reach
ports:
- protocol: TCP
port: 80 #allow the matched pods to access destination port 80
- protocol: TCP
port: 53 #allow the matched pods to access destination port 53, i.e. DNS resolution
#- protocol: UDP
# port: 53 #allow the matched pods to access destination port 53, i.e. DNS resolution
Case 8: restrict egress to destination pods - only allow access to specified pods and ports
1. Matched source pods can only access the specified ports of the specified destination pods.
2. All other egress requests that are not explicitly allowed are denied.
Accessing the pods through their service still works.
[root@k8s-master1 python-ns2]# cat case8-Egress-PodSelector.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: egress-access-networkpolicy
namespace: python
spec:
policyTypes:
- Egress
podSelector: #selector for the pods the policy applies to
matchLabels: #match the target pods by label
app: python-nginx-selector #match pods in the python namespace whose app label is python-nginx-selector, then apply the egress rules below to restrict their outbound traffic
egress:
- to:
# - ipBlock:
# cidr: 10.200.0.0/16 #destination CIDR range allowed to be accessed
# - ipBlock:
# cidr: 172.31.7.106/32 #destination host address allowed to be accessed
# - ipBlock:
# cidr: 10.200.218.4/32 #whitelist: destination host address allowed to be accessed
- podSelector: #match the destination pods; matchLabels: {} means no restriction, i.e. all pods are allowed (as with resources, omitting it means no restriction)
matchLabels:
app: python-tomcat-app1-selector
ports:
- protocol: TCP
port: 8080 #allow access to port 8080
- protocol: TCP
port: 53 #allow DNS resolution
- protocol: UDP
port: 53
flannel: generally used in public clouds and in scenarios where network policies are not required
UDP
vxlan+Directrouting
host-gw
calico: used in private clouds and in scenarios that require network policies
calico+IPIP (BGP/vxlan): the most commonly used
calico+vxlan: rarely used