Docker iptables数据包过滤流向分析
iptables log
# 事先关闭firewalld
[root@boy ~]# cat log.sh
#!/bin/bash
# insert: add one LOG rule to each traced ebtables and iptables chain.
# The ebtables rules carry no protocol match; the iptables rules match
# ICMP only (-p icmp). Every rule is added with -I and logs at level
# "debug" with a prefix naming the table and chain it belongs to.
# The exit status is that of the last iptables invocation.
insert() {
  local spec tbl chain tag

  ##### ebtables (bridge layer): table, chain, prefix suffix
  for spec in \
    'broute BROUTING    broute-BROUTING' \
    'nat    PREROUTING  nat-PREROUTE' \
    'nat    OUTPUT      nat-OUTPUT' \
    'nat    POSTROUTING nat-POSTROUTE' \
    'filter INPUT       filter-input' \
    'filter OUTPUT      filter-output' \
    'filter FORWARD     filter-forward'
  do
    read -r tbl chain tag <<<"$spec"
    ebtables -t "$tbl" -I "$chain" --log --log-prefix "ctc/ebtable/$tag" --log-level debug
  done

  #### iptables (IP layer, ICMP only): table, chain, prefix suffix
  for spec in \
    'raw    PREROUTING  raw-PREROUTE' \
    'mangle PREROUTING  mangle-PREROUTE' \
    'nat    PREROUTING  nat-PREROUTE' \
    'mangle FORWARD     mangle-FORWARD' \
    'filter FORWARD     filter-FORWARD' \
    'mangle INPUT       mangle-INPUT' \
    'filter INPUT       filter-INPUT' \
    'raw    OUTPUT      raw-OUTPUT' \
    'mangle OUTPUT      mangle-OUTPUT' \
    'nat    OUTPUT      nat-OUTPUT' \
    'filter OUTPUT      filter-OUTPUT' \
    'mangle POSTROUTING mangle-POSTROUTE' \
    'nat    POSTROUTING nat-POSTROUTE'
  do
    read -r tbl chain tag <<<"$spec"
    iptables -t "$tbl" -I "$chain" -p icmp -j LOG --log-prefix "iptable/$tag" --log-level debug
  done
}
# delete: remove the LOG rules from each traced ebtables and iptables
# chain. Each rule is passed to -D with the same table, chain, match and
# log options (prefix, level "debug") that were used to add it.
# The exit status is that of the last iptables invocation.
delete() {
  local spec tbl chain tag

  #### ebtables (bridge layer): table, chain, prefix suffix
  for spec in \
    'broute BROUTING    broute-BROUTING' \
    'nat    PREROUTING  nat-PREROUTE' \
    'nat    OUTPUT      nat-OUTPUT' \
    'nat    POSTROUTING nat-POSTROUTE' \
    'filter INPUT       filter-input' \
    'filter OUTPUT      filter-output' \
    'filter FORWARD     filter-forward'
  do
    read -r tbl chain tag <<<"$spec"
    ebtables -t "$tbl" -D "$chain" --log --log-prefix "ctc/ebtable/$tag" --log-level debug
  done

  #### iptables (IP layer, ICMP only): table, chain, prefix suffix
  for spec in \
    'raw    PREROUTING  raw-PREROUTE' \
    'mangle PREROUTING  mangle-PREROUTE' \
    'nat    PREROUTING  nat-PREROUTE' \
    'mangle FORWARD     mangle-FORWARD' \
    'filter FORWARD     filter-FORWARD' \
    'mangle INPUT       mangle-INPUT' \
    'filter INPUT       filter-INPUT' \
    'raw    OUTPUT      raw-OUTPUT' \
    'mangle OUTPUT      mangle-OUTPUT' \
    'nat    OUTPUT      nat-OUTPUT' \
    'filter OUTPUT      filter-OUTPUT' \
    'mangle POSTROUTING mangle-POSTROUTE' \
    'nat    POSTROUTING nat-POSTROUTE'
  do
    read -r tbl chain tag <<<"$spec"
    iptables -t "$tbl" -D "$chain" -p icmp -j LOG --log-prefix "iptable/$tag" --log-level debug
  done
}
# Entry point: the first command-line argument selects the action.
#   insert - add the LOG rules
#   delete - remove the LOG rules
# Any other value (including no argument) prints a usage line to stderr
# and exits with status 1, instead of failing silently.
case "${1:-}" in
  insert)
    insert
    ;;
  delete)
    delete
    ;;
  *)
    printf 'Usage: %s {insert|delete}\n' "${0##*/}" >&2
    exit 1
    ;;
esac
# 运行脚本
bash log.sh insert
Container to Container
# 启动container1
[root@boy ~]# docker run -it busybox sh
/ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
4: eth0@if5: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue
link/ether 02:42:ac:11:00:02 brd ff:ff:ff:ff:ff:ff
inet 172.17.0.2/16 brd 172.17.255.255 scope global eth0
valid_lft forever preferred_lft forever
# 启动container2
[root@boy ~]# docker run -it busybox sh
/ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
6: eth0@if7: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue
link/ether 02:42:ac:11:00:03 brd ff:ff:ff:ff:ff:ff
inet 172.17.0.3/16 brd 172.17.255.255 scope global eth0
valid_lft forever preferred_lft forever
# container1 ping container2 一次(提前ping了一次,并将ARP相关信息清除了)
/ # ping -c1 172.17.0.3
PING 172.17.0.3 (172.17.0.3): 56 data bytes
64 bytes from 172.17.0.3: seq=0 ttl=64 time=0.173 ms
--- 172.17.0.3 ping statistics ---
1 packets transmitted, 1 packets received, 0% packet loss
round-trip min/avg/max = 0.173/0.173/0.173 ms
# 查看MAC信息
[root@boy ~]# ip link show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether 00:0c:29:ec:1c:2d brd ff:ff:ff:ff:ff:ff
3: docker0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 02:42:ff:e7:b5:d2 brd ff:ff:ff:ff:ff:ff
5: veth694b3b7@if4: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master docker0 state UP mode DEFAULT group default
link/ether ae:fa:af:36:61:5f brd ff:ff:ff:ff:ff:ff link-netnsid 0
7: veth8f77b15@if6: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master docker0 state UP mode DEFAULT group default
link/ether 1e:05:0c:18:ae:26 brd ff:ff:ff:ff:ff:ff link-netnsid 1
[root@boy ~]# bridge fdb
01:00:5e:00:00:01 dev ens33 self permanent
33:33:00:00:00:01 dev ens33 self permanent
33:33:ff:b4:91:65 dev ens33 self permanent
33:33:00:00:00:01 dev docker0 self permanent
01:00:5e:00:00:01 dev docker0 self permanent
33:33:ff:e7:b5:d2 dev docker0 self permanent
02:42:ff:e7:b5:d2 dev docker0 vlan 1 master docker0 permanent
02:42:ff:e7:b5:d2 dev docker0 master docker0 permanent
ae:fa:af:36:61:5f dev veth694b3b7 master docker0 permanent
02:42:ac:11:00:02 dev veth694b3b7 master docker0
ae:fa:af:36:61:5f dev veth694b3b7 vlan 1 master docker0 permanent
33:33:00:00:00:01 dev veth694b3b7 self permanent
01:00:5e:00:00:01 dev veth694b3b7 self permanent
33:33:ff:36:61:5f dev veth694b3b7 self permanent
1e:05:0c:18:ae:26 dev veth8f77b15 vlan 1 master docker0 permanent
1e:05:0c:18:ae:26 dev veth8f77b15 master docker0 permanent
02:42:ac:11:00:03 dev veth8f77b15 master docker0
33:33:00:00:00:01 dev veth8f77b15 self permanent
01:00:5e:00:00:01 dev veth8f77b15 self permanent
33:33:ff:18:ae:26 dev veth8f77b15 self permanent
[root@boy ~]# brctl show
bridge name bridge id STP enabled interfaces
docker0 8000.0242ffe7b5d2 no veth694b3b7
veth8f77b15
[root@boy ~]# brctl showmacs docker0
port no mac addr is local? ageing timer
1 02:42:ac:11:00:02 no 229.76
2 02:42:ac:11:00:03 no 229.76
2 1e:05:0c:18:ae:26 yes 0.00
2 1e:05:0c:18:ae:26 yes 0.00
1 ae:fa:af:36:61:5f yes 0.00
1 ae:fa:af:36:61:5f yes 0.00
# 查看路由表
[root@boy ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.0.2 0.0.0.0 UG 100 0 0 ens33
172.17.0.0 0.0.0.0 255.255.0.0 U 0 0 0 docker0
192.168.0.0 0.0.0.0 255.255.255.0 U 100 0 0 ens33
# 查看过滤信息
[root@boy ~]# dmesg
####################################### ICMP REQUEST ###############################################
# MAC dest 已事先通过ARP学习到,此时container1将流量发送到本机网卡对端的veth(连在Docker0上),然后触发过滤
[ 3281.370286] ctc/ebtable/broute-BROUTING IN=veth694b3b7 OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ac:11:00:03 proto = 0x0800
[ 3281.370290] ctc/ebtable/nat-PREROUTE IN=veth694b3b7 OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ac:11:00:03 proto = 0x0800
# /proc/sys/net/bridge/bridge-nf-call-iptables 由于这个值为1,ebtables虽然是二层,但是会自己主动调iptables,此时container1将数据包转发给Docker0后,在iptables过滤中认为这个数据包是由Docker0进来的,由于还未进行routing,此时不知道数据包要转发到何处
[ 3281.370297] iptable/raw-PREROUTEIN=docker0 OUT= PHYSIN=veth694b3b7 MAC=02:42:ac:11:00:03:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=172.17.0.3 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=11201 DF PROTO=ICMP TYPE=8 CODE=0 ID=11 SEQ=0
[ 3281.370302] iptable/mangle-PREROUTEIN=docker0 OUT= PHYSIN=veth694b3b7 MAC=02:42:ac:11:00:03:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=172.17.0.3 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=11201 DF PROTO=ICMP TYPE=8 CODE=0 ID=11 SEQ=0
[ 3281.370305] iptable/nat-PREROUTEIN=docker0 OUT= PHYSIN=veth694b3b7 MAC=02:42:ac:11:00:03:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=172.17.0.3 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=11201 DF PROTO=ICMP TYPE=8 CODE=0 ID=11 SEQ=0
# 二层进行forward,转发给container2
[ 3281.370312] ctc/ebtable/filter-forward IN=veth694b3b7 OUT=veth8f77b15 MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ac:11:00:03 proto = 0x0800
# 在iptables中,这里认为IN OUT都是Docker0,需注意,PHYSIN=container1 PHYSOUT=container2
[ 3281.370315] iptable/mangle-FORWARDIN=docker0 OUT=docker0 PHYSIN=veth694b3b7 PHYSOUT=veth8f77b15 MAC=02:42:ac:11:00:03:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=172.17.0.3 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=11201 DF PROTO=ICMP TYPE=8 CODE=0 ID=11 SEQ=0
[ 3281.370318] iptable/filter-FORWARDIN=docker0 OUT=docker0 PHYSIN=veth694b3b7 PHYSOUT=veth8f77b15 MAC=02:42:ac:11:00:03:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=172.17.0.3 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=11201 DF PROTO=ICMP TYPE=8 CODE=0 ID=11 SEQ=0
# 在ebtables POSTROUTING时,将IN删除了
[ 3281.370320] ctc/ebtable/nat-POSTROUTE IN= OUT=veth8f77b15 MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ac:11:00:03 proto = 0x0800
# 在iptables POSTROUTING时,将IN删除了
[ 3281.370322] iptable/mangle-POSTROUTEIN= OUT=docker0 PHYSIN=veth694b3b7 PHYSOUT=veth8f77b15 SRC=172.17.0.2 DST=172.17.0.3 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=11201 DF PROTO=ICMP TYPE=8 CODE=0 ID=11 SEQ=0
[ 3281.370324] iptable/nat-POSTROUTEIN= OUT=docker0 PHYSIN=veth694b3b7 PHYSOUT=veth8f77b15 SRC=172.17.0.2 DST=172.17.0.3 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=11201 DF PROTO=ICMP TYPE=8 CODE=0 ID=11 SEQ=0
########################################## ICMP REPLY ################################################
# REPLY过程和REQUEST过程几乎一样
[ 3281.370348] ctc/ebtable/broute-BROUTING IN=veth8f77b15 OUT= MAC source = 02:42:ac:11:00:03 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 3281.370349] ctc/ebtable/nat-PREROUTE IN=veth8f77b15 OUT= MAC source = 02:42:ac:11:00:03 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 3281.370352] iptable/raw-PREROUTEIN=docker0 OUT= PHYSIN=veth8f77b15 MAC=02:42:ac:11:00:02:02:42:ac:11:00:03:08:00 SRC=172.17.0.3 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=43632 PROTO=ICMP TYPE=0 CODE=0 ID=11 SEQ=0
[ 3281.370355] iptable/mangle-PREROUTEIN=docker0 OUT= PHYSIN=veth8f77b15 MAC=02:42:ac:11:00:02:02:42:ac:11:00:03:08:00 SRC=172.17.0.3 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=43632 PROTO=ICMP TYPE=0 CODE=0 ID=11 SEQ=0
#### 由于NAT PREROUTING在request时参与过滤了,系统留有缓存,此时数据并不会来这过滤,而是根据缓存进行DNAT转换(如果有的话)
[ 3281.370357] ctc/ebtable/filter-forward IN=veth8f77b15 OUT=veth694b3b7 MAC source = 02:42:ac:11:00:03 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 3281.370359] iptable/mangle-FORWARDIN=docker0 OUT=docker0 PHYSIN=veth8f77b15 PHYSOUT=veth694b3b7 MAC=02:42:ac:11:00:02:02:42:ac:11:00:03:08:00 SRC=172.17.0.3 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=43632 PROTO=ICMP TYPE=0 CODE=0 ID=11 SEQ=0
[ 3281.370362] iptable/filter-FORWARDIN=docker0 OUT=docker0 PHYSIN=veth8f77b15 PHYSOUT=veth694b3b7 MAC=02:42:ac:11:00:02:02:42:ac:11:00:03:08:00 SRC=172.17.0.3 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=43632 PROTO=ICMP TYPE=0 CODE=0 ID=11 SEQ=0
### 由于NAT POSTROUTING在request时参与过滤了,系统留有缓存,此时数据并不会来这过滤,而是根据缓存进行SNAT转换(如果有的话)
[ 3281.370363] ctc/ebtable/nat-POSTROUTE IN= OUT=veth694b3b7 MAC source = 02:42:ac:11:00:03 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 3281.370365] iptable/mangle-POSTROUTEIN= OUT=docker0 PHYSIN=veth8f77b15 PHYSOUT=veth694b3b7 SRC=172.17.0.3 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=43632 PROTO=ICMP TYPE=0 CODE=0 ID=11 SEQ=0
# 注:
在FORWARD时,iptables中的规则IN OUT都是Docker0
PING REQUEST流量流向图
PING REPLY流量流向图
- 回来的封包不会进到NAT TABLE,主要是这些封包被 Connection Tracking(conntrack) 给处理过了,接下来都不会进入NAT处理
- ebtables内会偷偷呼叫iptables来进行处理,这部分是个动态开关,可以透过/proc/sys/net/bridge/bridge-nf-call-iptables来告诉kernel不要呼叫iptables
Host to Container
# 启动container1
[root@boy ~]# docker run -it busybox sh
/ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
8: eth0@if9: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue
link/ether 02:42:ac:11:00:02 brd ff:ff:ff:ff:ff:ff
inet 172.17.0.2/16 brd 172.17.255.255 scope global eth0
valid_lft forever preferred_lft forever
# host ping container 一次(提前ping一次,并将ARP相关信息清除了)
[root@boy ~]# ping -c1 172.17.0.2
PING 172.17.0.2 (172.17.0.2) 56(84) bytes of data.
64 bytes from 172.17.0.2: icmp_seq=1 ttl=64 time=0.103 ms
--- 172.17.0.2 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.103/0.103/0.103/0.000 ms
# 查看MAC信息
[root@boy ~]# ip link show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether 00:0c:29:ec:1c:2d brd ff:ff:ff:ff:ff:ff
3: docker0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 02:42:ff:e7:b5:d2 brd ff:ff:ff:ff:ff:ff
9: vethe9baedc@if8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master docker0 state UP mode DEFAULT group default
link/ether 3e:94:a0:26:91:fa brd ff:ff:ff:ff:ff:ff link-netnsid 0
[root@boy ~]# bridge fdb
01:00:5e:00:00:01 dev ens33 self permanent
33:33:00:00:00:01 dev ens33 self permanent
33:33:ff:b4:91:65 dev ens33 self permanent
33:33:00:00:00:01 dev docker0 self permanent
01:00:5e:00:00:01 dev docker0 self permanent
33:33:ff:e7:b5:d2 dev docker0 self permanent
02:42:ff:e7:b5:d2 dev docker0 vlan 1 master docker0 permanent
02:42:ff:e7:b5:d2 dev docker0 master docker0 permanent
02:42:ac:11:00:02 dev vethe9baedc master docker0
3e:94:a0:26:91:fa dev vethe9baedc vlan 1 master docker0 permanent
3e:94:a0:26:91:fa dev vethe9baedc master docker0 permanent
33:33:00:00:00:01 dev vethe9baedc self permanent
01:00:5e:00:00:01 dev vethe9baedc self permanent
33:33:ff:26:91:fa dev vethe9baedc self permanent
[root@boy ~]# brctl show
bridge name bridge id STP enabled interfaces
docker0 8000.0242ffe7b5d2 no vethe9baedc
[root@boy ~]# brctl showmacs docker0
port no mac addr is local? ageing timer
1 02:42:ac:11:00:02 no 91.99
1 3e:94:a0:26:91:fa yes 0.00
1 3e:94:a0:26:91:fa yes 0.00
# 查看路由表
[root@boy ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.0.2 0.0.0.0 UG 100 0 0 ens33
172.17.0.0 0.0.0.0 255.255.0.0 U 0 0 0 docker0
192.168.0.0 0.0.0.0 255.255.255.0 U 100 0 0 ens33
################################# ICMP REQUEST #######################################################
# 根据路由表,可以知道该数据包需要从docker0转发出去,内部构造的流量并没有设置IN=xx
[ 6717.619904] iptable/raw-OUTPUTIN= OUT=docker0 SRC=172.17.0.1 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=23020 DF PROTO=ICMP TYPE=8 CODE=0 ID=2413 SEQ=1
[ 6717.619914] iptable/mangle-OUTPUTIN= OUT=docker0 SRC=172.17.0.1 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=23020 DF PROTO=ICMP TYPE=8 CODE=0 ID=2413 SEQ=1
[ 6717.619917] iptable/nat-OUTPUTIN= OUT=docker0 SRC=172.17.0.1 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=23020 DF PROTO=ICMP TYPE=8 CODE=0 ID=2413 SEQ=1
[ 6717.619920] iptable/filter-OUTPUTIN= OUT=docker0 SRC=172.17.0.1 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=23020 DF PROTO=ICMP TYPE=8 CODE=0 ID=2413 SEQ=1
# 这个可以做SNAT,如果有需要的话
[ 6717.619922] iptable/mangle-POSTROUTEIN= OUT=docker0 SRC=172.17.0.1 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=23020 DF PROTO=ICMP TYPE=8 CODE=0 ID=2413 SEQ=1
[ 6717.619924] iptable/nat-POSTROUTEIN= OUT=docker0 SRC=172.17.0.1 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=23020 DF PROTO=ICMP TYPE=8 CODE=0 ID=2413 SEQ=1
[ 6717.619954] ctc/ebtable/nat-OUTPUT IN= OUT=vethe9baedc MAC source = 02:42:ff:e7:b5:d2 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 6717.619957] ctc/ebtable/filter-output IN= OUT=vethe9baedc MAC source = 02:42:ff:e7:b5:d2 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 6717.619958] ctc/ebtable/nat-POSTROUTE IN= OUT=vethe9baedc MAC source = 02:42:ff:e7:b5:d2 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
################################## ICMP REPLY #######################################################
[ 6717.619996] ctc/ebtable/broute-BROUTING IN=vethe9baedc OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ff:e7:b5:d2 proto = 0x0800
# MAC dest 02:42:ff:e7:b5:d2,为本机地址(容器的MAC由于NAMESPACE隔离,对于物理机来说是不可见的),所以不会进行FORWARD,所以OUT会为空
[ 6717.619998] ctc/ebtable/nat-PREROUTE IN=vethe9baedc OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ff:e7:b5:d2 proto = 0x0800
[ 6717.620002] iptable/raw-PREROUTEIN=docker0 OUT= PHYSIN=vethe9baedc MAC=02:42:ff:e7:b5:d2:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=172.17.0.1 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=46632 PROTO=ICMP TYPE=0 CODE=0 ID=2413 SEQ=1
# 由于DST 172.17.0.1 为本机IP,故而不会进行FORWARD
[ 6717.620005] iptable/mangle-PREROUTEIN=docker0 OUT= PHYSIN=vethe9baedc MAC=02:42:ff:e7:b5:d2:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=172.17.0.1 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=46632 PROTO=ICMP TYPE=0 CODE=0 ID=2413 SEQ=1
# 由于DST 172.17.0.1为本机地址,此时不会进行FORWARD,即不会被转发出去,所以OUT为空
[ 6717.620007] ctc/ebtable/filter-input IN=vethe9baedc OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ff:e7:b5:d2 proto = 0x0800
[ 6717.620011] iptable/mangle-INPUTIN=docker0 OUT= PHYSIN=vethe9baedc MAC=02:42:ff:e7:b5:d2:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=172.17.0.1 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=46632 PROTO=ICMP TYPE=0 CODE=0 ID=2413 SEQ=1
[ 6717.620014] iptable/filter-INPUTIN=docker0 OUT= PHYSIN=vethe9baedc MAC=02:42:ff:e7:b5:d2:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=172.17.0.1 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=46632 PROTO=ICMP TYPE=0 CODE=0 ID=2413 SEQ=1
PING REQUEST流量流向图
PING REPLY流量流向图
Container to WAN
# 启动container1
[root@boy ~]# docker run -it busybox:1.28 sh
/ # ping -c1 114.114.114.114
PING 114.114.114.114 (114.114.114.114): 56 data bytes
64 bytes from 114.114.114.114: seq=0 ttl=127 time=26.485 ms
--- 114.114.114.114 ping statistics ---
1 packets transmitted, 1 packets received, 0% packet loss
round-trip min/avg/max = 26.485/26.485/26.485 ms
# 查看MAC信息
[root@boy ~]# ip link show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether 00:0c:29:ec:1c:2d brd ff:ff:ff:ff:ff:ff
3: docker0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 02:42:ff:e7:b5:d2 brd ff:ff:ff:ff:ff:ff
17: veth67de56d@if16: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master docker0 state UP mode DEFAULT group default
link/ether ba:97:f0:e8:ea:0b brd ff:ff:ff:ff:ff:ff link-netnsid 0
[root@boy ~]# bridge fdb
01:00:5e:00:00:01 dev ens33 self permanent
33:33:00:00:00:01 dev ens33 self permanent
33:33:ff:b4:91:65 dev ens33 self permanent
33:33:00:00:00:01 dev docker0 self permanent
01:00:5e:00:00:01 dev docker0 self permanent
33:33:ff:e7:b5:d2 dev docker0 self permanent
02:42:ff:e7:b5:d2 dev docker0 vlan 1 master docker0 permanent
02:42:ff:e7:b5:d2 dev docker0 master docker0 permanent
ba:97:f0:e8:ea:0b dev veth67de56d vlan 1 master docker0 permanent
02:42:ac:11:00:02 dev veth67de56d master docker0
ba:97:f0:e8:ea:0b dev veth67de56d master docker0 permanent
33:33:00:00:00:01 dev veth67de56d self permanent
01:00:5e:00:00:01 dev veth67de56d self permanent
33:33:ff:e8:ea:0b dev veth67de56d self permanent
[root@boy ~]# brctl show
bridge name bridge id STP enabled interfaces
docker0 8000.0242ffe7b5d2 no veth67de56d
[root@boy ~]# brctl showmacs docker0
port no mac addr is local? ageing timer
1 02:42:ac:11:00:02 no 75.02
1 ba:97:f0:e8:ea:0b yes 0.00
1 ba:97:f0:e8:ea:0b yes 0.00
# 查看路由表
[root@boy ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.0.2 0.0.0.0 UG 100 0 0 ens33
172.17.0.0 0.0.0.0 255.255.0.0 U 0 0 0 docker0
192.168.0.0 0.0.0.0 255.255.255.0 U 100 0 0 ens33
###################################### ICMP REQUEST ###################################################
[ 8712.864782] ctc/ebtable/broute-BROUTING IN=veth67de56d OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ff:e7:b5:d2 proto = 0x0800
[ 8712.864787] ctc/ebtable/nat-PREROUTE IN=veth67de56d OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ff:e7:b5:d2 proto = 0x0800
[ 8712.864815] iptable/raw-PREROUTEIN=docker0 OUT= PHYSIN=veth67de56d MAC=02:42:ff:e7:b5:d2:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=114.114.114.114 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=58746 DF PROTO=ICMP TYPE=8 CODE=0 ID=2048 SEQ=0
[ 8712.864822] iptable/mangle-PREROUTEIN=docker0 OUT= PHYSIN=veth67de56d MAC=02:42:ff:e7:b5:d2:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=114.114.114.114 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=58746 DF PROTO=ICMP TYPE=8 CODE=0 ID=2048 SEQ=0
[ 8712.864826] iptable/nat-PREROUTEIN=docker0 OUT= PHYSIN=veth67de56d MAC=02:42:ff:e7:b5:d2:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=114.114.114.114 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=58746 DF PROTO=ICMP TYPE=8 CODE=0 ID=2048 SEQ=0
# 由于ping的网段不属于container网段,它默认会将数据包发送给gateway: Docker0,此时目的MAC需要填写网关的MAC
[ 8712.864833] ctc/ebtable/filter-input IN=veth67de56d OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ff:e7:b5:d2 proto = 0x0800
# 由于114.114.114.114和192.168.0.10不在同一个网段,需要转发给网关,即需要从ens33出去
[ 8712.864842] iptable/mangle-FORWARDIN=docker0 OUT=ens33 PHYSIN=veth67de56d MAC=02:42:ff:e7:b5:d2:02:42:ac:11:00:02:08:00 SRC=172.17.0.2 DST=114.114.114.114 LEN=84 TOS=0x00 PREC=0x00 TTL=63 ID=58746 DF PROTO=ICMP TYPE=8 CODE=0 ID=2048 SEQ=0
# 在出去的时候把IN去掉了,此时还有一个MASQUERADE(根据OUT自动将SRC修改为其IP),将SRC源地址修改为:192.168.0.10
[ 8712.864845] iptable/mangle-POSTROUTEIN= OUT=ens33 PHYSIN=veth67de56d SRC=172.17.0.2 DST=114.114.114.114 LEN=84 TOS=0x00 PREC=0x00 TTL=63 ID=58746 DF PROTO=ICMP TYPE=8 CODE=0 ID=2048 SEQ=0
######################################### ICMP REPLY ####################################################
# 由于是ens33收到的包,不是bridge收到的包,前面不会进行ebtables过滤
[ 8712.913251] iptable/raw-PREROUTEIN=ens33 OUT= MAC=00:0c:29:ec:1c:2d:00:50:56:f7:bd:36:08:00 SRC=114.114.114.114 DST=192.168.0.10 LEN=84 TOS=0x00 PREC=0x00 TTL=128 ID=62292 PROTO=ICMP TYPE=0 CODE=0 ID=2048 SEQ=0
[ 8712.913286] iptable/mangle-PREROUTEIN=ens33 OUT= MAC=00:0c:29:ec:1c:2d:00:50:56:f7:bd:36:08:00 SRC=114.114.114.114 DST=192.168.0.10 LEN=84 TOS=0x00 PREC=0x00 TTL=128 ID=62292 PROTO=ICMP TYPE=0 CODE=0 ID=2048 SEQ=0
# 个人理解:这里由于conntrack导致不会经过NAT PREROUTING过滤,之前是由于MASQUERADE将源地址从172.17.0.2->192.168.0.10,现在这一次连接需要将目的IP从192.168.0.10 -> 172.17.0.2,根据路由表可知,需要转发给Docker0
[ 8712.913303] iptable/mangle-FORWARDIN=ens33 OUT=docker0 MAC=00:0c:29:ec:1c:2d:00:50:56:f7:bd:36:08:00 SRC=114.114.114.114 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=127 ID=62292 PROTO=ICMP TYPE=0 CODE=0 ID=2048 SEQ=0
[ 8712.913313] iptable/mangle-POSTROUTEIN= OUT=docker0 SRC=114.114.114.114 DST=172.17.0.2 LEN=84 TOS=0x00 PREC=0x00 TTL=127 ID=62292 PROTO=ICMP TYPE=0 CODE=0 ID=2048 SEQ=0
[ 8712.913352] ctc/ebtable/nat-OUTPUT IN= OUT=veth67de56d MAC source = 02:42:ff:e7:b5:d2 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 8712.913358] ctc/ebtable/filter-output IN= OUT=veth67de56d MAC source = 02:42:ff:e7:b5:d2 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 8712.913361] ctc/ebtable/nat-POSTROUTE IN= OUT=veth67de56d MAC source = 02:42:ff:e7:b5:d2 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
PING REQUEST流量流向图
PING REPLY流量流向图