https://github.com/coreos/flannel
源码编译:make dist/flanneld-amd64
Flannel支持多种Backend协议,官方推荐使用以下Backend:
- VXLAN,性能损耗大概在20~30%
- host-gw, 性能损耗大概10%,要求Host之间二层直连,因此只适用于小集群
Flannel的工作原理
Flannel实质上是一种“覆盖网络(overlay network)”,也就是将TCP数据包装在另一种网络包里面进行路由转发和通信,目前已经支持UDP、VxLAN、AWS VPC和GCE路由等数据转发方式。 默认的节点间数据通信方式是UDP转发,其流程如下:
- 数据从源容器中发出后,经由所在主机的Docker0虚拟网卡转发到flannel0虚拟网卡,这是个P2P的虚拟网卡,flanneld服务监听在网卡的另外一端。
- Flannel通过Etcd服务维护了一张节点间的路由表。
- 源主机的flanneld服务将原本的数据内容UDP封装后根据自己的路由表投递给目的节点的flanneld服务,数据到达以后被解包,然后直接进入目的节点的flannel0虚拟网卡,然后被转发到目的主机的Docker0虚拟网卡,最后就像本机容器通信一样由Docker0路由到达目标容器。
flannel常用的是UDP模式,数据直接封装到UDP包中,这种网络模式损耗非常大,在50%左右;封装成vxlan也有30%左右的损耗
host-gw通过路由的方式,类似于calico不使用BGP。这种网络损耗最少。4.2内核加入的ipvlan性能应该是最好的,本质是网卡多个IP。
1、main函数
(1.1)
- 找到使用哪个interface,参数iface明确指定,参数iface-regex模糊匹配
- 如果没有指定则使用默认的,调用GetDefaultGatewayIface()函数,如果Destination为0.0.0.0/0,则使用该网卡
- 如果指定或者模糊匹配网卡,则取出IP地址
// Work out which interface to use
var extIface *backend.ExternalInterface
var err error
// Check the default interface only if no interfaces are specified
if len(opts.iface) == 0 && len(opts.ifaceRegex) == 0 {
extIface, err = LookupExtIface("", "")
if err != nil {
log.Error("Failed to find any valid interface to use: ", err)
os.Exit(1)
}
} else {
// Check explicitly specified interfaces
for _, iface := range opts.iface {
extIface, err = LookupExtIface(iface, "")
if err != nil {
log.Infof("Could not find valid interface matching %s: %s", iface, err)
}
if extIface != nil {
break
}
}
// Check interfaces that match any specified regexes
if extIface == nil {
for _, ifaceRegex := range opts.ifaceRegex {
extIface, err = LookupExtIface("", ifaceRegex)
if err != nil {
log.Infof("Could not find valid interface matching %s: %s", ifaceRegex, err)
}
if extIface != nil {
break
}
}
}
}
(1.2)newSubnetManager函数
- newSubnetManager函数创建子网管理,如没设置kubernetes子网管理,则使用etcd管理子网
sm, err := newSubnetManager()
if err != nil {
log.Error("Failed to create SubnetManager: ", err)
os.Exit(1)
}
log.Infof("Created subnet manager: %s", sm.Name())
(1.3)
- 注册信号处理:通过channel接收SIGINT和SIGTERM信号(注意是操作系统信号,不是信号量)
// Register for SIGINT and SIGTERM
log.Info("Installing signal handlers")
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, os.Interrupt, syscall.SIGTERM)
(1.4)getConfig函数
- getConfig函数获取网络配置,使用etcd管理子网,最终调用etcd接口读取配置
// getConfig polls the subnet manager until it yields a network configuration
// or ctx is cancelled.
//
// Every unsuccessful attempt is logged and followed by a one-second pause.
// It returns the configuration with a nil error on success, or
// (nil, errCanceled) if ctx is done before a configuration is obtained.
func getConfig(ctx context.Context, sm subnet.Manager) (*subnet.Config, error) {
	// Retry every second until it succeeds
	for {
		config, err := sm.GetNetworkConfig(ctx)
		switch {
		case err != nil:
			log.Errorf("Couldn't fetch network config: %s", err)
		case config == nil:
			// err is nil on this branch, so it is deliberately left out of
			// the message instead of being rendered as "%!s(<nil>)".
			log.Warningf("Couldn't find network config")
		default:
			log.Infof("Found network config - Backend type: %s", config.BackendType)
			return config, nil
		}

		select {
		case <-ctx.Done():
			return nil, errCanceled
		case <-time.After(1 * time.Second):
			// NOTE(review): this is written to stdout after every retry
			// pause; it marks the end of the one-second wait, not a failure.
			// Consider dropping it or routing it through the logger.
			fmt.Println("timed out")
		}
	}
}
(1.5)
- 创建后端管理,以及注册网络
- 每一個後端都調用init函數註冊到var constructors = make(map[string]BackendCtor)這個map中,下面以host-gw爲例
// Create a backend manager then use it to create the backend and register the network with it.
bm := backend.NewManager(ctx, sm, extIface)
be, err := bm.GetBackend(config.BackendType)
if err != nil {
log.Errorf("Error fetching backend: %s", err)
cancel()
wg.Wait()
os.Exit(1)
}
bn, err := be.RegisterNetwork(ctx, wg, config)
if err != nil {
log.Errorf("Error registering network: %s", err)
cancel()
wg.Wait()
os.Exit(1)
}
(1.6) RegisterNetwork函數
- RegisterNetwork設置路由
- AcquireLease函數,調用etcd讀取subnet路徑,就是租用一個子網
// RegisterNetwork builds the host-gw route network for this backend and
// acquires a subnet lease for it from the subnet manager.
//
// The returned network carries the external interface, its MTU and link
// index, and a GetRoute callback that turns a lease into a route whose
// destination is the lease's subnet and whose gateway is the lease's public
// IP. A context.Canceled or context.DeadlineExceeded error from AcquireLease
// is returned unchanged; any other error is wrapped as
// "failed to acquire lease". wg and config are not used by this backend.
func (be *HostgwBackend) RegisterNetwork(ctx context.Context, wg sync.WaitGroup, config *subnet.Config) (backend.Network, error) {
	const backendType = "host-gw"

	rn := &backend.RouteNetwork{
		SimpleNetwork: backend.SimpleNetwork{
			ExtIface: be.extIface,
		},
		SM:          be.sm,
		BackendType: backendType,
		Mtu:         be.extIface.Iface.MTU,
		LinkIndex:   be.extIface.Iface.Index,
	}

	// The link index is read from rn each time the callback runs.
	rn.GetRoute = func(l *subnet.Lease) *netlink.Route {
		route := netlink.Route{
			Dst:       l.Subnet.ToIPNet(),
			Gw:        l.Attrs.PublicIP.ToIP(),
			LinkIndex: rn.LinkIndex,
		}
		return &route
	}

	leaseAttrs := subnet.LeaseAttrs{
		PublicIP:    ip.FromIP(be.extIface.ExtAddr),
		BackendType: backendType,
	}

	acquired, err := be.sm.AcquireLease(ctx, &leaseAttrs)
	if err == context.Canceled || err == context.DeadlineExceeded {
		// Cancellation is passed through untouched so callers can compare it.
		return nil, err
	}
	if err != nil {
		return nil, fmt.Errorf("failed to acquire lease: %v", err)
	}

	rn.SubnetLease = acquired
	return rn, nil
}
(1.7)
- 建立iptables规则
// Set up ipMasq if needed
if opts.ipMasq {
if err = recycleIPTables(config.Network, bn.Lease()); err != nil {
log.Errorf("Failed to recycle IPTables rules, %v", err)
cancel()
wg.Wait()
os.Exit(1)
}
go network.SetupAndEnsureIPTables(network.MasqRules(config.Network, bn.Lease()), opts.iptablesResyncSeconds)
}
(1.8) WriteSubnetFile函数
- 将FLANNEL_NETWORK FLANNEL_SUBNET FLANNEL_MTU FLANNEL_IPMASQ写入文件
if err := WriteSubnetFile(opts.subnetFile, config.Network, opts.ipMasq, bn); err != nil {
// Continue, even though it failed.
log.Warningf("Failed to write subnet file: %s", err)
} else {
log.Infof("Wrote subnet file to %s", opts.subnetFile)
}
(1.9) Run函数
- Run函数第2章节讲解
log.Info("Running backend.")
wg.Add(1)
go func() {
bn.Run(ctx)
wg.Done()
}()
2、Run函数
根据host-gw模式,则调用到route_network.go中
// Run drives the route network until ctx is cancelled.
//
// It starts two goroutines: one running subnet.WatchLeases, which delivers
// batches of lease events on a channel, and one running n.routeCheck. The
// calling goroutine hands each received batch to n.handleSubnetEvents. When
// ctx is done, Run stops receiving and returns only after both goroutines
// have finished.
func (n *RouteNetwork) Run(ctx context.Context) {
	var workers sync.WaitGroup
	// Hold the return of Run until both helper goroutines have exited.
	defer workers.Wait()

	log.Info("Watching for new subnet leases")
	leaseEvents := make(chan []subnet.Event)
	workers.Add(1)
	go func() {
		defer workers.Done()
		subnet.WatchLeases(ctx, n.SM, n.SubnetLease, leaseEvents)
	}()

	// The route list is initialised before the route checker starts.
	n.routes = make([]netlink.Route, 0, 10)
	workers.Add(1)
	go func() {
		defer workers.Done()
		n.routeCheck(ctx)
	}()

	for {
		select {
		case batch := <-leaseEvents:
			n.handleSubnetEvents(batch)
		case <-ctx.Done():
			return
		}
	}
}
(2.1) WatchLeases函数
- WatchLeases函数最终调用etcd,放入evts channel事件里
// WatchLeases repeatedly asks sm for lease changes and forwards each
// non-empty batch of events to receiver until ctx is cancelled.
//
// The cursor returned by one watch call is passed to the next. A result that
// carries events is handed to the lease watcher's update method; a result
// without events is handed to its reset method together with the snapshot.
// A watch error of context.Canceled or context.DeadlineExceeded ends the
// loop; any other error is logged and retried after one second.
//
// Both the retry pause and the send on receiver also watch ctx.Done(), so
// the function returns promptly on cancellation even when nobody is reading
// from receiver any more. A plain blocking send would otherwise hang forever
// once the consumer has stopped receiving.
func WatchLeases(ctx context.Context, sm Manager, ownLease *Lease, receiver chan []Event) {
	lw := &leaseWatcher{
		ownLease: ownLease,
	}
	var cursor interface{}

	for {
		res, err := sm.WatchLeases(ctx, cursor)
		if err != nil {
			if err == context.Canceled || err == context.DeadlineExceeded {
				return
			}

			log.Errorf("Watch subnets: %v", err)
			// Back off for a second, but give up immediately on cancellation.
			select {
			case <-ctx.Done():
				return
			case <-time.After(time.Second):
			}
			continue
		}

		cursor = res.Cursor

		var batch []Event
		if len(res.Events) > 0 {
			batch = lw.update(res.Events)
		} else {
			batch = lw.reset(res.Snapshot)
		}

		if len(batch) == 0 {
			continue
		}

		// The send must not outlive the context: the consumer may already
		// have stopped receiving.
		select {
		case receiver <- batch:
		case <-ctx.Done():
			return
		}
	}
}
(2.2) handleSubnetEvents函数
讲解EventAdded这种类型,代码比较长,分节讲解
(2.2.1) netlink.RouteListFiltered函数
- 先把该路由加入本地维护的路由列表,再按目的网段查询本机是否已存在对应的路由
n.addToRouteList(*route)
// Check if route exists before attempting to add it
routeList, err := netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{Dst: route.Dst}, netlink.RT_FILTER_DST)
if err != nil {
log.Warningf("Unable to list routes: %v", err)
}
(2.2.2)
- 如果目的地址一样但网关不一样,删了以便下面代码可以重建
if len(routeList) > 0 && !routeEqual(routeList[0], *route) {
// Same Dst different Gw or different link index. Remove it, correct route will be added below.
log.Warningf("Replacing existing route to %v via %v dev index %d with %v via %v dev index %d.", evt.Lease.Subnet, routeList[0].Gw, routeList[0].LinkIndex, evt.Lease.Subnet, evt.Lease.Attrs.PublicIP, route.LinkIndex)
if err := netlink.RouteDel(&routeList[0]); err != nil {
log.Errorf("Error deleting route to %v: %v", evt.Lease.Subnet, err)
continue
}
n.removeFromRouteList(routeList[0])
}
(2.2.3)
- 如果已经有记录则不做操作
- 如果没有这条路由则通过RouteAdd添加到本地路由表中
if len(routeList) > 0 && routeEqual(routeList[0], *route) {
// Same Dst and same Gw, keep it and do not attempt to add it.
log.Infof("Route to %v via %v dev index %d already exists, skipping.", evt.Lease.Subnet, evt.Lease.Attrs.PublicIP, routeList[0].LinkIndex)
} else if err := netlink.RouteAdd(route); err != nil {
log.Errorf("Error adding route to %v via %v dev index %d: %v", evt.Lease.Subnet, evt.Lease.Attrs.PublicIP, route.LinkIndex, err)
continue
}