https://github.com/projectcalico/cni-plugin,release-v3.8
calico 解决不同物理机上容器之间的通信,而 calico-plugin 是在 k8s 创建 Pod 时为 Pod 设置虚拟网卡(容器中的 eth0 和 lo 网卡)。calico-plugin 由两个静态的二进制文件组成,由 kubelet 以命令行的形式调用,这两个二进制的作用如下:
- calico-ipam:分配维护IP,依赖etcd
- calico:系统调用API来修改namespace中的网卡信息
calico插件配置
# cat /etc/cni/net.d/10-calico.conf
{
"name": "calico-k8s-network",
"cniVersion": "0.1.0",
"type": "calico",
"etcd_endpoints": "https://node1:2379,https://node2:2379,https://node3:2379",
"etcd_key_file": "/etc/calico/ssl/calico-key.pem",
"etcd_cert_file": "/etc/calico/ssl/calico.pem",
"etcd_ca_cert_file": "/etc/calico/ssl/ca.pem",
"log_level": "info",
"mtu": 1500,
"ipam": {
"type": "calico-ipam"
},
"policy": {
"type": "k8s"
},
"kubernetes": {
"kubeconfig": "/root/.kube/config"
}
}
{
"name": "k8s-pod-network",
"cniVersion": "0.3.0",
"plugins": [
{
"type": "calico",
"log_level": "info",
"datastore_type": "kubernetes",
"nodename": "master-node",
"mtu": 1440,
"ipam": {
"type": "calico-ipam"
},
"policy": {
"type": "k8s"
},
"kubernetes": {
"kubeconfig": "/etc/cni/net.d/calico-kubeconfig"
}
},
{
"type": "portmap",
"snat": true,
"capabilities": {"portMappings": true}
}
]
}
calico-plugin工作原理
kubelet 在创建一个 Pod 时,首先启动 pause 容器,然后为 pause 容器设置网络,也就是添加网卡;这里会通过 CNI 调起文件系统中的 /opt/cni/bin/calico,并将 Pod 信息通过标准输入(stdin)传递给 calico 进程,calico 通过修改系统中 Namespace 里的网卡信息来完成 Pod 的网络配置
1、main函数
- 与其他plugin代码一样的套路,主要调用skel.PluginMain函数,主要分析cmdAdd函数
// main is the entry point of the calico CNI binary. It configures logrus,
// handles the "-v" version flag, and otherwise hands control to the CNI skel
// package with the cmdAdd / cmdDel handlers.
func main() {
	// Logging: custom formatter plus a hook that adds file/line information.
	logrus.SetFormatter(&logutils.Formatter{})
	logrus.AddHook(&logutils.ContextHook{})

	// A dedicated flag set is used so that "-v" does not clash with any
	// library that registers flags on the global "flag" package.
	flags := flag.NewFlagSet("Calico", flag.ExitOnError)
	showVersion := flags.Bool("v", false, "Display version")
	if parseErr := flags.Parse(os.Args[1:]); parseErr != nil {
		fmt.Println(parseErr)
		os.Exit(1)
	}

	// "-v" prints the version and stops; nothing else is executed.
	if *showVersion {
		fmt.Println(VERSION)
		os.Exit(0)
	}

	if argsErr := utils.AddIgnoreUnknownArgs(); argsErr != nil {
		os.Exit(1)
	}

	// Delegate ADD / DEL handling to the CNI skeleton.
	skel.PluginMain(cmdAdd, cmdDel, cniSpecVersion.All)
}
2、cmdAdd函数
从标准输入读取配置信息,获取 nodename
func cmdAdd(args *skel.CmdArgs) error {
// Unmarshal the network config, and perform validation
conf := types.NetConf{}
if err := json.Unmarshal(args.StdinData, &conf); err != nil {
return fmt.Errorf("failed to load netconf: %v", err)
}
utils.ConfigureLogging(conf.LogLevel)
if !conf.NodenameFileOptional {
// Configured to wait for the nodename file - don't start until it exists.
if _, err := os.Stat("/var/lib/calico/nodename"); err != nil {
s := "%s: check that the calico/node container is running and has mounted /var/lib/calico/"
return fmt.Errorf(s, err)
}
logrus.Debug("/var/lib/calico/nodename exists")
}
// Determine which node name to use.
nodename := utils.DetermineNodename(conf)
2.1 CreateClient 函数
配置从标准输入传入,解析 etcd 相关的配置参数、kubeconfig 配置文件、后端存储类型(datastore type)等
LoadClientConfig 函数加载客户端配置:如果指定了文件则从该文件读取,未指定文件则从环境变量读取配置
// LoadClientConfig builds a CalicoAPIConfig from one of two sources:
//   - when filename is non-empty, the file is read and its contents are parsed
//     with LoadClientConfigFromBytes;
//   - when filename is empty, the configuration is taken from environment
//     variables via LoadClientConfigFromEnvironment.
//
// A read failure is returned unchanged; a parse failure is reported as a
// "syntax error" that names the offending file.
func LoadClientConfig(filename string) (*CalicoAPIConfig, error) {
	// No file given: the environment is the only configuration source.
	if filename == "" {
		return LoadClientConfigFromEnvironment()
	}

	raw, readErr := ioutil.ReadFile(filename)
	if readErr != nil {
		return nil, readErr
	}

	cfg, parseErr := LoadClientConfigFromBytes(raw)
	if parseErr != nil {
		return nil, fmt.Errorf("syntax error in %s: %v", filename, parseErr)
	}
	return cfg, nil
}
2.1.1 创建一个客户端连接,可以为 etcd 或者 kubernetes
// New builds a client on top of a freshly created backend datastore client.
// The config may be constructed by hand or obtained from LoadClientConfig().
// If the backend client cannot be created, its error is returned as-is.
func New(config apiconfig.CalicoAPIConfig) (Interface, error) {
	backendClient, err := backend.NewClient(config)
	if err != nil {
		return nil, err
	}

	// The same backend instance is shared by the client and its resources helper.
	c := client{
		config:    config,
		backend:   backendClient,
		resources: &resources{backend: backendClient},
	}
	return c, nil
}
2.1.2 NewClient 创建客户端连接,使用 etcdv3 或者 kubernetes
// NewClient creates a new backend datastore client, choosing the
// implementation from config.Spec.DatastoreType: etcdv3 or Kubernetes.
// Any other datastore type yields a nil client and an "Unknown datastore type"
// error.
func NewClient(config apiconfig.CalicoAPIConfig) (c bapi.Client, err error) {
	datastore := config.Spec.DatastoreType
	log.Debugf("Using datastore type '%s'", datastore)

	if datastore == apiconfig.EtcdV3 {
		c, err = etcdv3.NewEtcdV3Client(&config.Spec.EtcdConfig)
		return c, err
	}
	if datastore == apiconfig.Kubernetes {
		c, err = k8s.NewKubeClient(&config.Spec)
		return c, err
	}

	// Neither supported backend matched.
	err = errors.New(fmt.Sprintf("Unknown datastore type: %v",
		datastore))
	return c, err
}
2.2 查询 default 的 ClusterInformation 资源,而且 datastoreReady 值必须为 true
apiVersion: crd.projectcalico.org/v1
kind: ClusterInformation
metadata:
annotations:
projectcalico.org/metadata: '{"uid":"f87c290b-b0f5-11e9-8106-080027603363","creationTimestamp":"2019-07-28T05:10:09Z"}'
creationTimestamp: "2019-07-28T05:10:09Z"
generation: 1
name: default
resourceVersion: "2444411"
selfLink: /apis/crd.projectcalico.org/v1/clusterinformations/default
uid: f87c8c27-b0f5-11e9-a59b-080027603363
spec:
calicoVersion: v3.7.4
clusterGUID: 063ddadf474343059e1e446326172d97
clusterType: k8s,bgp,kdd
datastoreReady: true
ctx := context.Background()
ci, err := calicoClient.ClusterInformation().Get(ctx, "default", options.GetOptions{})
if err != nil {
return fmt.Errorf("error getting ClusterInformation: %v", err)
}
if *ci.Spec.DatastoreReady != true {
logrus.Info("Upgrade may be in progress, ready flag is not set")
return fmt.Errorf("Calico is currently not ready to process requests")
}
2.3 如果查询 endpoints,namespace name 都匹配则使用原来的 name
if len(endpoints.Items) > 0 {
logger.Debugf("List of WorkloadEndpoints %v", endpoints.Items)
for _, ep := range endpoints.Items {
match, err := wepIDs.WorkloadEndpointIdentifiers.NameMatches(ep.Name)
if err != nil {
// We should never hit this error, because it should have already been
// caught by CalculateWorkloadEndpointName.
return fmt.Errorf("invalid WorkloadEndpoint identifiers: %v", wepIDs.WorkloadEndpointIdentifiers)
}
if match {
logger.Debugf("Found a match for WorkloadEndpoint: %v", ep)
endpoint = &ep
// Assign the WEP name to wepIDs' WEPName field.
wepIDs.WEPName = endpoint.Name
// Put the endpoint name from the matched WEP in the identifiers.
wepIDs.Endpoint = ep.Spec.Endpoint
logger.Infof("Calico CNI found existing endpoint: %v", endpoint)
break
}
}
}
3 k8s.CmdAddK8s函数
// CmdAddK8s performs the "ADD" operation on a kubernetes pod
// Having kubernetes code in its own file avoids polluting the mainline code. It's expected that the kubernetes case will
// more special casing than the mainline code.
func CmdAddK8s(ctx context.Context, args *skel.CmdArgs, conf types.NetConf, epIDs utils.WEPIdentifiers, calicoClient calicoclient.Interface, endpoint *api.WorkloadEndpoint) (*current.Result, error) {
var err error
var result *current.Result
utils.ConfigureLogging(conf.LogLevel)
logger := logrus.WithFields(logrus.Fields{
"WorkloadEndpoint": epIDs.WEPName,
"ContainerID": epIDs.ContainerID,
"Pod": epIDs.Pod,
"Namespace": epIDs.Namespace,
})
3.1 IPAM 类型为 host-local
提取配置参数,包括 ipam route,如果未设置路由则使用默认路由,0.0.0.0/0
3.2 policy type 为 k8s
getK8sNSInfo 提取 namespace 的注解
getK8sPodInfo 提取 pod 的 label 注解 ports profiles
label | |
label | |
label | |
注解 | |
// Only attempt to fetch the labels and annotations from Kubernetes
// if the policy type has been set to "k8s". This allows users to
// run the plugin under Kubernetes without needing it to access the
// Kubernetes API
if conf.Policy.PolicyType == "k8s" {
var err error
annotNS, err = getK8sNSInfo(client, epIDs.Namespace)
if err != nil {
return nil, err
}
logger.WithField("NS Annotations", annotNS).Debug("Fetched K8s namespace annotations")
labels, annot, ports, profiles, generateName, err = getK8sPodInfo(client, epIDs.Pod, epIDs.Namespace)
3.3 如果 IPAM 类型为 calico-ipam 插件
提取注解中的 ippool:IPv4 对应的 key 为 cni.projectcalico.org/ipv4pools,IPv6 对应的 key 为 cni.projectcalico.org/ipv6pools;Pod 上的注解优先于 Namespace 上的注解
// Check for calico IPAM specific annotations and set them if needed.
if conf.IPAM.Type == "calico-ipam" {
var v4pools, v6pools string
// Sets the Namespace annotation for IP pools as default
v4pools = annotNS["cni.projectcalico.org/ipv4pools"]
v6pools = annotNS["cni.projectcalico.org/ipv6pools"]
// Gets the POD annotation for IP Pools and overwrites Namespace annotation if it exists
v4poolpod := annot["cni.projectcalico.org/ipv4pools"]
if len(v4poolpod) != 0 {
v4pools = v4poolpod
}
v6poolpod := annot["cni.projectcalico.org/ipv6pools"]
if len(v6poolpod) != 0 {
v6pools = v6poolpod
}
3.4 如果注解中存在 ippool,则设置 ipam 地址池
if len(v4pools) > 0 {
if err := json.Unmarshal([]byte(v4pools), &v4PoolSlice); err != nil {
logger.WithField("IPv4Pool", v4pools).Error("Error parsing IPv4 IPPools")
return nil, err
}
if _, ok := stdinData["ipam"].(map[string]interface{}); !ok {
logger.Fatal("Error asserting stdinData type")
os.Exit(0)
}
stdinData["ipam"].(map[string]interface{})["ipv4_pools"] = v4PoolSlice
logger.WithField("ipv4_pools", v4pools).Debug("Setting IPv4 Pools")
}
3.5 提取注解信息,主要是提供固定 IP
cni.projectcalico.org/ipAddrs: 指定一个要分配给 Pod 的 IPv4 和/或 IPv6 地址列表。 请求的 IP 地址将从 Calico IPAM 分配,并且必须存在于已配置的 IP pool 中
cni.projectcalico.org/ipAddrsNoIpam: 指定一个要分配给 Pod 的 IPv4 和/或 IPv6 地址列表,绕过 IPAM。 任何 IP 冲突和路由配置都必须由人工或其他系统处理。 Calico 仅处理那些属于 Calico IP pool 中的 IP 地址,将其路由分发到 Pod。 如果分配的 IP 地址不在 Calico IP pool 中,则须确保其他机制正确地处理该IP地址的路由
ipAddrsNoIpam := annot["cni.projectcalico.org/ipAddrsNoIpam"]
ipAddrs := annot["cni.projectcalico.org/ipAddrs"]
3.6 如果没有指定 IP,则调用 ipam.Exec 申请 IP 地址
cni.projectcalico.org/ipAddrs 和
cni.projectcalico.org/ipAddrsNoIpam 不能同时设置
// Switch based on which annotations are passed or not passed.
switch {
case ipAddrs == "" && ipAddrsNoIpam == "":
// Call the IPAM plugin.
result, err = utils.AddIPAM(conf, args, logger)
if err != nil {
return nil, err
}
case ipAddrs != "" && ipAddrsNoIpam != "":
// Can't have both ipAddrs and ipAddrsNoIpam annotations at the same time.
e := fmt.Errorf("can't have both annotations: 'ipAddrs' and 'ipAddrsNoIpam' in use at the same time")
logger.Error(e)
return nil, e
3.7 绕过 ipam 情况
调用 overrideIPAMResult 函数,简单的验证 ip 合法性,直接返回设置的 IP 地址
case ipAddrsNoIpam != "":
// Validate that we're allowed to use this feature.
if conf.IPAM.Type != "calico-ipam" {
e := fmt.Errorf("ipAddrsNoIpam is not compatible with configured IPAM: %s", conf.IPAM.Type)
logger.Error(e)
return nil, e
}
if !conf.FeatureControl.IPAddrsNoIpam {
e := fmt.Errorf("requested feature is not enabled: ip_addrs_no_ipam")
logger.Error(e)
return nil, e
}
// ipAddrsNoIpam annotation is set so bypass IPAM, and set the IPs manually.
overriddenResult, err := overrideIPAMResult(ipAddrsNoIpam, logger)
if err != nil {
return nil, err
}
logger.Debugf("Bypassing IPAM to set the result to: %+v", overriddenResult)
// Convert overridden IPAM result into current Result.
// This method fill in all the empty fields necessory for CNI output according to spec.
result, err = current.NewResultFromResult(overriddenResult)
if err != nil {
return nil, err
}
if len(result.IPs) == 0 {
return nil, errors.New("failed to build result")
}
3.8 指定 IP 地址的情况
如果 endpoint 已经存在时,释放先前的 IP
case ipAddrs != "":
// Validate that we're allowed to use this feature.
if conf.IPAM.Type != "calico-ipam" {
e := fmt.Errorf("ipAddrs is not compatible with configured IPAM: %s", conf.IPAM.Type)
logger.Error(e)
return nil, e
}
// If the endpoint already exists, we need to attempt to release the previous IP addresses here
// since the ADD call will fail when it tries to reallocate the same IPs. releaseIPAddrs assumes
// that Calico IPAM is in use, which is OK here since only Calico IPAM supports the ipAddrs
// annotation.
if endpoint != nil {
logger.Info("Endpoint already exists and ipAddrs is set. Release any old IPs")
if err := releaseIPAddrs(endpoint.Spec.IPNetworks, calicoClient, logger); err != nil {
return nil, fmt.Errorf("failed to release ipAddrs: %s", err)
}
}
// When ipAddrs annotation is set, we call out to the configured IPAM plugin
// requesting the specific IP addresses included in the annotation.
result, err = ipAddrsResult(ipAddrs, conf, args, logger)
if err != nil {
return nil, err
}
logger.Debugf("IPAM result set to: %+v", result)
}
3.9 配置 endpoint 为其赋值
// Configure the endpoint (creating if required).
if endpoint == nil {
logger.Debug("Initializing new WorkloadEndpoint resource")
endpoint = api.NewWorkloadEndpoint()
}
endpoint.Name = epIDs.WEPName
endpoint.Namespace = epIDs.Namespace
endpoint.Labels = labels
endpoint.GenerateName = generateName
endpoint.Spec.Endpoint = epIDs.Endpoint
endpoint.Spec.Node = epIDs.Node
endpoint.Spec.Orchestrator = epIDs.Orchestrator
endpoint.Spec.Pod = epIDs.Pod
endpoint.Spec.Ports = ports
endpoint.Spec.IPNetworks = []string{}
3.10 调用 DoNetworking 为其配置网络
创建 endpoint 的操作则是,调用 ip link add $contVethName type veth peer name $hostVethName 创建 veth pair
创建 169.254.1.1 的默认网络路由,将 host veth 端移到 host 的 namespace
第 6 章节详细讲解
// Whether the endpoint existed or not, the veth needs (re)creating.
hostVethName := k8sconversion.VethNameForWorkload(epIDs.Namespace, epIDs.Pod)
_, contVethMac, err := utils.DoNetworking(args, conf, result, logger, hostVethName, routes)
if err != nil {
logger.WithError(err).Error("Error setting up networking")
releaseIPAM()
return nil, err
}
分析 cni 的操作,不支持配置 "feature_control": { "ip_addrs_no_ipam": true }
// Default CNI behavior
// Validate enabled features
if conf.FeatureControl.IPAddrsNoIpam {
return errors.New("requested feature is not supported for this runtime: ip_addrs_no_ipam")
}
4. 如果存在相同的 namespace name 的 endpoint
不需要再创建 endpoint,也不需要创建 veth pair,只需要更新 profile
CreateResultFromEndpoint 函数从 workloadEndpoint 中抽出 IP 信息,作为返回的 IP 地址
endpointAlreadyExisted := endpoint != nil
if endpointAlreadyExisted {
// There is an existing endpoint - no need to create another.
// This occurs when adding an existing container to a new CNI network
// Find the IP address from the endpoint and use that in the response.
// Don't create the veth or do any networking.
// Just update the profile on the endpoint. The profile will be created if needed during the
// profile processing step.
foundProfile := false
for _, p := range endpoint.Spec.Profiles {
if p == profileID {
logger.Infof("Calico CNI endpoint already has profile: %s\n", profileID)
foundProfile = true
break
}
}
if !foundProfile {
logger.Infof("Calico CNI appending profile: %s\n", profileID)
endpoint.Spec.Profiles = append(endpoint.Spec.Profiles, profileID)
}
result, err = utils.CreateResultFromEndpoint(endpoint)
logger.WithField("result", result).Debug("Created result from endpoint")
if err != nil {
return err
}
}
4.1 CreateResultFromEndpoint 函数
从 workloadEndpoint 中抽出 IP 信息,作为返回的 IP 地址
// CreateResultFromEndpoint takes a WorkloadEndpoint, extracts IP information
// and populates that into a CNI Result.
func CreateResultFromEndpoint(wep *api.WorkloadEndpoint) (*current.Result, error) {
result := ¤t.Result{}
for _, v := range wep.Spec.IPNetworks {
parsedIPConfig := current.IPConfig{}
ipAddr, ipNet, err := net.ParseCIDR(v)
if err != nil {
return nil, err
}
parsedIPConfig.Address = *ipNet
if ipAddr.To4() != nil {
parsedIPConfig.Version = "4"
} else {
parsedIPConfig.Version = "6"
}
result.IPs = append(result.IPs, &parsedIPConfig)
}
return result, nil
}
第 5 章节分析不存在 endpoint 的情况,需要创建,请看下文分析
5. 不存在 endpoint 的情况
// There's no existing endpoint, so we need to do the following:
// 1) Call the configured IPAM plugin to get IP address(es)
// 2) Configure the Calico endpoint
// 3) Create the veth, configuring it on both the host and container namespace.
5.1 调用 calico ipam 或者 host-local,获取 IP,本文分析调用 calico ipam的情况
// 1) Run the IPAM plugin and make sure there's an IP address returned.
logger.WithFields(logrus.Fields{"paths": os.Getenv("CNI_PATH"),
"type": conf.IPAM.Type}).Debug("Looking for IPAM plugin in paths")
ipamResult, err := ipam.ExecAdd(conf.IPAM.Type, args.StdinData)
logger.WithField("IPAM result", ipamResult).Info("Got result from IPAM plugin")
if err != nil {
return err
}
5.2 把接口类型转换为 Result 结构体类型
// Convert IPAM result into current Result.
// IPAM result has a bunch of fields that are optional for an IPAM plugin
// but required for a CNI plugin, so this is to populate those fields.
// See CNI Spec doc for more details.
result, err = current.NewResultFromResult(ipamResult)
if err != nil {
utils.ReleaseIPAllocation(logger, conf, args)
return err
}
if len(result.IPs) == 0 {
utils.ReleaseIPAllocation(logger, conf, args)
return errors.New("IPAM plugin returned missing IP config")
}
5.3 创建 workloadendpoint 对象并赋值
apiVersion: projectcalico.org/v3
kind: WorkloadEndpoint
metadata:
creationTimestamp: 2019-08-05T07:56:42Z
generateName: mysql-hostpath-9ff8d9676-
labels:
app: wordpress-hostpath
pod-template-hash: 9ff8d9676
projectcalico.org/namespace: default
projectcalico.org/orchestrator: k8s
projectcalico.org/serviceaccount: default
tier: mysql-hostpath
name: master--node-k8s-mysql--hostpath--9ff8d9676--8njtp-eth0
namespace: default
resourceVersion: "2972911"
uid: 903386b5-b756-11e9-a1f8-080027603363
spec:
endpoint: eth0
interfaceName: cali40df26f67d0
ipNetworks:
- 192.170.77.171/32
node: master-node
orchestrator: k8s
pod: mysql-hostpath-9ff8d9676-8njtp
ports:
- name: mysql
port: 3306
protocol: TCP
profiles:
- kns.default
- ksa.default.default
// 2) Create the endpoint object
endpoint = api.NewWorkloadEndpoint()
endpoint.Name = wepIDs.WEPName
endpoint.Namespace = wepIDs.Namespace
endpoint.Spec.Endpoint = wepIDs.Endpoint
endpoint.Spec.Node = wepIDs.Node
endpoint.Spec.Orchestrator = wepIDs.Orchestrator
endpoint.Spec.ContainerID = wepIDs.ContainerID
endpoint.Labels = labels
endpoint.Spec.Profiles = []string{profileID}
5.4 DoNetworking 函数创建 veth pair hostVethName 和 contVethMac
创建 endpoint 的操作则是,调用 ip link add $contVethName type veth peer name $hostVethName 创建 veth pair
创建 169.254.1.1 的默认网络路由,将 host veth 端移到 host 的 namespace
第 6 章节详细讲解
// 3) Set up the veth
hostVethName, contVethMac, err := utils.DoNetworking(
args, conf, result, logger, "", utils.DefaultRoutes)
if err != nil {
// Cleanup IP allocation and return the error.
utils.ReleaseIPAllocation(logger, conf, args)
return err
}
5.5 CreateOrUpdate 函数
如果 workloadEndpoint 带有 ResourceVersion(说明先前已存在)则调用 Update 操作,否则调用 Create 操作,将 workloadEndpoint 资源信息存入后端存储中
// Write the endpoint object (either the newly created one, or the updated one with a new ProfileIDs).
if _, err := utils.CreateOrUpdate(ctx, calicoClient, endpoint); err != nil {
if !endpointAlreadyExisted {
// Only clean up the IP allocation if this was a new endpoint. Otherwise,
// we'd release the IP that is already attached to the existing endpoint.
utils.ReleaseIPAllocation(logger, conf, args)
}
return err
}
6. DoNetworking 函数
- 调用 netlink.LinkAdd(veth) 和 netlink.LinkSetUp(hostVeth) 创建一对 veth 网卡:主机端以 cali 开头,后面跟容器 ID 的前 11 位;然后把网卡放入容器内并设置 IP 和路由。veth 一端在 Linux 物理机上,一端在容器中,用于容器与物理机之间的通信
- ip.AddRoute(r, gw, contVeth) 添加路由
- netlink.LinkSetNsFd(hostVeth, int(hostNS.Fd())) 将 host 端 veth 移到 host 的 namespace,相当于命令 ip link set $link netns $ns
// DoNetworking performs the networking for the given config and IPAM result
func DoNetworking(
args *skel.CmdArgs,
conf types.NetConf,
result *current.Result,
logger *logrus.Entry,
desiredVethName string,
routes []*net.IPNet,
) (hostVethName, contVethMAC string, err error) {
// Select the first 11 characters of the containerID for the host veth.
hostVethName = "cali" + args.ContainerID[:Min(11, len(args.ContainerID))]
contVethName := args.IfName
var hasIPv4, hasIPv6 bool
// If a desired veth name was passed in, use that instead.
if desiredVethName != "" {
hostVethName = desiredVethName
}
6.1 如果 host 端 veth 已存在则先清理删除
// Clean up if hostVeth exists.
if oldHostVeth, err := netlink.LinkByName(hostVethName); err == nil {
if err = netlink.LinkDel(oldHostVeth); err != nil {
return "", "", fmt.Errorf("failed to delete old hostVeth %v: %v", hostVethName, err)
}
logger.Infof("Cleaning old hostVeth: %v", hostVethName)
}
6.2 在 namespace 中,创建 veth
相当于执行命令 ip link add $contVethName type veth peer name $hostVethName
err = ns.WithNetNSPath(args.Netns, func(hostNS ns.NetNS) error {
veth := &netlink.Veth{
LinkAttrs: netlink.LinkAttrs{
Name: contVethName,
Flags: net.FlagUp,
MTU: conf.MTU,
},
PeerName: hostVethName,
}
if err := netlink.LinkAdd(veth); err != nil {
logger.Errorf("Error adding veth %+v: %s", veth, err)
return err
}
6.3 设置 host 端 veth mac 地址
11: cali40df26f67d0@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1440 qdisc noqueue state UP
link/ether ee:ee:ee:ee:ee:ee brd ff:ff:ff:ff:ff:ff link-netnsid 2
inet6 fe80::ecee:eeff:feee:eeee/64 scope link
valid_lft forever preferred_lft forever
if mac, err := net.ParseMAC("EE:EE:EE:EE:EE:EE"); err != nil {
logger.Infof("failed to parse MAC Address: %v. Using kernel generated MAC.", err)
} else {
// Set the MAC address on the host side interface so the kernel does not
// have to generate a persistent address which fails some times.
if err = netlink.LinkSetHardwareAddr(hostVeth, mac); err != nil {
logger.Warnf("failed to Set MAC of %q: %v. Using kernel generated MAC.", hostVethName, err)
}
}
6.4 设置设备 up 启动状态
相当于 ip link set $hostVeth up
// Explicitly set the veth to UP state, because netlink doesn't always do that on all the platforms with net.FlagUp.
// veth won't get a link local address unless it's set to UP state.
if err = netlink.LinkSetUp(hostVeth); err != nil {
return fmt.Errorf("failed to set %q up: %v", hostVethName, err)
}
6.5 对于 ipv4 版本设置路由
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 169.254.1.1 0.0.0.0 UG 0 0 0 eth0
169.254.1.1 0.0.0.0 255.255.255.255 UH 0 0 0 eth0
添加默认网关 169.254.1.1,容器会查询下一跳 169.254.1.1 的 MAC 地址,通过 arp proxy 和修改容器路由表来实现
调用 ip.AddRoute 添加路由,相当于 ip route add 命令
// Do the per-IP version set-up. Add gateway routes etc.
if hasIPv4 {
// Add a connected route to a dummy next hop so that a default route can be set
gw := net.IPv4(169, 254, 1, 1)
gwNet := &net.IPNet{IP: gw, Mask: net.CIDRMask(32, 32)}
err := netlink.RouteAdd(
&netlink.Route{
LinkIndex: contVeth.Attrs().Index,
Scope: netlink.SCOPE_LINK,
Dst: gwNet,
},
)
if err != nil {
return fmt.Errorf("failed to add route inside the container: %v", err)
}
for _, r := range routes {
if r.IP.To4() == nil {
logger.WithField("route", r).Debug("Skipping non-IPv4 route")
continue
}
logger.WithField("route", r).Debug("Adding IPv4 route")
if err = ip.AddRoute(r, gw, contVeth); err != nil {
return fmt.Errorf("failed to add IPv4 route for %v via %v: %v", r, gw, err)
}
}
}
6.6 为容器端 veth 配置 ip 地址
相当于命令 ip addr add $addr dev $link
// Now add the IPs to the container side of the veth.
for _, addr := range result.IPs {
if err = netlink.AddrAdd(contVeth, &netlink.Addr{IPNet: &addr.Address}); err != nil {
return fmt.Errorf("failed to add IP addr to %q: %v", contVeth, err)
}
}
6.7 把 host veth 移到 host 的 namespace
// Now that the everything has been successfully set up in the container, move the "host" end of the
// veth into the host namespace.
if err = netlink.LinkSetNsFd(hostVeth, int(hostNS.Fd())); err != nil {
return fmt.Errorf("failed to move veth to host netns: %v", err)
}
总结:
从标准输入获取配置参数
创建 endpoint 的操作则是,调用 ip link add $contVethName type veth peer name $hostVethName 创建 veth pair
创建 169.254.1.1 的默认网络路由,将 host veth 端移到 host 的 namespace
注解解释
cni.projectcalico.org/ipAddrs: 指定一个要分配给Pod的IPv4和/或IPv6地址列表。 请求的IP地址将从Calico IPAM分配,并且必须存在于已配置的IP pool中
cni.projectcalico.org/ipAddrsNoIpam: 指定一个要分配给Pod的IPv4和/或IPv6地址列表,绕过IPAM。 任何IP冲突和路由配置都必须由人工或其他系统处理。 Calico仅处理那些属于Calico IP pool 中的IP地址,将其路由分发到Pod。 如果分配的IP地址不在Calico IP pool中,则必须确保通过其他机制正确地处理该IP地址的路由。
cni.projectcalico.org/ipv4pools:已配置的IPv4 pool列表,可从中选择Pod的地址。Calico IPAM 支持为每个命名空间或者是每个pod,指定专用的IP pool资源
参考:
https://docs.projectcalico.org/v2.5/reference/cni-plugin/configuration