前言
host-local是CNI(containernetworking)官方提供的一个IPAM(IP地址管理器)插件,一般用于单机pod IP管理,在flannel和calico中都能看到它的身影。源码地址:https://github.com/containernetworking/plugins/tree/main/plugins/ipam/host-local
源码分析
host-local和其它CNI插件类似,只需要实现cmdCheck
、cmdAdd
和cmdDel
三个函数,这三个函数的入口都在main函数中:
func main() {
skel.PluginMain(cmdAdd, cmdCheck, cmdDel, version.All, bv.BuildString("host-local"))
}
cmdCheck
先来看看cmdCheck
的逻辑:先用allocator.LoadIPAMConfig
方法解析args.StdinData和args.Args数据成ipamConf,之后用disk.New
初始化一个store对象,再基于containerID和IfName,从store中寻找容器IP。如果未找到,返回error,找到则返回nil。
// cmdCheck handles the CHECK command. It loads the IPAM configuration, opens
// the on-disk store for the network, and returns an error unless the store
// holds an entry for args.ContainerID / args.IfName.
func cmdCheck(args *skel.CmdArgs) error {
	conf, _, err := allocator.LoadIPAMConfig(args.StdinData, args.Args)
	if err != nil {
		return err
	}

	// Only the existence of an allocation matters here; the concrete address
	// recorded in the data dir is not inspected.
	st, err := disk.New(conf.Name, conf.DataDir)
	if err != nil {
		return err
	}
	defer st.Close()

	if st.FindByID(args.ContainerID, args.IfName) {
		return nil
	}
	return fmt.Errorf("host-local: Failed to find address added by container %v", args.ContainerID)
}
- 看看
allocator.LoadIPAMConfig
的实现:
// LoadIPAMConfig decodes the JSON network configuration in bytes and returns
// its IPAM section together with the configuration's CNI version.
//
// Requested IPs (IPArgs) are gathered in this order: an IP found in envArgs
// replaces IPArgs, then IPs from Args.A and from RuntimeConfig are appended.
// Ranges are assembled as: RuntimeConfig.IPRanges first, then the old-style
// single Range, then the configured Ranges. Every range set is canonicalized
// and the sets are checked pairwise for overlap.
//
// An error is returned when the JSON is invalid, the IPAM section is missing,
// envArgs cannot be loaded, a requested IP cannot be canonicalized, no range
// is configured, a range set fails to canonicalize, more than one range set
// per address family is used with a CNI version below 0.3.0, or two range
// sets overlap.
func LoadIPAMConfig(bytes []byte, envArgs string) (*IPAMConfig, string, error) {
	var conf Net
	if err := json.Unmarshal(bytes, &conf); err != nil {
		return nil, "", err
	}

	ipam := conf.IPAM
	if ipam == nil {
		return nil, "", fmt.Errorf("IPAM config missing 'ipam' key")
	}

	// A custom IP passed through the env args replaces IPArgs.
	if envArgs != "" {
		var env IPAMEnvArgs
		if err := types.LoadArgs(envArgs, &env); err != nil {
			return nil, "", err
		}
		if custom := env.IP.ToIP(); custom != nil {
			ipam.IPArgs = []net.IP{custom}
		}
	}

	// Custom IPs from the CNI args embedded in the network config.
	if conf.Args != nil && conf.Args.A != nil {
		for _, reqIP := range conf.Args.A.IPs {
			ipam.IPArgs = append(ipam.IPArgs, reqIP.ToIP())
		}
	}

	// Custom IPs from the runtime configuration.
	for _, reqIP := range conf.RuntimeConfig.IPs {
		ipam.IPArgs = append(ipam.IPArgs, reqIP.ToIP())
	}

	for idx := range ipam.IPArgs {
		if err := canonicalizeIP(&ipam.IPArgs[idx]); err != nil {
			return nil, "", fmt.Errorf("cannot understand ip: %v", err)
		}
	}

	// An old-style single range is placed in front of the configured sets.
	if legacy := ipam.Range; legacy != nil && legacy.Subnet.IP != nil {
		ipam.Ranges = append([]RangeSet{{*legacy}}, ipam.Ranges...)
	}
	ipam.Range = nil

	// Ranges supplied through the runtime config go in front of all others.
	if runtimeRanges := conf.RuntimeConfig.IPRanges; len(runtimeRanges) > 0 {
		ipam.Ranges = append(runtimeRanges, ipam.Ranges...)
	}

	if len(ipam.Ranges) == 0 {
		return nil, "", fmt.Errorf("no IP ranges specified")
	}

	// Canonicalize each range set and count sets per address family; the
	// family is judged by the start address of the set's first range.
	var v4Sets, v6Sets int
	for idx := range ipam.Ranges {
		set := &ipam.Ranges[idx]
		if err := set.Canonicalize(); err != nil {
			return nil, "", fmt.Errorf("invalid range set %d: %s", idx, err)
		}
		if (*set)[0].RangeStart.To4() != nil {
			v4Sets++
		} else {
			v6Sets++
		}
	}

	// CNI spec 0.2.0 and below supported only one v4 and v6 address
	if v4Sets > 1 || v6Sets > 1 {
		if ok, _ := version.GreaterThanOrEqualTo(conf.CNIVersion, "0.3.0"); !ok {
			return nil, "", fmt.Errorf("CNI version %v does not support more than 1 address per family", conf.CNIVersion)
		}
	}

	// Compare every pair of range sets once; copies are used so the call
	// sees the same values as a value-based range loop would.
	total := len(ipam.Ranges)
	for i := 0; i < total-1; i++ {
		for j := i + 1; j < total; j++ {
			first, second := ipam.Ranges[i], ipam.Ranges[j]
			if first.Overlaps(&second) {
				return nil, "", fmt.Errorf("range set %d overlaps with %d", i, j)
			}
		}
	}

	// Carry the network name on the IPAM config so callers do not need Net.
	ipam.Name = conf.Name

	return ipam, conf.CNIVersion, nil
}
可以看出allocator.LoadIPAMConfig
就是把stdin和args里的数据解析到Net对象中,Net结构如下:
// Net is the structure LoadIPAMConfig unmarshals the JSON network
// configuration into.
type Net struct {
// Name is copied into IPAM.Name by LoadIPAMConfig.
Name string `json:"name"`
// CNIVersion is returned by LoadIPAMConfig and is checked against "0.3.0"
// when more than one range set per address family is configured.
CNIVersion string `json:"cniVersion"`
// IPAM is required; LoadIPAMConfig rejects a configuration where it is nil.
IPAM *IPAMConfig `json:"ipam"`
RuntimeConfig struct {
// The capability arg
// IPRanges are placed in front of IPAM.Ranges by LoadIPAMConfig.
IPRanges []RangeSet `json:"ipRanges,omitempty"`
// IPs are appended to IPAM.IPArgs by LoadIPAMConfig.
IPs []*ip.IP `json:"ips,omitempty"`
} `json:"runtimeConfig,omitempty"`
// Args is optional; when Args and Args.A are set, the IPs in Args.A are
// appended to IPAM.IPArgs by LoadIPAMConfig.
Args *struct {
A *IPAMArgs `json:"cni"`
} `json:"args"`
}
- 接着看看
disk.New
的实现,host-local这个插件的数据目录可以配置,默认是/var/lib/cni/networks/{name}
,其中name是它配置文件最外层的name字段。之后会基于这个目录,初始化好一个带文件锁(防止并发操作数据导致数据混乱)的store对象。
// New returns a Store rooted at dataDir/network, using defaultDataDir when
// dataDir is empty. The directory is created (mode 0755) if it does not
// exist, and a file lock for it is obtained through NewFileLock.
func New(network, dataDir string) (*Store, error) {
	base := dataDir
	if base == "" {
		base = defaultDataDir
	}

	storeDir := filepath.Join(base, network)
	if err := os.MkdirAll(storeDir, 0755); err != nil {
		return nil, err
	}

	lock, err := NewFileLock(storeDir)
	if err != nil {
		return nil, err
	}
	return &Store{lock, storeDir}, nil
}
// NewFileLock creates a FileLock backed by a filemutex at lockPath. When
// lockPath is a directory, the mutex file is the entry named "lock" inside
// it. lockPath must already exist, otherwise the os.Stat error is returned.
func NewFileLock(lockPath string) (*FileLock, error) {
	info, err := os.Stat(lockPath)
	if err != nil {
		return nil, err
	}

	target := lockPath
	if info.IsDir() {
		target = path.Join(lockPath, "lock")
	}

	mu, err := filemutex.New(target)
	if err != nil {
		return nil, err
	}
	return &FileLock{mu}, nil
}
- 再看看
store.FindByID
的实现。FindByID会先按containerID+\r\n+IfName
拼接match数据在/var/lib/cni/networks/{name}
目录调FindByKey函数找,如果没找到则只按containerID
为match数据去查找。FindByKey会遍历/var/lib/cni/networks/{name}
目录下所有的文件,查看文件内容是否和拼接好的match数据相同,如果相同,则表示找到。
// FindByKey walks everything below the store's data directory and reports
// whether some file's content, with surrounding whitespace trimmed, equals
// match. Directories, entries the walk reports an error for, and files that
// cannot be read are skipped. The id and ifname parameters are not used.
func (s *Store) FindByKey(id string, ifname string, match string) (bool, error) {
	var matched bool

	visit := func(p string, fi os.FileInfo, walkErr error) error {
		if walkErr != nil || fi.IsDir() {
			return nil
		}
		content, readErr := ioutil.ReadFile(p)
		if readErr != nil {
			return nil
		}
		if strings.TrimSpace(string(content)) == match {
			matched = true
		}
		return nil
	}

	err := filepath.Walk(s.dataDir, visit)
	return matched, err
}
// FindByID reports whether the store holds an entry for the given container
// id and interface name. It holds the store lock for the whole lookup. The
// first lookup matches the trimmed id joined to ifname by LineBreak; if that
// finds nothing and returns no error, a second lookup matches the trimmed id
// alone.
func (s *Store) FindByID(id string, ifname string) bool {
	s.Lock()
	defer s.Unlock()

	trimmed := strings.TrimSpace(id)
	found, err := s.FindByKey(id, ifname, trimmed+LineBreak+ifname)
	if found || err != nil {
		return found
	}

	// Match anything created by this id. The error is dropped because this
	// method only reports a boolean.
	found, _ = s.FindByKey(id, ifname, trimmed)
	return found
}
cmdAdd
接下来看看cmdAdd
,cmdAdd
一般用在创建sandbox容器时分配IP,代码如下。刚开始也是用allocator.LoadIPAMConfig
解析配置,再用disk.New
初始化store对象,这两步前面已经分析过,不再赘述。之后会根据配置里的ip range调allocator.NewIPAllocator
初始化一个allocator对象,再基于这个allocator对象的Get方法分配IP。如果出错,则调Release做回滚操作,Release我们放在cmdDel中再分析。
func cmdAdd(args *skel.CmdArgs) error {
ipamConf, confVersion, err := allocator.LoadIPAMConfig(args.StdinData, args.Args)
if err != nil {
return err
}
result := ¤t.Result{CNIVersion: current.ImplementedSpecVersion}
if ipamConf.ResolvConf != "" {
dns, err := parseResolvConf(ipamConf.ResolvConf)
if err != nil {
return err
}
result.DNS = *dns
}
store, err := disk.New(ipamConf.Name, ipamConf.DataDir)
if err != nil {
return err
}
defer store.Close()
// Keep the allocators we used, so we can release all IPs if an error
// occurs after we start allocating
allocs := []*allocator.IPAllocator{}
// Store all requested IPs in a map, so we can easily remove ones we use
// and error if some remain
requestedIPs := map[string]net.IP{} //net.IP cannot be a key
for _, ip := range ipamConf.IPArgs {
requestedIPs[ip.String()] = ip
}
for idx, rangeset := range ipamConf.Ranges {
allocator := allocator.NewIPAllocator(&rangeset, store, idx)
// Check to see if there are any custom IPs requested in this range.
var requestedIP net.IP
for k, ip := range requestedIPs {
if rangeset.Contains(ip) {
requestedIP = ip
delete(requestedIPs, k)
break
}
}
ipConf, err := allocator.Get(args.ContainerID, args.IfName, requestedIP)
if err != nil {
// Deallocate all already allocated IPs
for _, alloc := range allocs {
_ = alloc.Release(args.ContainerID, args.IfName)
}
return fmt.Errorf("failed to allocate for range %d: %v", idx, err)
}
allocs = append(allocs, allocator)
result.IPs = append(result.IPs, ipConf)
}
// If an IP was requested that wasn't fulfilled, fail
if len(requestedIPs) != 0 {
for _, alloc := range allocs {
_ = alloc.Release(args.ContainerID, args.IfName)
}
errstr := "failed to allocate all requested IPs:"
for _, ip := range requestedIPs {
errstr = errstr + " " + ip.String()
}
return fmt.Errorf(errstr)
}
result.Routes = ipamConf.Routes
return types.PrintResult(result, confVersion)
}
- 先看看
allocator.NewIPAllocator
,只是简单返回一个对象:
// NewIPAllocator builds an IPAllocator for range set s backed by store. The
// numeric id is stored in its decimal string form as the allocator's range
// ID.
func NewIPAllocator(s *RangeSet, store backend.Store, id int) *IPAllocator {
	alloc := new(IPAllocator)
	alloc.rangeset = s
	alloc.store = store
	alloc.rangeID = strconv.Itoa(id)
	return alloc
}
- 重点逻辑在
allocator.Get
中,allocator.Get
可以指定IP,如果指定了IP,则会先与配置中的ip range做校验(该IP必须落在某个range内,且不能是该range的网关),之后调store.Reserve方法保存数据。如果没有指定IP,则会先用store.GetByID方法查出已为这个containerID+IfName分配的IP,若其中有IP属于当前range set则报错(不允许重复分配),否则拿到迭代器,依次尝试下一个候选IP并调store.Reserve保存,直到保留成功或地址耗尽。
func (a *IPAllocator) Get(id string, ifname string, requestedIP net.IP) (*current.IPConfig, error) {
a.store.Lock()
defer a.store.Unlock()
var reservedIP *net.IPNet
var gw net.IP
if requestedIP != nil {
if err := canonicalizeIP(&requestedIP); err != nil {
return nil, err
}
r, err := a.rangeset.RangeFor(requestedIP)
if err != nil {
return nil, err
}
if requestedIP.Equal(r.Gateway) {
return nil, fmt.Errorf("requested ip %s is subnet's gateway", requestedIP.String())
}
reserved, err := a.store.Reserve(id, ifname, requestedIP, a.rangeID)
if err != nil {
return nil, err
}
if !reserved {
return nil, fmt.Errorf("requested IP address %s is not available in range set %s", requestedIP, a.rangeset.String())
}
reservedIP = &net.IPNet{IP: requestedIP, Mask: r.Subnet.Mask}
gw = r.Gateway
} else {
// try to get allocated IPs for this given id, if exists, just return error
// because duplicate allocation is not allowed in SPEC
// https://github.com/containernetworking/cni/blob/master/SPEC.md
allocatedIPs := a.store.GetByID(id, ifname)
for _, allocatedIP := range allocatedIPs {
// check whether the existing IP belong to this range set
if _, err := a.rangeset.RangeFor(allocatedIP); err == nil {
return nil, fmt.Errorf("%s has been allocated to %s, duplicate allocation is not allowed", allocatedIP.String(), id)
}
}
iter, err := a.GetIter()
if err != nil {
return nil, err
}
for {
reservedIP, gw = iter.Next()
if reservedIP == nil {
break
}
reserved, err := a.store.Reserve(id, ifname, reservedIP.IP, a.rangeID)
if err != nil {
return nil, err
}
if reserved {
break
}
}
}
if reservedIP == nil {
return nil, fmt.Errorf("no IP addresses available in range set: %s", a.rangeset.String())
}
return ¤t.IPConfig{
Address: *reservedIP,
Gateway: gw,
}, nil
}
- 迭代器的主要逻辑是基于LastReservedIP(这个数据保存在一个文件中)和ip range,找到下一个可分配的IP并返回
// GetIter returns an iterator over the allocator's range set.
//
// If the store has a last reserved IP for this range ID and the range set
// still contains it, the iterator is positioned on that address so the first
// Next call yields the address after it (round-robin). Otherwise iteration
// starts at the first range's RangeStart. A failure to read the last reserved
// IP, other than "not exist", is logged and treated as having none.
// The returned error is always nil.
func (a *IPAllocator) GetIter() (*RangeIter, error) {
	it := &RangeIter{rangeset: a.rangeset}

	// The stored IP may be stale if the range indexes changed. That is not
	// critical: round-robin is simply lost this one time.
	last, err := a.store.LastReservedIP(a.rangeID)
	resume := false
	switch {
	case err != nil && !os.IsNotExist(err):
		log.Printf("Error retrieving last reserved ip: %v", err)
	case last != nil:
		resume = a.rangeset.Contains(last)
	}

	if !resume {
		it.rangeIdx = 0
		it.startIP = (*a.rangeset)[0].RangeStart
		return it, nil
	}

	// Locate the range holding the last reserved IP. Next advances the
	// cursor before returning, so the first result is last + 1.
	for idx, r := range *a.rangeset {
		if r.Contains(last) {
			it.rangeIdx = idx
			it.cur = last
			break
		}
	}
	return it, nil
}
// Next advances the iterator and returns the next candidate address (with
// the subnet mask of the range it belongs to) and that range's gateway.
// Addresses equal to the range's gateway are skipped. After the end of a
// range the iterator moves to the start of the next range, wrapping around
// the set. Once the cursor comes back to the address iteration started from,
// Next returns (nil, nil).
func (i *RangeIter) Next() (*net.IPNet, net.IP) {
	for {
		r := (*i.rangeset)[i.rangeIdx]

		if i.cur == nil {
			// First call without a resume position: begin at RangeStart,
			// which is inclusive.
			i.cur = r.RangeStart
			i.startIP = i.cur
		} else {
			if i.cur.Equal(r.RangeEnd) {
				// RangeEnd is inclusive too; move on to the next range.
				i.rangeIdx = (i.rangeIdx + 1) % len(*i.rangeset)
				r = (*i.rangeset)[i.rangeIdx]
				i.cur = r.RangeStart
			} else {
				i.cur = ip.NextIP(i.cur)
			}

			if i.startIP == nil {
				i.startIP = i.cur
			} else if i.cur.Equal(i.startIP) {
				// Back where we started: give up.
				return nil, nil
			}
		}

		if i.cur.Equal(r.Gateway) {
			// Never hand out the gateway; take another step.
			continue
		}
		return &net.IPNet{IP: i.cur, Mask: r.Subnet.Mask}, r.Gateway
	}
}
cmdDel
cmdDel
前两步的逻辑也是一样的:allocator.LoadIPAMConfig解析配置,disk.New初始化store对象,重点在ipAllocator.Release
释放IP中。
// cmdDel handles the DEL command. It loads the IPAM configuration, opens the
// on-disk store, and asks an allocator for every configured range set to
// release whatever is recorded for args.ContainerID / args.IfName.
//
// Release is attempted for all range sets even if some fail; the failures
// are then reported together in one error, joined by ";".
func cmdDel(args *skel.CmdArgs) error {
	ipamConf, _, err := allocator.LoadIPAMConfig(args.StdinData, args.Args)
	if err != nil {
		return err
	}

	store, err := disk.New(ipamConf.Name, ipamConf.DataDir)
	if err != nil {
		return err
	}
	defer store.Close()

	// Loop through all ranges, releasing all IPs, even if an error occurs
	var errs []string
	for idx := range ipamConf.Ranges {
		// Use the slice element's address instead of a loop variable's.
		ipAllocator := allocator.NewIPAllocator(&ipamConf.Ranges[idx], store, idx)

		if err := ipAllocator.Release(args.ContainerID, args.IfName); err != nil {
			errs = append(errs, err.Error())
		}
	}

	if len(errs) > 0 {
		// The joined text is data, not a format string: a '%' inside an
		// underlying error message must be reported verbatim.
		return fmt.Errorf("%s", strings.Join(errs, ";"))
	}
	return nil
}
- ipAllocator.Release函数如下,大致逻辑就是根据containerID和IfName找到对应的文件,找到后把该文件删除。
// Release removes the store's entries for the given container id and
// interface name by calling ReleaseByID while holding the store lock.
func (a *IPAllocator) Release(id string, ifname string) error {
	st := a.store
	st.Lock()
	defer st.Unlock()

	return st.ReleaseByID(id, ifname)
}
// ReleaseByID deletes the files recorded for the given container id and
// interface name. It first matches the trimmed id joined to ifname by
// LineBreak; if nothing was released and no error occurred, it retries with
// the trimmed id alone. Only the error is returned; releasing nothing is not
// an error.
func (s *Store) ReleaseByID(id string, ifname string) error {
	trimmed := strings.TrimSpace(id)

	found, err := s.ReleaseByKey(id, ifname, trimmed+LineBreak+ifname)
	if found || err != nil {
		return err
	}

	// For backwards compatibility, look for files written by a previous version
	_, err = s.ReleaseByKey(id, ifname, trimmed)
	return err
}
// ReleaseByKey walks everything below the store's data directory and removes
// each file whose content, with surrounding whitespace trimmed, equals match.
// It reports whether at least one file was removed. Directories, entries the
// walk reports an error for, unreadable files, and files that fail to be
// removed are silently skipped. The id and ifname parameters are not used.
func (s *Store) ReleaseByKey(id string, ifname string, match string) (bool, error) {
	var removed bool

	visit := func(p string, fi os.FileInfo, walkErr error) error {
		if walkErr != nil || fi.IsDir() {
			return nil
		}
		content, readErr := ioutil.ReadFile(p)
		if readErr != nil {
			return nil
		}
		if strings.TrimSpace(string(content)) != match {
			return nil
		}
		if rmErr := os.Remove(p); rmErr == nil {
			removed = true
		}
		return nil
	}

	err := filepath.Walk(s.dataDir, visit)
	return removed, err
}
总结
我们以flannel使用host-local为例来看看host-local的数据目录结构。先看看flannel CNI配置文件:
# cat /etc/cni/net.d/10-flannel.conflist
{
"name": "cbr0",
"cniVersion": "0.3.1",
"plugins": [
{
"type": "flannel",
"delegate": {
"hairpinMode": true,
"isDefaultGateway": true
}
},
{
"type": "portmap",
"capabilities": {
"portMappings": true
}
}
]
}
再看看host-local数据目录
# ls -al /var/lib/cni/networks/cbr0/
总用量 20
drwxr-xr-x 2 root root 4096 4月 23 10:27 .
drwxr-xr-x 3 root root 4096 4月 11 20:31 ..
-rw-r--r-- 1 root root 70 4月 23 10:27 10.244.0.4
-rw-r--r-- 1 root root 70 4月 23 10:27 10.244.0.5
-rw-r--r-- 1 root root 10 4月 23 10:27 last_reserved_ip.0
-rwxr-x--- 1 root root 0 4月 11 20:31 lock
# cat /var/lib/cni/networks/cbr0/10.244.0.4
b70bce810fc74bcf7c48367e1e93c65a1f565c702af393cbb10139c5c85a4219
eth0
# cat /var/lib/cni/networks/cbr0/last_reserved_ip.0
10.244.0.5
结合前文源码分析可以看出,host-local是基于本地文件来维护本机IP分配逻辑的,分配出去一个IP会在数据目录下创建一个以该IP为文件名的文件,文件内容是对应的containerID+\r\n+IfName
,释放IP则是把该文件删除。
微信公众号卡巴斯同步发布,欢迎大家关注。