CSI(Container Storage Interface)规范:
https://github.com/container-storage-interface/spec
主要工作是监听plugins目录,然后根据事件操作,Create事件则注册插件,Remove事件则删除插件
               +--------------------------------------+
               |            ReRegistration            |
               | Socket created with same plugin name |
               |                                      |
               |                                      |
    Socket Created                 v                   +        Socket Deleted
+------------------> Validate +----------------------> Register +--------------> DeRegister
                        +                                 +                          +
                        |                                 |                          |
                        | Error                           | Error                    |
                        |                                 |                          |
                        v                                 v                          v
                       Out                               Out                        Out
① node-driver 调用 CSI-plugin 插件的 GetPluginInfo 获得 driver 名字
② node-driver 创建 GRPC-Server,socket 地址在宿主机 /var/lib/kubelet/plugins_registry/${driver-name}-reg.sock,容器里为 /registration/${driver-name}-reg.sock
③ kubelet 与 node-driver 建立 GRPC 连接,调用 GetInfo 获得插件信息,包括 plugin 名字,GRPC-Server socket 地址,这个是 Watcher 目录,为 socket
④ kubelet 向 CSI-Plugin 发送 NodeGetInfo GRPC 请求,获得节点信息,将 CSI-Plugin 加入插件列表
⑤ kubelet 更新 node 资源,主要是更新注解 Annotations,创建 CSINode 资源
⑥ kubelet 调用 NotifyRegistrationStatus 通知 node-driver 是否成功
结构体 PluginHandler
// PluginHandler is the callback contract a plugin consumer implements so that
// the plugin watcher can drive a plugin through validation, registration and
// de-registration.
type PluginHandler interface {
	// ValidatePlugin reports an error when the information advertised by a
	// candidate plugin is not acceptable (for example an unsupported version).
	ValidatePlugin(pluginName, endpoint string, versions []string, foundInDeprecatedDir bool) error

	// RegisterPlugin hands the plugin to the plugin consumer so that it can
	// be registered. An error returned here can still be notified to the
	// plugin.
	RegisterPlugin(pluginName string, endpoint string, versions []string) error

	// DeRegisterPlugin is invoked once the pluginwatcher observes that the
	// plugin's socket has been deleted.
	DeRegisterPlugin(pluginName string)
}
1. plugin-watcher 初始化流程
enablePluginsWatcher 是开启插件监听(watch)功能的开关,目前已经开启
enablePluginsWatcher: utilfeature.DefaultFeatureGate.Enabled(features.KubeletPluginsWatcher)
1.1 显然需要实例化NewWatcher,sockDir路径为 /var/lib/kubelet/plugins_registry,deprecatedSockDir路径为 /var/lib/kubelet/plugins
if klet.enablePluginsWatcher {
klet.pluginWatcher = pluginwatcher.NewWatcher(
klet.getPluginsRegistrationDir(), /* sockDir */
klet.getPluginsDir(), /* deprecatedSockDir */
)
}
1.2 注册CSIPlugin,DevicePlugin回调函数
if kl.enablePluginsWatcher {
// Adding Registration Callback function for CSI Driver
kl.pluginWatcher.AddHandler(pluginwatcherapi.CSIPlugin, pluginwatcher.PluginHandler(csi.PluginHandler))
// Adding Registration Callback function for Device Manager
kl.pluginWatcher.AddHandler(pluginwatcherapi.DevicePlugin, kl.containerManager.GetPluginRegistrationHandler())
// Start the plugin watcher
klog.V(4).Infof("starting watcher")
if err := kl.pluginWatcher.Start(); err != nil {
kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.KubeletSetupFailed, err.Error())
klog.Fatalf("failed to start Plugin Watcher. err: %v", err)
}
}
2. Start函数
路径 pkg/kubelet/util/pluginwatcher/plugin_watcher.go
2.1 使用fsnotify watcher机制
顾名思义,fsnotify 用于监测目录中文件的增删事件
fsWatcher, err := fsnotify.NewWatcher()
if err != nil {
return fmt.Errorf("failed to start plugin fsWatcher, err: %v", err)
}
w.fsWatcher = fsWatcher
2.2 核心逻辑过程
处理 Create 与 Remove 事件,主要函数为 handleCreateEvent
w.wg.Add(1)
go func(fsWatcher *fsnotify.Watcher) {
defer w.wg.Done()
for {
select {
case event := <-fsWatcher.Events:
//TODO: Handle errors by taking corrective measures
w.wg.Add(1)
func() {
defer w.wg.Done()
if event.Op&fsnotify.Create == fsnotify.Create {
err := w.handleCreateEvent(event)
} else if event.Op&fsnotify.Remove == fsnotify.Remove {
err := w.handleDeleteEvent(event)
}
return
}()
}
}
}(fsWatcher)
2.3 遍历plugins路径,如果发现已存在的socket,则为其人为产生一个Create事件
比如 csi.sock时间发生变化,刚好启动csi插件进程,产生Create事件
// traversePluginDir walks the plugin directory tree rooted at dir to discover
// any existing plugin sockets. Every directory that is not blacklisted is
// added to the fsnotify watcher, and a synthetic Create event is emitted for
// every socket found.
// Goroutines started here will be waited for in Stop() before cleaning up.
//
// Walk errors are ignored (logged) except when the root dir itself is not
// walkable; a directory that cannot be added to the watcher also aborts the
// walk with an error.
func (w *Watcher) traversePluginDir(dir string) error {
	return w.fs.Walk(dir, func(path string, info os.FileInfo, err error) error {
		// When Walk reports an error for a path, info may be nil, so the
		// error must be handled before info is dereferenced.
		if err != nil {
			if path == dir {
				return fmt.Errorf("error accessing path: %s error: %v", path, err)
			}

			klog.Errorf("error accessing path: %s error: %v", path, err)
			return nil
		}

		switch mode := info.Mode(); {
		case mode.IsDir():
			if w.containsBlacklistedDir(path) {
				return filepath.SkipDir
			}

			if err := w.fsWatcher.Add(path); err != nil {
				return fmt.Errorf("failed to watch %s, err: %v", path, err)
			}
		case mode&os.ModeSocket != 0:
			// The send happens in its own goroutine, tracked by w.wg, so the
			// walk itself does not block on the Events channel.
			w.wg.Add(1)
			go func() {
				defer w.wg.Done()
				w.fsWatcher.Events <- fsnotify.Event{
					Name: path,
					Op:   fsnotify.Create,
				}
			}()
		default:
			klog.V(5).Infof("Ignoring file %s with mode %v", path, mode)
		}

		return nil
	})
}
3. handleCreateEvent函数
3.1 一些情况不需要处理的
if w.containsBlacklistedDir(event.Name) {
return nil
}
fi, err := os.Stat(event.Name)
if err != nil {
return fmt.Errorf("stat file %s failed: %v", event.Name, err)
}
if strings.HasPrefix(fi.Name(), ".") {
klog.V(5).Infof("Ignoring file (starts with '.'): %s", fi.Name())
return nil
}
3.2 如果为socket则处理
if !fi.IsDir() {
if fi.Mode()&os.ModeSocket == 0 {
klog.V(5).Infof("Ignoring non socket file %s", fi.Name())
return nil
}
return w.handlePluginRegistration(event.Name)
}
4. handlePluginRegistration函数
4.1 建立unix socket连接,调用GetInfo函数GRPC请求
比如node-driver-registrar提供type为CSIPlugin,得到回调函数handler
client, conn, err := dial(socketPath, 10*time.Second)
if err != nil {
return fmt.Errorf("dial failed at socket %s, err: %v", socketPath, err)
}
defer conn.Close()
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
infoResp, err := client.GetInfo(ctx, &registerapi.InfoRequest{})
if err != nil {
return fmt.Errorf("failed to get plugin info using RPC GetInfo at socket %s, err: %v", socketPath, err)
}
handler, ok := w.handlers[infoResp.Type]
if !ok {
return w.notifyPlugin(client, false, fmt.Sprintf("no handler registered for plugin type: %s at socket %s", infoResp.Type, socketPath))
}
4.2 注册插件存map中,比较简单
// registerPlugin records, under w.mutex, that the plugin identified by
// pluginType and pluginName is served at socketPath.
func (w *Watcher) registerPlugin(socketPath, pluginType, pluginName string) {
	w.mutex.Lock()
	defer w.mutex.Unlock()

	// Re-registration: if this plugin is already tracked under some path,
	// forget that entry first, so that handleDeleteEvent does not issue a
	// DeRegister call for it. Only the first match is removed.
	for knownPath, known := range w.plugins {
		if known.pluginType == pluginType && known.pluginName == pluginName {
			delete(w.plugins, knownPath)
			break
		}
	}

	entry := pathInfo{
		pluginType: pluginType,
		pluginName: pluginName,
	}
	w.plugins[socketPath] = entry
}
4.3 调用CSIPlugin注册插件
// We add the plugin to the pluginwatcher's map before calling a plugin consumer's Register handle
// so that if we receive a delete event during Register Plugin, we can process it as a DeRegister call.
w.registerPlugin(socketPath, infoResp.Type, infoResp.Name)
if err := handler.RegisterPlugin(infoResp.Name, infoResp.Endpoint, infoResp.SupportedVersions); err != nil {
return w.notifyPlugin(client, false, fmt.Sprintf("plugin registration failed with err: %v", err))
}
5. RegisterPlugin函数
路径 pkg/volume/csi/csi_plugin.go
5.1 保存插件名存map中,比如rbd.csi.ceph.com
func() {
// Storing endpoint of newly registered CSI driver into the map, where CSI driver name will be the key
// all other CSI components will be able to get the actual socket of CSI drivers by its name.
// It's not necessary to lock the entire RegistrationCallback() function because only the CSI
// client depends on this driver map, and the CSI client does not depend on node information
// updated in the rest of the function.
csiDrivers.Lock()
defer csiDrivers.Unlock()
csiDrivers.driversMap[pluginName] = csiDriver{driverName: pluginName, driverEndpoint: endpoint, highestSupportedVersion: highestSupportedVersion}
}()
5.2 建立socket 连接,调用NodeGetInfo函数GRPC请求
// Get node info from the driver.
csi, err := newCsiDriverClient(csiDriverName(pluginName))
if err != nil {
return err
}
// TODO (verult) retry with exponential backoff, possibly added in csi client library.
ctx, cancel := context.WithTimeout(context.Background(), csiTimeout)
defer cancel()
driverNodeID, maxVolumePerNode, accessibleTopology, err := csi.NodeGetInfo(ctx)
if err != nil {
klog.Error(log("registrationHandler.RegisterPlugin failed at CSI.NodeGetInfo: %v", err))
if unregErr := unregisterDriver(pluginName); unregErr != nil {
klog.Error(log("registrationHandler.RegisterPlugin failed to unregister plugin due to previous: %v", unregErr))
return unregErr
}
return err
}
5.3 根据名称安装CSI driver,具体做了什么暂时还不清楚,继续跟踪代码
err = nim.InstallCSIDriver(pluginName, driverNodeID, maxVolumePerNode, accessibleTopology)
if err != nil {
klog.Error(log("registrationHandler.RegisterPlugin failed at AddNodeInfo: %v", err))
if unregErr := unregisterDriver(pluginName); unregErr != nil {
klog.Error(log("registrationHandler.RegisterPlugin failed to unregister plugin due to previous error: %v", unregErr))
return unregErr
}
return err
}
nim使用全局变量,Init函数初始化,nodeInfoManager实现了接口,定义在pkg/volume/csi/nodeinfomanager/nodeinfomanager.go,继续跟
// Init stores the volume host on the plugin, starts the CSIDriver informer
// when the CSIDriverRegistry feature gate is enabled and a CSI client is
// available, and (re)creates the csiDrivers store and the node info manager.
// It always returns nil.
func (p *csiPlugin) Init(host volume.VolumeHost) error {
	p.host = host

	if utilfeature.DefaultFeatureGate.Enabled(features.CSIDriverRegistry) {
		if client := host.GetCSIClient(); client != nil {
			// Start informer for CSIDrivers.
			informerFactory := csiapiinformer.NewSharedInformerFactory(client, csiResyncPeriod)
			p.csiDriverInformer = informerFactory.Csi().V1alpha1().CSIDrivers()
			p.csiDriverLister = p.csiDriverInformer.Lister()
			go informerFactory.Start(wait.NeverStop)
		} else {
			klog.Warning("The client for CSI Custom Resources is not available, skipping informer initialization")
		}
	}

	// Set up an empty driver store and a node info manager for this node.
	csiDrivers = csiDriversStore{driversMap: map[string]csiDriver{}}
	nim = nodeinfomanager.NewNodeInfoManager(host.GetNodeName(), host)

	// TODO(#70514) Init CSINodeInfo object if the CRD exists and create Driver
	// objects for migrated drivers.

	return nil
}
6. InstallCSIDriver函数
更新node注解,添加注解
metadata:
annotations:
csi.volume.kubernetes.io/nodeid: '{"rbd.csi.ceph.com":"node1"}'
node.alpha.kubernetes.io/ttl: "0"
volumes.kubernetes.io/controller-managed-attach-detach: "true"
// InstallCSIDriver updates the node ID annotation in the Node object and CSIDrivers field in the
// CSINodeInfo object. If the CSINodeInfo object doesn't yet exist, it will be created.
// If multiple calls to InstallCSIDriver() are made in parallel, some calls might receive Node or
// CSINodeInfo update conflicts, which causes the function to retry the corresponding update.
func (nim *nodeInfoManager) InstallCSIDriver(driverName string, driverNodeID string, maxAttachLimit int64, topology map[string]string) error {
if driverNodeID == "" {
return fmt.Errorf("error adding CSI driver node info: driverNodeID must not be empty")
}