Podman网络之CNI模块 Podman的网络能力是基于CNI实现的,那么它是如何利用CNI网络来实现的呢?通过分析源码可以知道,Podman是直接利用了CRI-O封装的cniNetworkPlugin模块实现的。因此,本文分析的其实主要是CRI-O的cniNetworkPlugin模块的实现。
初识结构体 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 type cniNetworkPlugin struct { cniConfig *libcni.CNIConfig loNetwork *cniNetwork sync.RWMutex defaultNetName netName networks map [string ]*cniNetwork nsManager *nsManager confDir string binDirs []string shutdownChan chan struct {} watcher *fsnotify.Watcher done *sync.WaitGroup podsLock sync.Mutex pods map [string ]*podLock exec cniinvoke.Exec cacheDir string }
初始化CNI模块 初始化主要负责如下功能:
结构体初始化;
初始化netns管理模块;
加载cni网络配置文件;
启动监控cni网络配置目录变化的协程;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 func initCNI (exec cniinvoke.Exec, cacheDir, defaultNetName string , confDir string , binDirs ...string ) (CNIPlugin, error) { ... ... nsm, err := newNSManager() plugin.nsManager = nsm ... ... plugin.syncNetworkConfig() ... ... plugin.watcher, err = newWatcher(plugin.confDir) startWg := sync.WaitGroup{} startWg.Add(1 ) go plugin.monitorConfDir(&startWg) startWg.Wait() ... ... }
结构体初始化 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 plugin := &cniNetworkPlugin{ cniConfig: libcni.NewCNIConfig(binDirs, exec), defaultNetName: netName{ name: defaultNetName, changeable: defaultNetName == "" , }, networks: make (map [string ]*cniNetwork), loNetwork: getLoNetwork(), confDir: confDir, binDirs: binDirs, shutdownChan: make (chan struct {}), done: &sync.WaitGroup{}, pods: make (map [string ]*podLock), exec: exec, cacheDir: cacheDir, }
需要关注的两个点:
cniConfig:为cniNetworkPlugin提供了CNI底层操作的能力(加入和退出网络平面);
defaultNetName:既支持用户显式指定默认网络平面名,也支持自动选择(兼容K8S dockershim的实现:按文件名对所有配置文件排序,取第一个合法配置文件中定义的网络名作为默认网络平面名);
netns模块初始化 该模块通过封装nsenter命令,进入网络命名空间并获取其中的网络配置信息,例如IP、MAC等。
// defaultNamespaceEnterCommandName is the executable that nsManager looks up
// on PATH.
var defaultNamespaceEnterCommandName = "nsenter"

// nsManager remembers where the nsenter binary lives.
type nsManager struct {
	nsenterPath string
}

// init resolves defaultNamespaceEnterCommandName on PATH and stores the result
// in nsm.nsenterPath. The lookup error, if any, is returned unchanged.
func (nsm *nsManager) init() error {
	resolved, lookupErr := exec.LookPath(defaultNamespaceEnterCommandName)
	nsm.nsenterPath = resolved
	return lookupErr
}

// getContainerDetails is shown as a signature only; its body is not part of
// this excerpt.
func getContainerDetails(nsm *nsManager, netnsPath, interfaceName, addrType string) (*net.IPNet, *net.HardwareAddr, error)
加载cni网络配置文件 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 func loadNetworks (confDir string , cni *libcni.CNIConfig) (map [string ]*cniNetwork, string , error) { files, err := libcni.ConfFiles(confDir, []string {".conf" , ".conflist" , ".json" }) networks := make (map [string ]*cniNetwork) defaultNetName := "" sort.Strings(files) for _, confFile := range files { var confList *libcni.NetworkConfigList if strings.HasSuffix(confFile, ".conflist" ) { confList, err = libcni.ConfListFromFile(confFile) } else { conf, err := libcni.ConfFromFile(confFile) confList, err = libcni.ConfListFromConf(conf) } if len (confList.Plugins) == 0 { continue } if _, err := cni.ValidateNetworkList(context.TODO(), confList); err != nil { continue } if confList.Name == "" { confList.Name = path.Base(confFile) } cniNet := &cniNetwork{ name: confList.Name, filePath: confFile, config: confList, } if _, ok := networks[confList.Name]; !ok { networks[confList.Name] = cniNet } if defaultNetName == "" { defaultNetName = confList.Name } } return networks, defaultNetName, nil }
监控协程 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 func newWatcher (confDir string ) (*fsnotify.Watcher, error) { if err := os.MkdirAll(confDir, 0755 ); err != nil { return nil , fmt.Errorf("failed to create %q: %v" , confDir, err) } watcher, err := fsnotify.NewWatcher() if err != nil { return nil , fmt.Errorf("could not create new watcher %v" , err) } defer func () { if err != nil { watcher.Close() } }() if err = watcher.Add(confDir); err != nil { return nil , fmt.Errorf("failed to add watch on %q: %v" , confDir, err) } return watcher, nil } func (plugin *cniNetworkPlugin) monitorConfDir (start *sync.WaitGroup) { start.Done() plugin.done.Add(1 ) defer plugin.done.Done() for { select { case event := <-plugin.watcher.Events: logrus.Warningf("CNI monitoring event %v" , event) var defaultDeleted bool createWrite := (event.Op&fsnotify.Create == fsnotify.Create || event.Op&fsnotify.Write == fsnotify.Write) if event.Op&fsnotify.Remove == fsnotify.Remove { defNet := plugin.getDefaultNetwork() if defNet != nil && event.Name == defNet.filePath { defaultDeleted = true } } if !createWrite && !defaultDeleted { continue } if err := plugin.syncNetworkConfig(); err != nil { logrus.Errorf("CNI config loading failed, continue monitoring: %v" , err) continue } case err := <-plugin.watcher.Errors: if err == nil { continue } logrus.Errorf("CNI monitoring error %v" , err) return case <-plugin.shutdownChan: return } } }
CNI模块对外接口分析 通用函数 Podman支持pod加入多个网络平面,因此封装了一个forEachNetwork函数,用于依次对网络集合中的每个网络执行指定操作,其主要步骤如下:
保障所有网络平面的interface网卡名唯一;
构建runtimeconf;
获取网络名对应的cni网络配置信息;
执行特定操作;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 type forEachNetworkFn func (*cniNetwork, *PodNetwork, *libcni.RuntimeConf) error func (plugin *cniNetworkPlugin) forEachNetwork (podNetwork *PodNetwork, fromCache bool , actionFn forEachNetworkFn) error { networks := podNetwork.Networks if len (networks) == 0 { networks = append (networks, NetAttachment{ Name: plugin.GetDefaultNetworkName(), }) } allIfNames := make (map [string ]bool ) for _, req := range networks { if req.Ifname != "" { if allIfNames[req.Ifname] { return fmt.Errorf("network %q requested interface name %q already assigned" , req.Name, req.Ifname) } allIfNames[req.Ifname] = true } } for _, network := range networks { ifName := network.Ifname if ifName == "" { for i := 0 ; i < 10000 ; i++ { candidate := fmt.Sprintf("eth%d" , i) if !allIfNames[candidate] { allIfNames[candidate] = true ifName = candidate break } } if ifName == "" { return fmt.Errorf("failed to find free interface name for network %q" , network.Name) } } rt, err := buildCNIRuntimeConf(plugin.cacheDir, podNetwork, ifName, podNetwork.RuntimeConfig[network.Name]) if err != nil { logrus.Errorf("error building CNI runtime config: %v" , err) return err } var cniNet *cniNetwork if fromCache { var newRt *libcni.RuntimeConf cniNet, newRt, err = plugin.loadNetworkFromCache(network.Name, rt) if err != nil { logrus.Debugf("error loading cached network config: %v" , err) logrus.Debugf("falling back to loading from existing plugins on disk" ) } else { rt = newRt } } if cniNet == nil { cniNet, err = plugin.getNetwork(network.Name) if err != nil { logrus.Errorf(err.Error()) return err } } if err := actionFn(cniNet, podNetwork, rt); err != nil { return err } } return nil }
GetPodNetworkStatus 用于获取pod对应的网络命名空间的网络配置信息。当网络配置的CNI版本不低于0.4.0时,先通过CNI的check接口(CheckNetworkList)校验网络,随后与低版本一样读取缓存的结果(GetNetworkListCachedResult),最后进入网络命名空间获取IP、MAC信息。
graph TD
A[GetPodNetworkStatus] --> B[forEachNetwork]
B --> C[checkNetwork]
C --> D{version >= 0.4.0}
D -->|yes| E[CheckNetworkList]
D -->|no| F[GetNetworkListCachedResult]
E --> F
subgraph getnetinfo
H(getContainerDetails) --> O[run nsenter]
O --> P[get ip info]
O --> Q[get mac info]
end
F --> H
H --> I(parse result)
Status 1 2 3 4 5 6 func (plugin *cniNetworkPlugin) Status () error { if plugin.getDefaultNetwork() == nil { return fmt.Errorf(errMissingDefaultNetwork, plugin.confDir) } return nil }
Status用于判断CNI模块是否正常,以默认网络是否已设置作为判断标准。
SetUpPod flowchart TB
subgraph libcni
D[addToNetwork] --> E[AddNetworkList]
end
subgraph loopnet
B(setup loop) -->|2| C[LoopbackRuntimeConf]
B -->|3| D
end
A(SetUpPod) -->|1| loopnet
subgraph othernets
F(forEachNetwork) -->|6| D
F -->|7| G[record result]
end
A -->|4| H[pod lock]
H -->|5| othernets
othernets -->|8| I[pod unlock]
TearDownPod flowchart TB
subgraph libcni
D[deleteFromNetwork] --> E[DelNetworkList]
end
subgraph loopnet
B(teardown loop) -->|4| C[LoopbackRuntimeConf]
B -->|5| D
end
A(TearDownPod) -->|1| loopnet
subgraph othernets
F(forEachNetwork) -->|6| D
end
A -->|2| H[pod lock]
H -->|3| othernets
othernets -->|7| I[pod unlock]