Description
Typha: when the cluster has a large number of nodes, Felix can exchange data with etcd through Typha rather than going through kube-apiserver, which reduces the load on it. (to be verified)
Borrowing a diagram that someone else posted online; it illustrates this very well.
- BGP Client (BIRD): distributes the routing information that Felix writes into the kernel to the rest of the Calico network, ensuring connectivity between workloads
- BGP Route Reflector (BIRD): used in large-scale deployments; route distribution is centralized through one or more BGP Route Reflectors. When new records are added, the Route Reflector synchronizes them to its peers
1. The TyphaDaemon New function
NewClientV3 establishes the client connection; it looks like etcd at first glance, so let's see how it is actually used. backend.NewClient is defined in github.com/projectcalico/libcalico-go/lib/backend/client.go and supports both etcd and kubernetes backends.
ConfigureEarlyLogging configures logging
func New() *TyphaDaemon {
return &TyphaDaemon{
NewClientV3: func(config apiconfig.CalicoAPIConfig) (DatastoreClient, error) {
client, err := clientv3.New(config)
if err != nil {
return nil, err
}
return ClientV3Shim{client.(RealClientV3), config}, nil
},
ConfigureEarlyLogging: logutils.ConfigureEarlyLogging,
ConfigureLogging: logutils.ConfigureLogging,
CachesBySyncerType: map[syncproto.SyncerType]syncserver.BreadcrumbProvider{},
}
}
2. The InitializeAndServeForever function
DoEarlyRuntimeSetup mainly sets the log format and log level for early, pre-configuration logging.
ParseCommandLineArgs parses the command-line arguments, e.g. --config-file /etc/calico/typha.cfg
[global]
MetadataAddr = None
LogFilePath = None
LogSeverityFile = None
LoadConfiguration does quite a lot. The Config struct in pkg/config/config_params.go (a short excerpt below) has a large number of fields and is initialized with default values using reflection; keys with the typha prefix are loaded from environment variables, and then the config file /etc/calico/typha.cfg is read.
DatastoreType string   `config:"oneof(kubernetes,etcdv3);etcdv3;non-zero,die-on-fail"`
EtcdAddr      string   `config:"authority;127.0.0.1:2379;local"`
EtcdScheme    string   `config:"oneof(http,https);http;local"`
EtcdKeyFile   string   `config:"file(must-exist);;local"`
EtcdCertFile  string   `config:"file(must-exist);;local"`
EtcdCaFile    string   `config:"file(must-exist);;local"`
EtcdEndpoints []string `config:"endpoint-list;;local"`
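The tag format appears to be validator;default;extra options. As a rough, standalone sketch of how such `config` tags can be read with reflection (this is only an illustration, not Typha's actual loader in config_params.go):

package main

import (
	"fmt"
	"reflect"
	"strings"
)

// Illustration only: a tiny reflection-based reader for the
// `config:"validator;default;options"` tags shown above.
type Config struct {
	DatastoreType string `config:"oneof(kubernetes,etcdv3);etcdv3;non-zero,die-on-fail"`
	EtcdAddr      string `config:"authority;127.0.0.1:2379;local"`
}

func main() {
	t := reflect.TypeOf(Config{})
	for i := 0; i < t.NumField(); i++ {
		field := t.Field(i)
		parts := strings.SplitN(field.Tag.Get("config"), ";", 3)
		fmt.Printf("%s: validator=%q default=%q options=%q\n",
			field.Name, parts[0], parts[1], parts[2])
	}
}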
func (t *TyphaDaemon) InitializeAndServeForever(cxt context.Context) error {
t.DoEarlyRuntimeSetup()
t.ParseCommandLineArgs(nil)
err := t.LoadConfiguration(cxt)
if err != nil { // Should only happen if context is canceled.
return err
}
t.CreateServer()
t.Start(cxt)
t.WaitAndShutDown(cxt)
return nil
}
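For context, the daemon entry point presumably just wires these two calls together. A minimal sketch (the import path is an assumption; the New and InitializeAndServeForever signatures are the ones shown above):

package main

import (
	"context"

	"github.com/projectcalico/typha/pkg/daemon" // assumed import path
)

func main() {
	d := daemon.New()
	// Loads config, creates the server components and serves forever.
	_ = d.InitializeAndServeForever(context.Background())
}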
2.1 The CreateServer function
Creates and configures the server components, but does not start them.
FelixSyncerByIface calls felixsyncer.New to create the felix syncer, implemented in github.com/projectcalico/libcalico-go/lib/backend/syncersv1/felixsyncer/felixsyncerv1.go; it defines a long list of watchersyncer.ResourceType entries and instantiates a watcherSyncer, implemented in github.com/projectcalico/libcalico-go/lib/backend/watchersyncer/watchersyncer.go and analyzed in section 4.
BGPSyncerByIface calls bgpsyncer.New to create the bgp syncer, implemented in github.com/projectcalico/libcalico-go/lib/backend/syncersv1/bgpsyncer/bgpsyncer.go; it likewise instantiates a watcherSyncer.
// CreateServer creates and configures (but does not start) the server components.
func (t *TyphaDaemon) CreateServer() {
// Health monitoring, for liveness and readiness endpoints.
t.healthAggregator = health.NewHealthAggregator()
// Now create the Syncer and caching layer (one pipeline for each syncer we support).
t.addSyncerPipeline(syncproto.SyncerTypeFelix, t.DatastoreClient.FelixSyncerByIface)
t.addSyncerPipeline(syncproto.SyncerTypeBGP, t.DatastoreClient.BGPSyncerByIface)
}
2.1.1 The addSyncerPipeline function
calc.NewSyncerCallbacksDecoupler() mainly forwards data from the syncer to the validator; implemented in pkg/calc/async_decoupler.go
The second calc.NewSyncerCallbacksDecoupler forwards data from the validator to the cache
snapcache.New creates the snapshot cache, covered in section 3
func (t *TyphaDaemon) addSyncerPipeline(
syncerType syncproto.SyncerType,
newSyncer func(callbacks bapi.SyncerCallbacks) bapi.Syncer,
) {
// Get a Syncer from the datastore, which will feed the validator layer with updates.
syncerToValidator := calc.NewSyncerCallbacksDecoupler()
syncer := newSyncer(syncerToValidator)
log.Debugf("Created Syncer: %#v", syncer)
// Create the validator, which sits between the syncer and the cache.
validatorToCache := calc.NewSyncerCallbacksDecoupler()
validator := calc.NewValidationFilter(validatorToCache)
// Create our snapshot cache, which stores point-in-time copies of the datastore contents.
cache := snapcache.New(snapcache.Config{
MaxBatchSize: t.ConfigParams.SnapshotCacheMaxBatchSize,
HealthAggregator: t.healthAggregator,
})
pipeline := &syncerPipeline{
Type: syncerType,
Syncer: syncer,
SyncerToValidator: syncerToValidator,
Validator: validator,
ValidatorToCache: validatorToCache,
Cache: cache,
}
t.SyncerPipelines = append(t.SyncerPipelines, pipeline)
t.CachesBySyncerType[syncerType] = cache
}
2.2 The TyphaDaemon Start function
p.Cache.Start is covered in section 3.1
func (p syncerPipeline) Start(cxt context.Context) {
logCxt := log.WithField("syncerType", p.Type)
logCxt.Info("Starting syncer")
p.Syncer.Start()
logCxt.Info("Starting syncer-to-validator decoupler")
go p.SyncerToValidator.SendTo(p.Validator)
logCxt.Info("Starting validator-to-cache decoupler")
go p.ValidatorToCache.SendTo(p.Cache)
logCxt.Info("Starting cache")
p.Cache.Start(cxt)
logCxt.Info("Started syncer pipeline")
}
2.2.1 The channel that carries data from the syncer to the validator
func (a *SyncerCallbacksDecoupler) SendToContext(cxt context.Context, sink api.SyncerCallbacks) {
for {
select {
case obj := <-a.c:
switch obj := obj.(type) {
case api.SyncStatus:
sink.OnStatusUpdated(obj)
case []api.Update:
sink.OnUpdates(obj)
}
case <-cxt.Done():
logrus.WithError(cxt.Err()).Info("Context asked us to stop")
return
}
}
}
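To make the decoupling pattern concrete, here is a self-contained sketch of the same idea with simplified stand-in types (not libcalico-go's api package): the producer-side callbacks just push onto a channel, and a consumer goroutine type-switches and forwards to the downstream callbacks, exactly as SendToContext does above.

package main

import (
	"context"
	"fmt"
)

// Simplified stand-ins for api.SyncStatus / api.Update / api.SyncerCallbacks.
type SyncStatus int
type Update struct{ Key, Value string }

type Callbacks interface {
	OnStatusUpdated(status SyncStatus)
	OnUpdates(updates []Update)
}

// Decoupler: the producer side implements Callbacks by pushing onto a channel.
type Decoupler struct{ c chan interface{} }

func NewDecoupler() *Decoupler { return &Decoupler{c: make(chan interface{})} }

func (a *Decoupler) OnStatusUpdated(status SyncStatus) { a.c <- status }
func (a *Decoupler) OnUpdates(updates []Update)        { a.c <- updates }

// Consumer side: mirrors SendToContext in the snippet above.
func (a *Decoupler) SendTo(ctx context.Context, sink Callbacks) {
	for {
		select {
		case obj := <-a.c:
			switch obj := obj.(type) {
			case SyncStatus:
				sink.OnStatusUpdated(obj)
			case []Update:
				sink.OnUpdates(obj)
			}
		case <-ctx.Done():
			return
		}
	}
}

// A trivial sink that just prints what it receives.
type printSink struct{}

func (printSink) OnStatusUpdated(s SyncStatus) { fmt.Println("status:", s) }
func (printSink) OnUpdates(u []Update)         { fmt.Println("updates:", len(u)) }

func main() {
	d := NewDecoupler()
	ctx, cancel := context.WithCancel(context.Background())

	// Producer: in Typha this would be the syncer/validator calling the callbacks.
	go func() {
		d.OnUpdates([]Update{{Key: "k", Value: "v"}})
		d.OnStatusUpdated(1)
		cancel() // stop the consumer once everything has been handed over
	}()

	// Consumer: runs until the context is cancelled.
	d.SendTo(ctx, printSink{})
}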
2.3 The syncserver Start function
The serve function is fairly long but the logic is simple: it accepts requests, records each connection conn, and finally calls connection.handle to process it.
func (s *Server) Start(cxt context.Context) {
s.Finished.Add(2)
go s.serve(cxt)
go s.governNumberOfConnections(cxt)
}
2.3.1 The connection.handle function
doHandshake handles the handshake request, message type MsgClientHello
sendSnapshotAndUpdatesToClient asynchronously sends the snapshot and subsequent updates
sendPingsToClient sends periodic ping/pong keepalives
2.4 When the backend is kubernetes
if t.ConfigParams.ConnectionRebalancingMode == "kubernetes" {
log.Info("Kubernetes connection rebalancing is enabled, starting k8s poll goroutine.")
k8sAPI := k8s.NewK8sAPI()
ticker := jitter.NewTicker(
t.ConfigParams.K8sServicePollIntervalSecs,
t.ConfigParams.K8sServicePollIntervalSecs/10)
go k8s.PollK8sForConnectionLimit(cxt, t.ConfigParams, ticker.C, k8sAPI, t.Server)
}
log.Info("Started the datastore Syncer/cache layer/server.")
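The jitter.NewTicker(interval, interval/10) call above adds a random component to each poll interval so that many Typha instances do not all hit the Kubernetes API at exactly the same moment. A standalone sketch of the idea, using only the standard library (this is not the projectcalico jitter package):

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// jitteredTicks fires after the base interval plus a random extra of up to
// maxJitter, which is roughly what a jittered ticker provides.
func jitteredTicks(base, maxJitter time.Duration) <-chan time.Time {
	c := make(chan time.Time)
	go func() {
		for {
			time.Sleep(base + time.Duration(rand.Int63n(int64(maxJitter))))
			c <- time.Now()
		}
	}()
	return c
}

func main() {
	ticks := jitteredTicks(200*time.Millisecond, 20*time.Millisecond)
	for i := 0; i < 3; i++ {
		fmt.Println("tick at", <-ticks)
	}
}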
3. The snapcache New function
Every node in a Ctrie has a companion (indirection) node associated with it. When a snapshot is taken, the root node is copied to a new node, and other nodes are lazily copied into the new tree as they are accessed (a persistent data structure), so taking a snapshot costs constant time.
Compared with a synchronized map or a skip list, inserts into a Ctrie are somewhat more expensive because more indirection is involved. The real advantage of a Ctrie is memory consumption: unlike most hash tables, it only ever stores the set of keys present in the tree. Another performance benefit is that it can take a linearizable snapshot in constant time.
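A minimal example of this snapshot behaviour, assuming the Workiva go-datastructures ctrie package (the ctrie.New and ReadOnlySnapshot calls also appear in the New function below; Insert and Lookup are assumed from that package's API):

package main

import (
	"fmt"

	"github.com/Workiva/go-datastructures/trie/ctrie"
)

func main() {
	kvs := ctrie.New(nil /*default hash factory*/)
	kvs.Insert([]byte("key1"), "v1")

	// Constant-time, read-only snapshot: writes made afterwards are not visible in it.
	snap := kvs.ReadOnlySnapshot()
	kvs.Insert([]byte("key2"), "v2")

	_, inLive := kvs.Lookup([]byte("key2"))
	_, inSnap := snap.Lookup([]byte("key2"))
	fmt.Println("live:", inLive, "snapshot:", inSnap) // live: true snapshot: false
}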
func New(config Config) *Cache {
config.ApplyDefaults()
kvs := ctrie.New(nil /*default hash factory*/)
cond := sync.NewCond(&sync.Mutex{})
snap := &Breadcrumb{
Timestamp: time.Now(),
nextCond: cond,
KVs: kvs.ReadOnlySnapshot(),
}
c := &Cache{
config: config,
inputC: make(chan interface{}, config.MaxBatchSize*2),
breadcrumbCond: cond,
kvs: kvs,
currentBreadcrumb: (unsafe.Pointer)(snap),
wakeUpTicker: jitter.NewTicker(config.WakeUpInterval, config.WakeUpInterval/10),
healthTicks: time.NewTicker(healthInterval).C,
}
if config.HealthAggregator != nil {
config.HealthAggregator.RegisterReporter(healthName, &health.HealthReport{Live: true, Ready: true}, healthInterval*2)
}
c.reportHealth()
return c
}
3.1 The Cache Start function
// Start starts the cache's main loop in a background goroutine.
func (c *Cache) Start(ctx context.Context) {
go c.loop(ctx)
}
func (c *Cache) loop(ctx context.Context) {
for {
// First, block, waiting for updates and batch them up in our pendingXXX fields.
// This will opportunistically slurp up a limited number of pending updates.
if err := c.fillBatchFromInputQueue(ctx); err != nil {
log.WithError(err).Error("Snapshot main loop exiting.")
return
}
// Then publish the updates in new Breadcrumb(s).
c.publishBreadcrumbs()
}
}
3.1.1 The fillBatchFromInputQueue function
Consumes the Cache's inputC channel and stores update items into the pending fields; publishBreadcrumbs then processes the pending updates.
// fillBatchFromInputQueue waits for some input on the input channel, then opportunistically
// pulls as much as possible from the channel. Input is stored in the pendingXXX fields for
// the next stage of processing.
func (c *Cache) fillBatchFromInputQueue(ctx context.Context) error {
batchSize := 0
storePendingUpdate := func(obj interface{}) {
// (closure body elided in this excerpt: it stashes the status / update batch
// into the cache's pendingXXX fields and increments batchSize)
}
log.Debug("Waiting for next input...")
select {
case obj := <-c.inputC:
log.WithField("update", obj).Debug("Got first update, peeking...")
storePendingUpdate(obj)
batchLoop:
for batchSize < c.config.MaxBatchSize {
select {
case obj = <-c.inputC:
storePendingUpdate(obj)
case <-ctx.Done():
log.WithError(ctx.Err()).Info("Context is done. Stopping.")
return ctx.Err()
default:
break batchLoop
}
}
log.WithField("numUpdates", batchSize).Debug("Finished reading batch.")
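The shape of the batching logic is easier to see in isolation. A self-contained sketch of the same pattern: block for the first item, then drain the channel opportunistically with a select/default until the batch is full or the channel is empty.

package main

import "fmt"

// readBatch blocks for the first item, then opportunistically pulls more
// items (without blocking) up to the max batch size.
func readBatch(in <-chan int, max int) []int {
	batch := []int{<-in}
batchLoop:
	for len(batch) < max {
		select {
		case v := <-in:
			batch = append(batch, v)
		default:
			break batchLoop // channel empty: publish what we have
		}
	}
	return batch
}

func main() {
	in := make(chan int, 10)
	for i := 0; i < 5; i++ {
		in <- i
	}
	fmt.Println(readBatch(in, 3)) // [0 1 2]
	fmt.Println(readBatch(in, 3)) // [3 4]
}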
3.1.2 publishBreadcrumbs
Updates the master Ctrie and publishes a new Breadcrumb containing a read-only snapshot.
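The fields set up in New above (currentBreadcrumb held as an unsafe.Pointer, plus breadcrumbCond) suggest the usual publish pattern: the writer atomically swaps in a new immutable snapshot and broadcasts on the condition variable, while readers wait on the cond until a newer snapshot appears. A self-contained sketch of that pattern (an illustration, not Typha's actual Breadcrumb code):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"
)

type snapshot struct {
	Timestamp time.Time
	Value     int
}

type publisher struct {
	cond    *sync.Cond
	current unsafe.Pointer // *snapshot
}

func newPublisher() *publisher {
	p := &publisher{cond: sync.NewCond(&sync.Mutex{})}
	first := &snapshot{Timestamp: time.Now()}
	atomic.StorePointer(&p.current, unsafe.Pointer(first))
	return p
}

// publish atomically swaps in a new immutable snapshot and wakes any waiters.
func (p *publisher) publish(v int) {
	next := &snapshot{Timestamp: time.Now(), Value: v}
	atomic.StorePointer(&p.current, unsafe.Pointer(next))
	p.cond.L.Lock()
	p.cond.Broadcast()
	p.cond.L.Unlock()
}

// waitForNewerThan blocks until the current snapshot differs from old.
func (p *publisher) waitForNewerThan(old *snapshot) *snapshot {
	p.cond.L.Lock()
	defer p.cond.L.Unlock()
	for {
		cur := (*snapshot)(atomic.LoadPointer(&p.current))
		if cur != old {
			return cur
		}
		p.cond.Wait()
	}
}

func main() {
	p := newPublisher()
	first := (*snapshot)(atomic.LoadPointer(&p.current))
	go func() {
		time.Sleep(10 * time.Millisecond)
		p.publish(42)
	}()
	next := p.waitForNewerThan(first)
	fmt.Println("got new snapshot with value", next.Value)
}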
4. watcherSyncer
The watcherSyncer struct implements the api.Syncer interface, which has two methods: Start() and Stop().
// watcherSyncer implements the api.Syncer interface.
type watcherSyncer struct {
status api.SyncStatus
watcherCaches []*watcherCache
results chan interface{}
numSynced int
callbacks api.SyncerCallbacks
wgwc *sync.WaitGroup
wgws *sync.WaitGroup
cancel context.CancelFunc
}
4.1 The run method called by Start()
The run function implements the main syncer logic. It first sends a wait-for-datastore status and waits for the connection to the datastore. There are currently two watcherSyncers: felix and bgp.
For each ResourceType registered for a syncer, the corresponding watcherCache's run method is started; see section 5.1.
// run implements the main syncer loop that loops forever receiving watch events and translating
// to syncer updates.
func (ws *watcherSyncer) run(ctx context.Context) {
log.Debug("Sending initial status event and starting watchers")
ws.wgws.Add(1)
ws.sendStatusUpdate(api.WaitForDatastore)
for _, wc := range ws.watcherCaches {
ws.wgwc.Add(1)
go func(wc *watcherCache) {
wc.run(ctx)
log.Debug("Watcher cache run completed")
ws.wgwc.Done()
}(wc)
}
4.2 watcherSyncer processes the results channel
log.Info("Starting main event processing loop")
var updates []api.Update
for result := range ws.results {
// Process the data - this will append the data in subsequent calls, and action
// it if we hit a non-update event.
updates = ws.processResult(updates, result)
// Append results into the one update until we either flush the channel or we
// hit our fixed limit per update.
consolidatationloop:
for ii := 0; ii < maxUpdatesToConsolidate; ii++ {
select {
case next := <-ws.results:
updates = ws.processResult(updates, next)
default:
break consolidatationloop
}
}
// Perform final processing (pass in a nil result) before we loop and hit the blocking
// call again.
updates = ws.sendUpdates(updates)
}
4.3 The processResult function handles items from the results channel
Updates are not acted on immediately; instead they are grouped so that a larger, combined update can be sent to Felix. The possible result types are api.Update, error, and api.SyncStatus.
// Process a result from the result channel. We don't immediately action updates, but
// instead start grouping them together so that we can send a larger single update to
// Felix.
func (ws *watcherSyncer) processResult(updates []api.Update, result interface{}) []api.Update {
// Switch on the result type.
switch r := result.(type) {
case []api.Update:
// This is an update. If we don't have previous updates then also check to see
// if we need to shift the status into Resync.
// We append these updates to the previous if there were any.
if len(updates) == 0 && ws.status == api.WaitForDatastore {
ws.sendStatusUpdate(api.ResyncInProgress)
}
updates = append(updates, r...)
5. watcherCache
The key piece is the results channel: it is untyped, but it only ever receives error, api.Update, and api.SyncStatus values.
// The watcherCache provides watcher/syncer support for a single key type in the
// backend. These results are sent to the main WatcherSyncer on a buffered "results"
// channel. To ensure the order of events is received correctly by the main WatcherSyncer,
// we send all notification types in this channel. Note that because of this the results
// channel is untyped - however the watcherSyncer only expects one of the following
// types:
// - An error
// - An api.Update
// - A api.SyncStatus (only for the very first InSync notification)
type watcherCache struct {
logger *logrus.Entry
client api.Client
watch api.WatchInterface
resources map[string]cacheEntry
oldResources map[string]cacheEntry
results chan<- interface{}
hasSynced bool
errors int
resourceType ResourceType
currentWatchRevision string
}
5.1 The watcherCache run function
// run creates the watcher and loops indefinitely reading from the watcher.
func (wc *watcherCache) run(ctx context.Context) {
wc.logger.Debug("Watcher cache starting, start initial sync processing")
wc.resyncAndCreateWatcher(ctx)
5.1.1 The resyncAndCreateWatcher function loops performing resync processing until it successfully completes a resync and starts a watcher
// resyncAndCreateWatcher loops performing resync processing until it successfully
// completes a resync and starts a watcher.
func (wc *watcherCache) resyncAndCreateWatcher(ctx context.Context) {
// The passed in context allows a resync to be stopped mid-resync. The resync should be stopped as quickly as
// possible, but there should be usable data available in wc.resources so that delete events can be sent.
// The strategy is to
// - cancel any long running functions calls made from here, i.e. pass ctx to the client.list() calls
// - but if it finishes, then ensure that the listing gets processed.
// - cancel any sleeps if the context is cancelled
5.1.1.1 If there is no current watch revision, perform a full resync
// If we don't have a currentWatchRevision then we need to perform a full resync.
performFullResync := wc.currentWatchRevision == ""
5.1.1.2 Performing the full resync
| syncer | resourcetype | implementation path |
| --- | --- | --- |
| felix | configUpdateProcessor | github.com/projectcalico/libcalico-go/lib/backend/syncersv1/updateprocessors/configurationprocessor.go |
| felix | … | … |
UpdateProcessor.OnSyncerStarting mainly acts as a switch, telling the converter that a resync is starting
client.List fetches the resources from the backend; in this article the backend is kubernetes
handleWatchListEvent calls the UpdateProcessor's Process method on each KV; entries that pass are handed to handleAddedOrModifiedUpdate, which updates the KV and essentially just pushes values onto the channel: wc.results <- []api.Update
if performFullResync {
// Notify the converter that we are resyncing.
if wc.resourceType.UpdateProcessor != nil {
wc.logger.Debug("Trigger converter resync notification")
wc.resourceType.UpdateProcessor.OnSyncerStarting()
}
// Start the sync by Listing the current resources.
l, err := wc.client.List(ctx, wc.resourceType.ListInterface, "")
// Once this point is reached, it's important not to drop out if the context is cancelled.
// Move the current resources over to the oldResources
wc.oldResources = wc.resources
wc.resources = make(map[string]cacheEntry, 0)
// Send updates for each of the resources we listed - this will revalidate entries in
// the oldResources map.
for _, kvp := range l.KVPairs {
wc.handleWatchListEvent(kvp)
}
// We've listed the current settings. Complete the sync by notifying the main WatcherSyncer
// go routine (if we haven't already) and by sending deletes for the old resources that were
// not acknowledged by the List. The oldResources will be empty after this call.
wc.finishResync()
// Store the current watch revision. This gets updated on any new add/modified event.
wc.currentWatchRevision = l.Revision
}
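The bookkeeping here is a small mark-and-sweep: the previous cache is set aside as oldResources, each listed KV re-validates (and removes) its entry there, and whatever is left in oldResources is deleted by finishResync. A self-contained sketch of that idea (illustration only, with plain string maps instead of cacheEntry):

package main

import "fmt"

// resync applies a fresh listing against the previous cache contents and
// returns the new cache plus the keys that must be deleted because the
// listing no longer contains them.
func resync(old, listed map[string]string) (resources map[string]string, deletes []string) {
	resources = map[string]string{}
	for k, v := range listed {
		resources[k] = v // re-validated or newly added (would emit an update)
		delete(old, k)   // acknowledged by the List
	}
	for k := range old {
		deletes = append(deletes, k) // present before, missing from the List
	}
	return resources, deletes
}

func main() {
	old := map[string]string{"a": "1", "b": "2"}
	listed := map[string]string{"a": "1", "c": "3"}
	resources, deletes := resync(old, listed)
	fmt.Println(len(resources), "resources, deletes:", deletes) // 2 resources, deletes: [b]
}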
5.1.1.3 Start watching from the current revision
// And now start watching from the revision returned by the List, or from a previous watch event
// (depending on whether we were performing a full resync).
w, err := wc.client.Watch(ctx, wc.resourceType.ListInterface, wc.currentWatchRevision)
5.1.2 Syncing data from the watch channel
Event types include added, modified, deleted, and error. Handling is mainly done by handleWatchListEvent and is easy to follow: added, modified, and deleted KVs are pushed onto the results channel, which the watcherSyncer then processes.
case event, ok := <-wc.watch.ResultChan():
if !ok {
// If the channel is closed then resync/recreate the watch.
wc.logger.Info("Watch channel closed by remote - recreate watcher")
wc.resyncAndCreateWatcher(ctx)
continue
}
wc.logger.WithField("RC", wc.watch.ResultChan()).Debug("Reading event from results channel")
// Handle the specific event type.
switch event.Type {
case api.WatchAdded, api.WatchModified:
Summary:
At startup typha reads its configuration from command-line arguments, environment variables, and the config file, then creates the backend connection (kubernetes / etcd).
Two pipelines are registered, creating the felix and bgp syncers; each registers its own set of ResourceTypes.
Each watcherCache syncs its resource via list/watch and pushes results onto the results channel.
The watcherSyncer processes the results channel.