A First Look at the InfluxDB TSM Storage Engine: Nibbling Here and There

Contents

Process Overview

Source Code Analysis

Monitor init

MetaClient init

TSDBstore init

TCP Multiplexer

HTTPD

Starting the TSDBStore Service

Engine and Cache

Structure Diagrams

HTTP service

Memory store

Closing



Process Overview

influx -------------- command-line client

influx_inspect ----- inspection tool

influx_stress ------ stress-testing tool

influx_tsm --------- database conversion tool

influxd ------------- the influxdb server process

The TSM engine we want to study lives in the influxd process. Finding it directly is not easy, so let's start from how influxd boots its services; the entry point is bound to turn up along the way.

Basic InfluxDB concepts are not covered here; two links instead:

https://blog.csdn.net/suzy1030/article/details/81458237

https://blog.csdn.net/suzy1030/article/details/81459029

Source Code Analysis

Start reading the code from cmd.Run and pick out a few of the main services.

First, the configuration and environment parameters are loaded.

Then NewServer is constructed from that configuration and environment.

Monitor init

Initialize the Monitor: the config section pointers below are stored in the monitor.diagRegistrations map, and the Monitor watches these settings for changes.

func (c *Config) diagnosticsClients() map[string]diagnostics.Client {
	// Config settings that are always present.
	m := map[string]diagnostics.Client{
		"config": c,

		"config-data":        c.Data,
		"config-meta":        c.Meta,
		"config-coordinator": c.Coordinator,
		"config-retention":   c.Retention,
		"config-precreator":  c.Precreator,

		"config-monitor":    c.Monitor,
		"config-subscriber": c.Subscriber,
		"config-httpd":      c.HTTPD,

		"config-cqs": c.ContinuousQuery,
	}

	// Config settings that can be repeated and can be disabled.
	if g := graphite.Configs(c.GraphiteInputs); g.Enabled() {
		m["config-graphite"] = g
	}
	if cc := collectd.Configs(c.CollectdInputs); cc.Enabled() {
		m["config-collectd"] = cc
	}
	if t := opentsdb.Configs(c.OpenTSDBInputs); t.Enabled() {
		m["config-opentsdb"] = t
	}
	if u := udp.Configs(c.UDPInputs); u.Enabled() {
		m["config-udp"] = u
	}

	return m
}
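To make the pattern concrete, here is a minimal, self-contained sketch of that registration scheme: each config section acts as a diagnostics client, and the Monitor keeps them in a name-keyed map it can poll later. The interface and method here are simplified stand-ins, not influxdb's actual diagnostics API.

package main

import "fmt"

// Client is a simplified stand-in for a diagnostics client: anything that
// can report its current settings on demand.
type Client interface {
	Diagnostics() map[string]interface{}
}

type HTTPDConfig struct {
	BindAddress  string
	HTTPSEnabled bool
}

func (c *HTTPDConfig) Diagnostics() map[string]interface{} {
	return map[string]interface{}{
		"bind-address":  c.BindAddress,
		"https-enabled": c.HTTPSEnabled,
	}
}

type Monitor struct {
	diagRegistrations map[string]Client
}

func (m *Monitor) RegisterDiagnosticsClient(name string, c Client) {
	m.diagRegistrations[name] = c
}

func main() {
	m := &Monitor{diagRegistrations: make(map[string]Client)}
	m.RegisterDiagnosticsClient("config-httpd", &HTTPDConfig{BindAddress: ":8086"})

	// Something like SHOW DIAGNOSTICS can now walk the map on demand.
	for name, c := range m.diagRegistrations {
		fmt.Println(name, c.Diagnostics())
	}
}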

MetaClient init

Initialize the MetaClient, allocating a user table, authCache, that caches users who have already passed authentication.

	return &Client{
		cacheData: &Data{
			ClusterID: uint64(rand.Int63()),
			Index:     1,
		},
		closing:             make(chan struct{}),
		changed:             make(chan struct{}),
		logger:              zap.NewNop(),
		authCache:           make(map[string]authUser),
		path:                config.Dir,
		retentionAutoCreate: config.RetentionAutoCreate,
	}
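What authCache buys is skipping the expensive bcrypt comparison on repeat requests. A hedged sketch of the idea, with illustrative types rather than the actual meta.Client code: after one successful bcrypt check, a cheap salted digest is cached, and later requests are verified against that digest (as far as I can tell, the 1.x meta client behaves in this spirit).

package main

import (
	"bytes"
	"crypto/sha256"
	"fmt"
)

type authUser struct {
	salt []byte
	hash []byte // salted digest cached after a successful slow-path check
}

type Client struct {
	authCache map[string]authUser
}

func (c *Client) Authenticate(user, pass string) bool {
	if au, ok := c.authCache[user]; ok {
		sum := sha256.Sum256(append(au.salt, pass...))
		if bytes.Equal(sum[:], au.hash) {
			return true // fast path: this user+password was verified before
		}
	}
	// Slow path: verify against the stored bcrypt hash, then populate the
	// cache. Omitted in this sketch.
	return false
}

func main() {
	salt := []byte("random-salt")
	sum := sha256.Sum256(append(salt, "secret"...))
	c := &Client{authCache: map[string]authUser{
		"admin": {salt: salt, hash: sum[:]},
	}}
	fmt.Println(c.Authenticate("admin", "secret")) // true, served from cache
	fmt.Println(c.Authenticate("admin", "wrong"))  // false, would hit bcrypt
}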

The MetaClient then loads meta.db and deserializes its contents into the cache; meta.db stores database, retention-policy, and user information.

func (c *Client) Load() error {
	file := filepath.Join(c.path, metaFile)

	f, err := os.Open(file)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		return err
	}

	if err := c.cacheData.UnmarshalBinary(data); err != nil {
		return err
	}
	return nil
}
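Load's write-side counterpart, persisting cacheData back to meta.db, is worth a note. The crash-safe way is the classic write-temp-then-rename pattern, which is roughly what the meta client's snapshot does in the 1.x source; a hedged sketch, with an illustrative temp-file name:

package main

import (
	"io/ioutil"
	"os"
	"path/filepath"
)

func saveMeta(dir string, data []byte) error {
	tmp := filepath.Join(dir, "meta.db.tmp")
	file := filepath.Join(dir, "meta.db")

	if err := ioutil.WriteFile(tmp, data, 0666); err != nil {
		return err
	}
	// Rename is atomic on POSIX filesystems, so a reader sees either the
	// old meta.db or the new one, never a half-written file.
	return os.Rename(tmp, file)
}

func main() {
	dir, err := ioutil.TempDir("", "meta")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	if err := saveMeta(dir, []byte("serialized cacheData")); err != nil {
		panic(err)
	}
}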

TSDBstore init

Initialize the tsdb Store, which contains the databases and series-file maps; later writes and reads will come here to look things up.

func NewStore(path string) *Store {
	logger := zap.NewNop()
	return &Store{
		databases:           make(map[string]struct{}),
		path:                path,
		sfiles:              make(map[string]*SeriesFile),
		indexes:             make(map[string]interface{}),
		pendingShardDeletes: make(map[uint64]struct{}),
		EngineOptions:       NewEngineOptions(),
		Logger:              logger,
		baseLogger:          logger,
	}
}

Within it, the EngineOptions config is initialized with WALEnabled set to true, so WAL files will definitely be written.

OpenLimiter is a channel whose buffer size is runtime.GOMAXPROCS(0), normally the number of CPU cores; see the sketch after the code below.

func NewEngineOptions() EngineOptions {
	return EngineOptions{
		EngineVersion: DefaultEngine,
		IndexVersion:  DefaultIndex,
		Config:        NewConfig(),
		WALEnabled:    true,
		OpenLimiter:   limiter.NewFixed(runtime.GOMAXPROCS(0)),
	}
}
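limiter.Fixed is essentially a counting semaphore built on a buffered channel: the buffer size bounds how many goroutines can hold a slot at once. A minimal sketch consistent with the call above:

package main

import (
	"fmt"
	"runtime"
)

type Fixed chan struct{}

func NewFixed(limit int) Fixed { return make(Fixed, limit) }

func (t Fixed) Take()    { t <- struct{}{} } // blocks once the limit is reached
func (t Fixed) Release() { <-t }

func main() {
	lim := NewFixed(runtime.GOMAXPROCS(0)) // one slot per CPU core

	lim.Take() // e.g. taken before opening a shard
	defer lim.Release()
	fmt.Println("holding one of", cap(lim), "slots")
}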

Initialization of the Subscriber service, points writer, and query executor is skipped here.

That concludes NewServer; the next step is to Run this Server.

TCP Multiplexer

First a TCP multiplexer is created, initializing a map of listeners; later, different TCP requests are dispatched to different listeners' handlers.

func NewMux() *Mux {
	return &Mux{
		m:       make(map[byte]*listener),
		Timeout: DefaultTimeout,
		Logger:  log.New(os.Stderr, "[tcp] ", log.LstdFlags),
	}
}

A goroutine is started to run the multiplexer's TCP listening service. The SnapshotterService registers a listener with the mux; so far it is the only service found to register with the TCP multiplexer.

	// Multiplex listener.
	mux := tcp.NewMux()
	go mux.Serve(ln)

	...

	s.SnapshotterService.Listener = mux.Listen(snapshotter.MuxHeader)
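To see how that registration is used, here is a self-contained sketch of the dispatch loop: peek at the first byte of each accepted connection and hand the rest of the stream to whichever handler claimed that header byte. This is simplified (the real tcp.Mux also applies a read deadline), and the handler map and header value 3 are illustrative.

package main

import (
	"fmt"
	"io"
	"net"
)

func serveMux(ln net.Listener, handlers map[byte]func(net.Conn)) error {
	for {
		conn, err := ln.Accept()
		if err != nil {
			return err
		}
		go func(c net.Conn) {
			var header [1]byte
			if _, err := io.ReadFull(c, header[:]); err != nil {
				c.Close()
				return
			}
			h, ok := handlers[header[0]]
			if !ok {
				fmt.Printf("unregistered header byte: %#x\n", header[0])
				c.Close()
				return
			}
			h(c) // header byte already consumed, as in the real mux
		}(conn)
	}
}

func main() {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	handlers := map[byte]func(net.Conn){
		3: func(c net.Conn) { // e.g. a snapshot-style service claims byte 3
			defer c.Close()
			fmt.Fprintln(c, "snapshot service")
		},
	}
	fmt.Println("mux listening on", ln.Addr())
	_ = serveMux(ln, handlers)
}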

HTTPD

A batch of services is initialized and appended to the Server.Services slice. The HTTP service matters most here, since database access depends on it.

	s.appendMonitorService()
	s.appendPrecreatorService(s.config.Precreator)
	s.appendSnapshotterService()
	s.appendContinuousQueryService(s.config.ContinuousQuery)
	s.appendHTTPDService(s.config.HTTPD)
	s.appendStorageService(s.config.Storage)
	s.appendRetentionPolicyService(s.config.Retention)

HTTP service initialization:

func NewService(c Config) *Service {
	s := &Service{
		addr:           c.BindAddress,
		https:          c.HTTPSEnabled,
		cert:           c.HTTPSCertificate,
		key:            c.HTTPSPrivateKey,
		limit:          c.MaxConnectionLimit,
		tlsConfig:      c.TLS,
		err:            make(chan error),
		unixSocket:     c.UnixSocketEnabled,
		unixSocketPerm: uint32(c.UnixSocketPermissions),
		bindSocket:     c.BindSocket,
		Handler:        NewHandler(c),
		Logger:         zap.NewNop(),
	}
	if s.tlsConfig == nil {
		s.tlsConfig = new(tls.Config)
	}
	if s.key == "" {
		s.key = s.cert
	}
	if c.UnixSocketGroup != nil {
		s.unixSocketGroup = int(*c.UnixSocketGroup)
	}
	s.Handler.Logger = s.Logger
	return s
}

The handler inside it contains the entry points for handling all data requests. Found them at last; the TSM engine is getting closer.

func NewHandler(c Config) *Handler {
	h := &Handler{
		mux:            pat.New(),
		Config:         &c,
		Logger:         zap.NewNop(),
		CLFLogger:      log.New(os.Stderr, "[httpd] ", 0),
		Store:          storage.NewStore(),
		stats:          &Statistics{},
		requestTracker: NewRequestTracker(),
	}

	...

	h.AddRoutes([]Route{
		Route{
			"query-options", // Satisfy CORS checks.
			"OPTIONS", "/query", false, true, h.serveOptions,
		},
		Route{
			"query", // Query serving route.
			"GET", "/query", true, true, h.serveQuery,
		},
		Route{
			"query", // Query serving route.
			"POST", "/query", true, true, h.serveQuery,
		},
		Route{
			"write-options", // Satisfy CORS checks.
			"OPTIONS", "/write", false, true, h.serveOptions,
		},
		Route{
			"write", // Data-ingest route.
			"POST", "/write", true, writeLogEnabled, h.serveWrite,
		},
		Route{
			"prometheus-write", // Prometheus remote write
			"POST", "/api/v1/prom/write", false, true, h.servePromWrite,
		},
		Route{
			"prometheus-read", // Prometheus remote read
			"POST", "/api/v1/prom/read", true, true, h.servePromRead,
		},
		Route{ // Ping
			"ping",
			"GET", "/ping", false, true, h.servePing,
		},
		Route{ // Ping
			"ping-head",
			"HEAD", "/ping", false, true, h.servePing,
		},
		Route{ // Ping w/ status
			"status",
			"GET", "/status", false, true, h.serveStatus,
		},
		Route{ // Ping w/ status
			"status-head",
			"HEAD", "/status", false, true, h.serveStatus,
		},
		Route{
			"prometheus-metrics",
			"GET", "/metrics", false, true, promhttp.Handler().ServeHTTP,
		},
	}...)
	return h
}
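With these routes registered, writes and queries are plain HTTP calls. A quick client-side check against a local influxd, assuming a database named mydb already exists (CREATE DATABASE mydb):

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"strings"
)

func main() {
	// POST line protocol to the /write route.
	line := "cpu,host=server01 value=0.64\n"
	resp, err := http.Post(
		"http://localhost:8086/write?db=mydb",
		"application/octet-stream",
		strings.NewReader(line),
	)
	if err != nil {
		panic(err)
	}
	resp.Body.Close()
	fmt.Println("write status:", resp.Status) // expect 204 No Content

	// GET the /query route.
	q := url.Values{"db": {"mydb"}, "q": {"SELECT * FROM cpu"}}
	resp, err = http.Get("http://localhost:8086/query?" + q.Encode())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := ioutil.ReadAll(resp.Body)
	fmt.Println(string(body)) // JSON result set
}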

The HTTP service creates its own TCP server, unrelated to the multiplexer above. It binds its handler and starts a goroutine to listen for HTTP requests.

When requests come in, each new connection gets its own goroutine, which calls the handler to process the requests on that connection.

func (srv *Server) Serve(l net.Listener) error {
	defer l.Close()
	if fn := testHookServerServe; fn != nil {
		fn(srv, l)
	}
	var tempDelay time.Duration // how long to sleep on accept failure

	if err := srv.setupHTTP2_Serve(); err != nil {
		return err
	}

	srv.trackListener(l, true)
	defer srv.trackListener(l, false)

	baseCtx := context.Background() // base is always background, per Issue 16220
	ctx := context.WithValue(baseCtx, ServerContextKey, srv)
	for {
		rw, e := l.Accept()
		if e != nil {
			select {
			case <-srv.getDoneChan():
				return ErrServerClosed
			default:
			}
			if ne, ok := e.(net.Error); ok && ne.Temporary() {
				if tempDelay == 0 {
					tempDelay = 5 * time.Millisecond
				} else {
					tempDelay *= 2
				}
				if max := 1 * time.Second; tempDelay > max {
					tempDelay = max
				}
				srv.logf("http: Accept error: %v; retrying in %v", e, tempDelay)
				time.Sleep(tempDelay)
				continue
			}
			return e
		}
		tempDelay = 0
		c := srv.newConn(rw)
		c.setState(c.rwc, StateNew) // before Serve can return
		go c.serve(ctx)
	}
}

Starting the TSDBStore Service

Load the series file and the database index, then load the retention policies.

		// Load series file.
		sfile, err := s.openSeriesFile(db.Name())
		if err != nil {
			return err
		}

		// Retrieve database index.
		idx, err := s.createIndexIfNotExists(db.Name())
		if err != nil {
			return err
		}

		// Load each retention policy within the database directory.
		rpDirs, err := ioutil.ReadDir(dbPath)
		if err != nil {
			return err
		}

Finally the shards (TSM files) are loaded. Exactly what gets loaded is not yet clear; my guess is the index blocks and timestamps.

	// Open engine.
	shard := NewShard(shardID, path, walPath, sfile, opt)

	// Disable compactions, writes and queries until all shards are loaded
	shard.EnableOnOpen = false
	shard.WithLogger(s.baseLogger)

	err = shard.Open()
	if err != nil {
		log.Info("Failed to open shard", logger.Shard(shardID), zap.Error(err))
		resC <- &res{err: fmt.Errorf("Failed to open shard: %d: %s", shardID, err)}
		return
	}

An engine is started for each shard. The engine's structure and startup are the next things to study; they are also the crux of TSM.

func (s *Shard) Open() error {

		...

		// Initialize underlying engine.
		e, err := NewEngine(s.id, idx, s.path, s.walPath, s.sfile, s.options)
		if err != nil {
			return err
		}

		// Set log output on the engine.
		e.WithLogger(s.baseLogger)

		// Disable compactions while loading the index
		e.SetEnabled(false)

		// Open engine.
		if err := e.Open(); err != nil {
			return err
		}

		...

}

Engine and Cache

Each shard owns a single Engine, and within the Engine the data is held in the Cache.

The Compactor writes the cached data out to files based on cache size or elapsed time; a toy sketch of this write path follows the struct below.

type Engine struct {
	...

	WAL            *WAL
	Cache          *Cache
	Compactor      *Compactor
	CompactionPlan CompactionPlanner
	FileStore      *FileStore

	...

	// provides access to the total set of series IDs
	seriesIDSets tsdb.SeriesIDSets

	// seriesTypeMap maps a series key to field type
	seriesTypeMap *radix.Tree
}
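Here is a toy, self-contained sketch of the write path these fields imply (illustrative only, not the real engine): a point is appended to the WAL for durability, then written to the in-memory Cache; when the cache crosses a size threshold it is snapshotted and flushed to a TSM file. The real engine also flushes a cache that has been idle too long.

package main

import "fmt"

type Engine struct {
	wal       []string             // stands in for *WAL
	cache     map[string][]float64 // stands in for *Cache
	cacheSize int
	maxSize   int
}

func (e *Engine) WritePoint(key string, value float64) {
	e.wal = append(e.wal, fmt.Sprintf("%s=%v", key, value)) // 1. durable log
	e.cache[key] = append(e.cache[key], value)              // 2. in-memory store
	e.cacheSize++
	if e.cacheSize >= e.maxSize { // 3. size-based flush trigger
		e.snapshot()
	}
}

func (e *Engine) snapshot() {
	fmt.Printf("flushing %d cached values to a TSM file\n", e.cacheSize)
	e.cache = make(map[string][]float64)
	e.cacheSize = 0
	e.wal = nil // WAL segments covering snapshotted data can be removed
}

func main() {
	e := &Engine{cache: make(map[string][]float64), maxSize: 3}
	for i := 0; i < 4; i++ {
		e.WritePoint("cpu,host=server01#value", float64(i))
	}
}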

The storer inside the Cache struct holds the values (points). storer itself is an interface whose methods write and read values and entries. I have not yet found where this interface is instantiated, but the store can roughly be pictured as a map of entries, written and read by key, where an entry is the set of values sharing one key; see the sketch after the interface below.

type Cache struct {
	// Due to a bug in atomic  size needs to be the first word in the struct, as
	// that's the only place where you're guaranteed to be 64-bit aligned on a
	// 32 bit system. See: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
	size         uint64
	snapshotSize uint64

	mu      sync.RWMutex
	store   storer
	maxSize uint64

	// snapshots are the cache objects that are currently being written to tsm files
	// they're kept in memory while flushing so they can be queried along with the cache.
	// they are read only and should never be modified
	snapshot     *Cache
	snapshotting bool

	// This number is the number of pending or failed WriteSnaphot attempts since the last successful one.
	snapshotAttempts int

	stats         *CacheStatistics
	lastSnapshot  time.Time
	lastWriteTime time.Time

	// A one time synchronization used to initial the cache with a store.  Since the store can allocate a
	// a large amount memory across shards, we lazily create it.
	initialize       atomic.Value
	initializedCount uint32
}

type storer interface {
	entry(key []byte) *entry                        // Get an entry by its key.
	write(key []byte, values Values) (bool, error)  // Write an entry to the store.
	add(key []byte, entry *entry)                   // Add a new entry to the store.
	remove(key []byte)                              // Remove an entry from the store.
	keys(sorted bool) [][]byte                      // Return an optionally sorted slice of entry keys.
	apply(f func([]byte, *entry) error) error       // Apply f to all entries in the store in parallel.
	applySerial(f func([]byte, *entry) error) error // Apply f to all entries in serial.
	reset()                                         // Reset the store to an initial unused state.
	split(n int) []storer                           // Split splits the store into n stores
	count() int                                     // Count returns the number of keys in the store
}
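That guess is close: in the tsm1 package the storer appears to be backed by a partitioned hash map (a type called ring) from series key to *entry, and an entry is indeed the set of values written under one key. A single-map sketch of the core operations, leaving out the partitioning the real ring uses to cut lock contention:

package main

import (
	"fmt"
	"sort"
	"sync"
)

type Values []float64 // stands in for tsm1.Values

type entry struct {
	values Values
}

type mapStore struct {
	mu sync.RWMutex
	m  map[string]*entry
}

func newMapStore() *mapStore { return &mapStore{m: make(map[string]*entry)} }

func (s *mapStore) write(key []byte, vs Values) {
	s.mu.Lock()
	defer s.mu.Unlock()
	e, ok := s.m[string(key)]
	if !ok {
		e = &entry{}
		s.m[string(key)] = e
	}
	e.values = append(e.values, vs...) // all values for one key live together
}

func (s *mapStore) entry(key []byte) *entry {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.m[string(key)]
}

func (s *mapStore) keys(sorted bool) [][]byte {
	s.mu.RLock()
	defer s.mu.RUnlock()
	out := make([][]byte, 0, len(s.m))
	for k := range s.m {
		out = append(out, []byte(k))
	}
	if sorted {
		sort.Slice(out, func(i, j int) bool { return string(out[i]) < string(out[j]) })
	}
	return out
}

func main() {
	s := newMapStore()
	s.write([]byte("cpu,host=server01#value"), Values{0.64, 0.65})
	fmt.Println(len(s.keys(true)), "key(s):", s.entry([]byte("cpu,host=server01#value")).values)
}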

Structure Diagrams

HTTP service

(diagram)

Memory store

(diagram)

Closing

That is it for this first look. Next up: the TSM file format, and how that format maps to the in-memory data (measurement, series, points).
