A First Look at the InfluxDB TSM Storage Engine: Nibbling Here and There

Contents

Process Overview

Source Code Analysis

Monitor init

MetaClient init

TSDBstore init

TCP Multiplexer

HTTPD

Starting the TSDBStore Service

Engine and Cache

Structure Diagrams

HTTP service

Memory store

Closing



Process Overview

influx -------------- command-line client

influx_inspect ----- inspection tool

influx_stress ------ stress-testing tool

influx_tsm --------- database conversion tool

influxd ------------- the influxdb server process

The TSM engine we want to study lives in the influxd process. Finding it directly is not easy, so let's start from how influxd boots its services; the entry point is bound to turn up along the way.

Basic InfluxDB concepts are not covered here; two links instead:

https://blog.csdn.net/suzy1030/article/details/81458237

https://blog.csdn.net/suzy1030/article/details/81459029

Source Code Analysis

Start reading the code from cmd.Run and pick out a few of the main services.

First, the configuration and environment parameters are loaded.

Then NewServer is constructed from that configuration and environment.

Monitor init

Initialize the Monitor: the config section pointers below are stored in the monitor.diagRegistrations map, and the Monitor watches these settings for changes.

func (c *Config) diagnosticsClients() map[string]diagnostics.Client {
	// Config settings that are always present.
	m := map[string]diagnostics.Client{
		"config": c,

		"config-data":        c.Data,
		"config-meta":        c.Meta,
		"config-coordinator": c.Coordinator,
		"config-retention":   c.Retention,
		"config-precreator":  c.Precreator,

		"config-monitor":    c.Monitor,
		"config-subscriber": c.Subscriber,
		"config-httpd":      c.HTTPD,

		"config-cqs": c.ContinuousQuery,
	}

	// Config settings that can be repeated and can be disabled.
	if g := graphite.Configs(c.GraphiteInputs); g.Enabled() {
		m["config-graphite"] = g
	}
	if cc := collectd.Configs(c.CollectdInputs); cc.Enabled() {
		m["config-collectd"] = cc
	}
	if t := opentsdb.Configs(c.OpenTSDBInputs); t.Enabled() {
		m["config-opentsdb"] = t
	}
	if u := udp.Configs(c.UDPInputs); u.Enabled() {
		m["config-udp"] = u
	}

	return m
}
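To make the pattern concrete, here is a minimal, self-contained sketch of that registration scheme: each config section acts as a diagnostics client, and the Monitor keeps them in a name-keyed map it can poll later. The interface and method here are simplified stand-ins, not influxdb's actual diagnostics API.

package main

import "fmt"

// Client is a simplified stand-in for a diagnostics client: anything that
// can report its current settings on demand.
type Client interface {
	Diagnostics() map[string]interface{}
}

type HTTPDConfig struct {
	BindAddress  string
	HTTPSEnabled bool
}

func (c *HTTPDConfig) Diagnostics() map[string]interface{} {
	return map[string]interface{}{
		"bind-address":  c.BindAddress,
		"https-enabled": c.HTTPSEnabled,
	}
}

type Monitor struct {
	diagRegistrations map[string]Client
}

func (m *Monitor) RegisterDiagnosticsClient(name string, c Client) {
	m.diagRegistrations[name] = c
}

func main() {
	m := &Monitor{diagRegistrations: make(map[string]Client)}
	m.RegisterDiagnosticsClient("config-httpd", &HTTPDConfig{BindAddress: ":8086"})

	// Something like SHOW DIAGNOSTICS can now walk the map on demand.
	for name, c := range m.diagRegistrations {
		fmt.Println(name, c.Diagnostics())
	}
}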

MetaClient init

Initialize the MetaClient, allocating a user table, authCache, that caches users who have already passed authentication.

	return &Client{
		cacheData: &Data{
			ClusterID: uint64(rand.Int63()),
			Index:     1,
		},
		closing:             make(chan struct{}),
		changed:             make(chan struct{}),
		logger:              zap.NewNop(),
		authCache:           make(map[string]authUser),
		path:                config.Dir,
		retentionAutoCreate: config.RetentionAutoCreate,
	}
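What authCache buys is skipping the expensive bcrypt comparison on repeat requests. A hedged sketch of the idea, with illustrative types rather than the actual meta.Client code: after one successful bcrypt check, a cheap salted digest is cached, and later requests are verified against that digest (as far as I can tell, the 1.x meta client behaves in this spirit).

package main

import (
	"bytes"
	"crypto/sha256"
	"fmt"
)

type authUser struct {
	salt []byte
	hash []byte // salted digest cached after a successful slow-path check
}

type Client struct {
	authCache map[string]authUser
}

func (c *Client) Authenticate(user, pass string) bool {
	if au, ok := c.authCache[user]; ok {
		sum := sha256.Sum256(append(au.salt, pass...))
		if bytes.Equal(sum[:], au.hash) {
			return true // fast path: this user+password was verified before
		}
	}
	// Slow path: verify against the stored bcrypt hash, then populate the
	// cache. Omitted in this sketch.
	return false
}

func main() {
	salt := []byte("random-salt")
	sum := sha256.Sum256(append(salt, "secret"...))
	c := &Client{authCache: map[string]authUser{
		"admin": {salt: salt, hash: sum[:]},
	}}
	fmt.Println(c.Authenticate("admin", "secret")) // true, served from cache
	fmt.Println(c.Authenticate("admin", "wrong"))  // false, would hit bcrypt
}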

The MetaClient then loads meta.db and deserializes its contents into the cache; meta.db stores database, retention-policy, and user information.

func (c *Client) Load() error {
	file := filepath.Join(c.path, metaFile)

	f, err := os.Open(file)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		return err
	}

	if err := c.cacheData.UnmarshalBinary(data); err != nil {
		return err
	}
	return nil
}
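Load's write-side counterpart, persisting cacheData back to meta.db, is worth a note. The crash-safe way is the classic write-temp-then-rename pattern, which is roughly what the meta client's snapshot does in the 1.x source; a hedged sketch, with an illustrative temp-file name:

package main

import (
	"io/ioutil"
	"os"
	"path/filepath"
)

func saveMeta(dir string, data []byte) error {
	tmp := filepath.Join(dir, "meta.db.tmp")
	file := filepath.Join(dir, "meta.db")

	if err := ioutil.WriteFile(tmp, data, 0666); err != nil {
		return err
	}
	// Rename is atomic on POSIX filesystems, so a reader sees either the
	// old meta.db or the new one, never a half-written file.
	return os.Rename(tmp, file)
}

func main() {
	dir, err := ioutil.TempDir("", "meta")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	if err := saveMeta(dir, []byte("serialized cacheData")); err != nil {
		panic(err)
	}
}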

TSDBstore init

Initialize the tsdb Store, which contains the databases and series-file maps; later writes and reads will come here to look things up.

func NewStore(path string) *Store {
	logger := zap.NewNop()
	return &Store{
		databases:           make(map[string]struct{}),
		path:                path,
		sfiles:              make(map[string]*SeriesFile),
		indexes:             make(map[string]interface{}),
		pendingShardDeletes: make(map[uint64]struct{}),
		EngineOptions:       NewEngineOptions(),
		Logger:              logger,
		baseLogger:          logger,
	}
}

Within it, the EngineOptions config is initialized with WALEnabled set to true, so WAL files will definitely be written.

OpenLimiter is a channel whose buffer size is runtime.GOMAXPROCS(0), normally the number of CPU cores; see the sketch after the code below.

func NewEngineOptions() EngineOptions {
	return EngineOptions{
		EngineVersion: DefaultEngine,
		IndexVersion:  DefaultIndex,
		Config:        NewConfig(),
		WALEnabled:    true,
		OpenLimiter:   limiter.NewFixed(runtime.GOMAXPROCS(0)),
	}
}
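limiter.Fixed is essentially a counting semaphore built on a buffered channel: the buffer size bounds how many goroutines can hold a slot at once. A minimal sketch consistent with the call above:

package main

import (
	"fmt"
	"runtime"
)

type Fixed chan struct{}

func NewFixed(limit int) Fixed { return make(Fixed, limit) }

func (t Fixed) Take()    { t <- struct{}{} } // blocks once the limit is reached
func (t Fixed) Release() { <-t }

func main() {
	lim := NewFixed(runtime.GOMAXPROCS(0)) // one slot per CPU core

	lim.Take() // e.g. taken before opening a shard
	defer lim.Release()
	fmt.Println("holding one of", cap(lim), "slots")
}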

Initialization of the Subscriber service, points writer, and query executor is skipped here.

That concludes NewServer; the next step is to Run this Server.

TCP Multiplexer

First a TCP multiplexer is created, initializing a map of listeners; later, different TCP requests are dispatched to different listeners' handlers.

func NewMux() *Mux {
	return &Mux{
		m:       make(map[byte]*listener),
		Timeout: DefaultTimeout,
		Logger:  log.New(os.Stderr, "[tcp] ", log.LstdFlags),
	}
}

A goroutine is started to run the multiplexer's TCP listening service. The SnapshotterService registers a listener with the mux; so far it is the only service found to register with the TCP multiplexer.

	// Multiplex listener.
	mux := tcp.NewMux()
	go mux.Serve(ln)

	...

	s.SnapshotterService.Listener = mux.Listen(snapshotter.MuxHeader)
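To see how that registration is used, here is a self-contained sketch of the dispatch loop: peek at the first byte of each accepted connection and hand the rest of the stream to whichever handler claimed that header byte. This is simplified (the real tcp.Mux also applies a read deadline), and the handler map and header value 3 are illustrative.

package main

import (
	"fmt"
	"io"
	"net"
)

func serveMux(ln net.Listener, handlers map[byte]func(net.Conn)) error {
	for {
		conn, err := ln.Accept()
		if err != nil {
			return err
		}
		go func(c net.Conn) {
			var header [1]byte
			if _, err := io.ReadFull(c, header[:]); err != nil {
				c.Close()
				return
			}
			h, ok := handlers[header[0]]
			if !ok {
				fmt.Printf("unregistered header byte: %#x\n", header[0])
				c.Close()
				return
			}
			h(c) // header byte already consumed, as in the real mux
		}(conn)
	}
}

func main() {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	handlers := map[byte]func(net.Conn){
		3: func(c net.Conn) { // e.g. a snapshot-style service claims byte 3
			defer c.Close()
			fmt.Fprintln(c, "snapshot service")
		},
	}
	fmt.Println("mux listening on", ln.Addr())
	_ = serveMux(ln, handlers)
}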

HTTPD

A batch of services is initialized and appended to the Server.Services slice. The HTTP service matters most here, since database access depends on it.

	s.appendMonitorService()
	s.appendPrecreatorService(s.config.Precreator)
	s.appendSnapshotterService()
	s.appendContinuousQueryService(s.config.ContinuousQuery)
	s.appendHTTPDService(s.config.HTTPD)
	s.appendStorageService(s.config.Storage)
	s.appendRetentionPolicyService(s.config.Retention)

HTTP service initialization:

func NewService(c Config) *Service {
	s := &Service{
		addr:           c.BindAddress,
		https:          c.HTTPSEnabled,
		cert:           c.HTTPSCertificate,
		key:            c.HTTPSPrivateKey,
		limit:          c.MaxConnectionLimit,
		tlsConfig:      c.TLS,
		err:            make(chan error),
		unixSocket:     c.UnixSocketEnabled,
		unixSocketPerm: uint32(c.UnixSocketPermissions),
		bindSocket:     c.BindSocket,
		Handler:        NewHandler(c),
		Logger:         zap.NewNop(),
	}
	if s.tlsConfig == nil {
		s.tlsConfig = new(tls.Config)
	}
	if s.key == "" {
		s.key = s.cert
	}
	if c.UnixSocketGroup != nil {
		s.unixSocketGroup = int(*c.UnixSocketGroup)
	}
	s.Handler.Logger = s.Logger
	return s
}

The handler inside it contains the entry points for handling all data requests. Found them at last; the TSM engine is getting closer.

func NewHandler(c Config) *Handler {
	h := &Handler{
		mux:            pat.New(),
		Config:         &c,
		Logger:         zap.NewNop(),
		CLFLogger:      log.New(os.Stderr, "[httpd] ", 0),
		Store:          storage.NewStore(),
		stats:          &Statistics{},
		requestTracker: NewRequestTracker(),
	}

	...

	h.AddRoutes([]Route{
		Route{
			"query-options", // Satisfy CORS checks.
			"OPTIONS", "/query", false, true, h.serveOptions,
		},
		Route{
			"query", // Query serving route.
			"GET", "/query", true, true, h.serveQuery,
		},
		Route{
			"query", // Query serving route.
			"POST", "/query", true, true, h.serveQuery,
		},
		Route{
			"write-options", // Satisfy CORS checks.
			"OPTIONS", "/write", false, true, h.serveOptions,
		},
		Route{
			"write", // Data-ingest route.
			"POST", "/write", true, writeLogEnabled, h.serveWrite,
		},
		Route{
			"prometheus-write", // Prometheus remote write
			"POST", "/api/v1/prom/write", false, true, h.servePromWrite,
		},
		Route{
			"prometheus-read", // Prometheus remote read
			"POST", "/api/v1/prom/read", true, true, h.servePromRead,
		},
		Route{ // Ping
			"ping",
			"GET", "/ping", false, true, h.servePing,
		},
		Route{ // Ping
			"ping-head",
			"HEAD", "/ping", false, true, h.servePing,
		},
		Route{ // Ping w/ status
			"status",
			"GET", "/status", false, true, h.serveStatus,
		},
		Route{ // Ping w/ status
			"status-head",
			"HEAD", "/status", false, true, h.serveStatus,
		},
		Route{
			"prometheus-metrics",
			"GET", "/metrics", false, true, promhttp.Handler().ServeHTTP,
		},
	}...)
	return h
}
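With these routes registered, writes and queries are plain HTTP calls. A quick client-side check against a local influxd, assuming a database named mydb already exists (CREATE DATABASE mydb):

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"strings"
)

func main() {
	// POST line protocol to the /write route.
	line := "cpu,host=server01 value=0.64\n"
	resp, err := http.Post(
		"http://localhost:8086/write?db=mydb",
		"application/octet-stream",
		strings.NewReader(line),
	)
	if err != nil {
		panic(err)
	}
	resp.Body.Close()
	fmt.Println("write status:", resp.Status) // expect 204 No Content

	// GET the /query route.
	q := url.Values{"db": {"mydb"}, "q": {"SELECT * FROM cpu"}}
	resp, err = http.Get("http://localhost:8086/query?" + q.Encode())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := ioutil.ReadAll(resp.Body)
	fmt.Println(string(body)) // JSON result set
}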

The HTTP service creates its own TCP server, unrelated to the multiplexer above. It binds its handler and starts a goroutine to listen for HTTP requests.

When requests come in, each new connection gets its own goroutine, which calls the handler to process the requests on that connection.

func (srv *Server) Serve(l net.Listener) error {
	defer l.Close()
	if fn := testHookServerServe; fn != nil {
		fn(srv, l)
	}
	var tempDelay time.Duration // how long to sleep on accept failure

	if err := srv.setupHTTP2_Serve(); err != nil {
		return err
	}

	srv.trackListener(l, true)
	defer srv.trackListener(l, false)

	baseCtx := context.Background() // base is always background, per Issue 16220
	ctx := context.WithValue(baseCtx, ServerContextKey, srv)
	for {
		rw, e := l.Accept()
		if e != nil {
			select {
			case <-srv.getDoneChan():
				return ErrServerClosed
			default:
			}
			if ne, ok := e.(net.Error); ok && ne.Temporary() {
				if tempDelay == 0 {
					tempDelay = 5 * time.Millisecond
				} else {
					tempDelay *= 2
				}
				if max := 1 * time.Second; tempDelay > max {
					tempDelay = max
				}
				srv.logf("http: Accept error: %v; retrying in %v", e, tempDelay)
				time.Sleep(tempDelay)
				continue
			}
			return e
		}
		tempDelay = 0
		c := srv.newConn(rw)
		c.setState(c.rwc, StateNew) // before Serve can return
		go c.serve(ctx)
	}
}

Starting the TSDBStore Service

Load the series file and the database index, then load the retention policies.

		// Load series file.
		sfile, err := s.openSeriesFile(db.Name())
		if err != nil {
			return err
		}

		// Retrieve database index.
		idx, err := s.createIndexIfNotExists(db.Name())
		if err != nil {
			return err
		}

		// Load each retention policy within the database directory.
		rpDirs, err := ioutil.ReadDir(dbPath)
		if err != nil {
			return err
		}

Finally the shards (TSM files) are loaded. Exactly what gets loaded is not yet clear; my guess is the index blocks and timestamps.

	// Open engine.
	shard := NewShard(shardID, path, walPath, sfile, opt)

	// Disable compactions, writes and queries until all shards are loaded
	shard.EnableOnOpen = false
	shard.WithLogger(s.baseLogger)

	err = shard.Open()
	if err != nil {
		log.Info("Failed to open shard", logger.Shard(shardID), zap.Error(err))
		resC <- &res{err: fmt.Errorf("Failed to open shard: %d: %s", shardID, err)}
		return
	}

An engine is started for each shard. The engine's structure and startup are the next things to study; they are also the crux of TSM.

func (s *Shard) Open() error {

		...

		// Initialize underlying engine.
		e, err := NewEngine(s.id, idx, s.path, s.walPath, s.sfile, s.options)
		if err != nil {
			return err
		}

		// Set log output on the engine.
		e.WithLogger(s.baseLogger)

		// Disable compactions while loading the index
		e.SetEnabled(false)

		// Open engine.
		if err := e.Open(); err != nil {
			return err
		}

		...

}

Engine and Cache

Each shard owns a single Engine, and within the Engine the data is held in the Cache.

The Compactor writes the cached data out to files based on cache size or elapsed time; a toy sketch of this write path follows the struct below.

type Engine struct {
	...

	WAL            *WAL
	Cache          *Cache
	Compactor      *Compactor
	CompactionPlan CompactionPlanner
	FileStore      *FileStore

	...

	// provides access to the total set of series IDs
	seriesIDSets tsdb.SeriesIDSets

	// seriesTypeMap maps a series key to field type
	seriesTypeMap *radix.Tree
}
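Here is a toy, self-contained sketch of the write path these fields imply (illustrative only, not the real engine): a point is appended to the WAL for durability, then written to the in-memory Cache; when the cache crosses a size threshold it is snapshotted and flushed to a TSM file. The real engine also flushes a cache that has been idle too long.

package main

import "fmt"

type Engine struct {
	wal       []string             // stands in for *WAL
	cache     map[string][]float64 // stands in for *Cache
	cacheSize int
	maxSize   int
}

func (e *Engine) WritePoint(key string, value float64) {
	e.wal = append(e.wal, fmt.Sprintf("%s=%v", key, value)) // 1. durable log
	e.cache[key] = append(e.cache[key], value)              // 2. in-memory store
	e.cacheSize++
	if e.cacheSize >= e.maxSize { // 3. size-based flush trigger
		e.snapshot()
	}
}

func (e *Engine) snapshot() {
	fmt.Printf("flushing %d cached values to a TSM file\n", e.cacheSize)
	e.cache = make(map[string][]float64)
	e.cacheSize = 0
	e.wal = nil // WAL segments covering snapshotted data can be removed
}

func main() {
	e := &Engine{cache: make(map[string][]float64), maxSize: 3}
	for i := 0; i < 4; i++ {
		e.WritePoint("cpu,host=server01#value", float64(i))
	}
}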

The storer inside the Cache struct holds the values (points). storer itself is an interface whose methods write and read values and entries. I have not yet found where this interface is instantiated, but the store can roughly be pictured as a map of entries, written and read by key, where an entry is the set of values sharing one key; see the sketch after the interface below.

type Cache struct {
	// Due to a bug in atomic  size needs to be the first word in the struct, as
	// that's the only place where you're guaranteed to be 64-bit aligned on a
	// 32 bit system. See: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
	size         uint64
	snapshotSize uint64

	mu      sync.RWMutex
	store   storer
	maxSize uint64

	// snapshots are the cache objects that are currently being written to tsm files
	// they're kept in memory while flushing so they can be queried along with the cache.
	// they are read only and should never be modified
	snapshot     *Cache
	snapshotting bool

	// This number is the number of pending or failed WriteSnaphot attempts since the last successful one.
	snapshotAttempts int

	stats         *CacheStatistics
	lastSnapshot  time.Time
	lastWriteTime time.Time

	// A one time synchronization used to initial the cache with a store.  Since the store can allocate a
	// a large amount memory across shards, we lazily create it.
	initialize       atomic.Value
	initializedCount uint32
}

type storer interface {
	entry(key []byte) *entry                        // Get an entry by its key.
	write(key []byte, values Values) (bool, error)  // Write an entry to the store.
	add(key []byte, entry *entry)                   // Add a new entry to the store.
	remove(key []byte)                              // Remove an entry from the store.
	keys(sorted bool) [][]byte                      // Return an optionally sorted slice of entry keys.
	apply(f func([]byte, *entry) error) error       // Apply f to all entries in the store in parallel.
	applySerial(f func([]byte, *entry) error) error // Apply f to all entries in serial.
	reset()                                         // Reset the store to an initial unused state.
	split(n int) []storer                           // Split splits the store into n stores
	count() int                                     // Count returns the number of keys in the store
}
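That guess is close: in the tsm1 package the storer appears to be backed by a partitioned hash map (a type called ring) from series key to *entry, and an entry is indeed the set of values written under one key. A single-map sketch of the core operations, leaving out the partitioning the real ring uses to cut lock contention:

package main

import (
	"fmt"
	"sort"
	"sync"
)

type Values []float64 // stands in for tsm1.Values

type entry struct {
	values Values
}

type mapStore struct {
	mu sync.RWMutex
	m  map[string]*entry
}

func newMapStore() *mapStore { return &mapStore{m: make(map[string]*entry)} }

func (s *mapStore) write(key []byte, vs Values) {
	s.mu.Lock()
	defer s.mu.Unlock()
	e, ok := s.m[string(key)]
	if !ok {
		e = &entry{}
		s.m[string(key)] = e
	}
	e.values = append(e.values, vs...) // all values for one key live together
}

func (s *mapStore) entry(key []byte) *entry {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.m[string(key)]
}

func (s *mapStore) keys(sorted bool) [][]byte {
	s.mu.RLock()
	defer s.mu.RUnlock()
	out := make([][]byte, 0, len(s.m))
	for k := range s.m {
		out = append(out, []byte(k))
	}
	if sorted {
		sort.Slice(out, func(i, j int) bool { return string(out[i]) < string(out[j]) })
	}
	return out
}

func main() {
	s := newMapStore()
	s.write([]byte("cpu,host=server01#value"), Values{0.64, 0.65})
	fmt.Println(len(s.keys(true)), "key(s):", s.entry([]byte("cpu,host=server01#value")).values)
}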

Structure Diagrams

HTTP service

(diagram)

Memory store

(diagram)

Closing

That is it for this first look. Next up: the TSM file format, and how that format maps to the in-memory data (measurement, series, points).
