上一篇转载的文章非常经典,接下来补充几点
cmd.Run -> NewServer -> s.Open() -> s.MetaClient.Open -> s.TSDBStore.Open() -> service.Open -> s.TSDBStore.Open -> loadShards -> (s *Shard) Open() -> NewEngine
shard 的 Open 会创建一个很重要的数据结构(ShardIndex):
// NewShardIndex builds the tsdb.Index for one shard.
//
// The returned ShardIndex embeds the *Index carried in opt.InmemIndex and
// records the shard id together with the engine options. The database and
// path arguments are accepted but not used here. The type assertion on
// opt.InmemIndex panics if that value is not an *Index.
func NewShardIndex(id uint64, database, path string, opt tsdb.EngineOptions) tsdb.Index {
	shared := opt.InmemIndex.(*Index)
	idx := &ShardIndex{
		Index: shared,
		id:    id,
		opt:   opt,
	}
	return idx
}
注册:
// Hand tsdb.RegisterIndex the name IndexName together with a factory closure;
// the closure forwards its arguments unchanged to NewShardIndex.
tsdb.RegisterIndex(IndexName, func(id uint64, database, path string, opt tsdb.EngineOptions) tsdb.Index {
return NewShardIndex(id, database, path, opt)
})
通过 idx, err := NewIndex(s.id, s.database, ipath, s.options) 创建。它和下面的 Index 的区别是:这个是单个 shard 的索引,下面的是集合索引。
每个shard会新建一个engine
NewServer{
//用来记录shard分组信息等,根据时间查询的时候先定位shard(MetaClient.ShardGroupsByTimeRange)
MetaClient: meta.NewClient(c.Meta),
s.TSDBStore = tsdb.NewStore(c.Data.Dir)
}
下面这个结构体的 Databases 包含 RetentionPolicies,RetentionPolicies 下又包含所有 shard 的信息;shard 信息里带有时间段,所以查询时先根据 database 找到对应的 shard,再按时间段筛选。
// Data represents the top level collection of all metadata.
// It carries the raft position (Term, Index), the cluster identifier, the
// database and user lists, and two ID high-water marks.
type Data struct {
Term uint64 // associated raft term
Index uint64 // associated raft index
// ClusterID identifies the cluster this metadata belongs to.
// NOTE(review): how the value is generated is not visible here — confirm.
ClusterID uint64
// Databases lists the metadata of every database.
Databases []DatabaseInfo
// Users lists the metadata of every user.
Users []UserInfo
// adminUserExists provides a constant time mechanism for determining
// if there is at least one admin user.
adminUserExists bool
// NOTE(review): presumably the largest shard group ID allocated so far — confirm.
MaxShardGroupID uint64
// NOTE(review): presumably the largest shard ID allocated so far — confirm.
MaxShardID uint64
}
还有个比较重要的数据结构,其中的属性series很重要,获取serieskey后可以从中获取series对象
// Index holds in-memory metadata: measurements keyed by name and series
// keyed by series key, plus HyperLogLog++ sketches.
type Index struct {
// NOTE(review): presumably guards the maps and counters below — confirm against the methods.
mu sync.RWMutex
// database is the name of the database associated with this index.
database string
// In-memory metadata index, built on load and updated when new series come in
measurements map[string]*Measurement // measurement name to object and index
series map[string]*Series // map series key to the Series object
lastID uint64 // last used series ID. They're in memory only for this shard
// NOTE(review): the sketches presumably estimate series/measurement
// cardinality, with the "TS" variants tracking tombstoned entries — confirm.
seriesSketch, seriesTSSketch *hll.Plus
measurementsSketch, measurementsTSSketch *hll.Plus
}
表(measurement)结构里存放的是按 tag key 和 tag value 索引的 series id 集合,拿到 id 后再到上面的结构里查找 Series 对象
// Measurement holds the in-memory index data for one measurement: its name,
// its field names, and lookup tables from series ID and from tag key/value
// to series.
type Measurement struct {
// database is the name of the database associated with this measurement.
database string
Name string `json:"name,omitempty"`
name []byte // cached version as []byte
// NOTE(review): presumably guards the maps and slices below — confirm against the methods.
mu sync.RWMutex
// fieldNames is a set of field names (map used as a set).
fieldNames map[string]struct{}
// in-memory index fields
seriesByID map[uint64]*Series // lookup table for series by their id
seriesByTagKeyValue map[string]map[string]SeriesIDs // map from tag key to value to sorted set of series ids
// lazily created sorted series IDs
sortedSeriesIDs SeriesIDs // sorted list of series IDs in this measurement
}
---------------------------------------------------------------------------------------------------
// Walk the data directory and iterate over every database directory.
for _, db := range dbDirs {
// Iterate over the retention-policy directories.
for _, rp := range rpDirs {
// Iterate over the shard directories.
// Shard file names are numeric shardIDs
for _, sh := range shardDirs {
// Construct the shard from its ID, data path and WAL path, then open it.
shard := NewShard(shardID, path, walPath, opt)
err = shard.Open()
}
}
}
shard.open{
//生成索引数据结构
idx, err := NewIndex(s.id, s.database, ipath, s.options)
// Open engine,生成filestore数据结构,读入wal文件生成cache
//(e *Engine) Open()其中会执行 e.FileStore.Open打开tsm文件
if err := e.Open(); err != nil {
return err
}
//loads the shard metadata into memory
LoadMetadataIndex(shardID uint64, index tsdb.Index)
}
engine.open{
//读入tsm文件后,在内存中生成其index索引
t.accessor = &mmapAccessor{
f: f,
}
index, err := t.accessor.init() -> (m *mmapAccessor) init()
}
LoadMetadataIndex{
//生成series结构
//初始化也就是把索引文件读入,这里注意的是索引文件的key由三部分组成:measurement + tags + fieldName,所以表是从索引里面获取的
InitializeSeries(key, name []byte, tags models.Tags)
}
// addToIndexFromKey splits a composite key into its series key and field
// name, derives the measurement name from the series key, and records the
// field (with fieldType) in the engine's field set. When the engine's index
// is of the in-memory type, it also initializes the series in that index.
//
// It returns the first error from field creation or series initialization,
// or nil if both succeed.
func (e *Engine) addToIndexFromKey(key []byte, fieldType influxql.DataType) error {
	seriesKey, field := SeriesAndFieldFromCompositeKey(key)
	name := tsdb.MeasurementFromSeriesKey(seriesKey)

	// The field set is where the per-measurement field definitions are kept.
	fields := e.fieldset.CreateFieldsIfNotExists(name)
	err := fields.CreateFieldIfNotExists(field, fieldType, false)
	if err != nil {
		return err
	}

	// Only the in-memory index has to be populated from here.
	if e.index.Type() != inmem.IndexName {
		return nil
	}

	// The parse error is discarded, exactly as before.
	// NOTE(review): presumably keys read back from storage are already
	// well-formed — confirm before relying on this.
	tags, _ := models.ParseTags(seriesKey)
	return e.index.InitializeSeries(seriesKey, name, tags)
}
InitializeSeries{
func (i *Index) CreateSeriesIfNotExists
//where查询通过此结构获取series ids
m.seriesByTagKeyValue[string(t.Key)] = valueMap
}
查询:
// TagValues returns the tag keys and values in the given database, matching the condition.
ExecuteStatement(executeSelectStatement) -> createIterators (e.ShardMapper.MapShards -> ShardGroupsByTimeRange(先根据时间判断在哪个shard下)) ->CreateIterator -> createVarRefIterator ((s *Store) TagValues() -> ForEachMeasurementSeriesByExpr)
//上面介绍过,查询先通过MetaClient的方法获取shard id范围,然后就能通过index索引得到series key集合
//获取e.index.TagSets筛选series集合
tagSets, err := e.index.TagSets([]byte(measurement), opt)
createTagSetIterators -> createTagSetGroupIterators -> createVarRefSeriesIterator -> buildCursor -> buildFloatCursor ->
最终调用KeyCursor ->
//根据series key定位相应data数据的位置
(f *FileStore) locations(key string, t int64, ascending bool)