作者: yangzhj



背景

https://docs.pingcap.com/zh/tidb/v6.5/sync-diff-inspector-overview

上述链接是 TiDB 官方数据校验工具 sync_diff_inspector 的说明文档,其中列出的第一项功能就是:对比表结构和数据。

本人最近参与的一个迁移项目在使用该工具对迁移上下游的库表进行校验时,发现某张表上游无主键、下游有主键,而其他结构信息相同,sync_diff_inspector 却显示校验无差异。基于上述情况,本人决定翻阅该工具的源码,对结构校验功能的实现进行探索。



探索过程

01获取代码

该项目采用的是 sync_diff_inspector 6.5.7 版本,因此首先需要从 GitHub 获取对应版本的源码包:https://github.com/pingcap/tidb-tools/releases/tag/v6.5.7 ,然后使用 Go IDE 工具打开并进行分析。

02代码分析

分析 Go 代码时,一般以作为程序运行入口的 main 函数为起点。我们从 main 函数的代码开始,逐步分析结构校验的代码逻辑:

第一步:main.go 文件 ,程序主入口

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
//略
)

// main is the program entry point shown in this excerpt. It runs the
// sync_diff check through checkSyncState and exits with status 1 when the
// check reports failure.
func main() {
	// Initialization and configuration-file loading/validation code is omitted here.
	// Run the sync_diff check; a false result means the check failed.
	if !checkSyncState(ctx, cfg) {
		log.Warn("check failed!!!")
		os.Exit(1)
	}
	log.Info("check pass!!!")
}

// checkSyncState runs the whole sync_diff check for cfg and reports whether
// it passed.
//
// It creates a Diff, compares table structures unless cfg.CheckDataOnly is
// set, compares table data unless cfg.CheckStructOnly is set, and finally
// returns the result of d.PrintSummary. If initialization or either
// comparison fails, a hint pointing at the log file is printed, the error is
// logged, and false is returned.
//
// Errors are logged with log.Error instead of log.Fatal: a fatal log call
// terminates the process on the spot, which left the "return false"
// statements unreachable and skipped the deferred d.Close() and the timing
// log below. Returning false lets the deferred cleanup run; main turns a
// false result into exit status 1.
func checkSyncState(ctx context.Context, cfg *config.Config) bool {
	beginTime := time.Now()
	defer func() {
		log.Info("check data finished", zap.Duration("cost", time.Since(beginTime)))
	}()

	// Create the diff instance that drives both comparisons.
	d, err := NewDiff(ctx, cfg)
	if err != nil {
		fmt.Printf("There is something error when initialize diff, please check log info in %s\n", filepath.Join(cfg.Task.OutputDir, config.LogFileName))
		log.Error("failed to initialize diff process", zap.Error(err))
		return false
	}
	defer d.Close()

	// The structure check runs unless the user asked for a data-only check.
	if !cfg.CheckDataOnly {
		err = d.StructEqual(ctx)
		if err != nil {
			fmt.Printf("There is something error when compare structure of table, please check log info in %s\n", filepath.Join(cfg.Task.OutputDir, config.LogFileName))
			log.Error("failed to check structure difference", zap.Error(err))
			return false
		}
	} else {
		log.Info("Check table data only, skip struct check")
	}

	// The data check runs unless the user asked for a structure-only check.
	if !cfg.CheckStructOnly {
		err = d.Equal(ctx)
		if err != nil {
			fmt.Printf("There is something error when compare data of table, please check log info in %s\n", filepath.Join(cfg.Task.OutputDir, config.LogFileName))
			log.Error("failed to check data difference", zap.Error(err))
			return false
		}
	} else {
		log.Info("Check table struct only, skip data check")
	}
	return d.PrintSummary(ctx)
}
  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 12.
  • 13.
  • 14.
  • 15.
  • 16.
  • 17.
  • 18.
  • 19.
  • 20.
  • 21.
  • 22.
  • 23.
  • 24.
  • 25.
  • 26.
  • 27.
  • 28.
  • 29.
  • 30.
  • 31.
  • 32.
  • 33.
  • 34.
  • 35.
  • 36.
  • 37.
  • 38.
  • 39.
  • 40.
  • 41.
  • 42.
  • 43.
  • 44.
  • 45.
  • 46.
  • 47.
  • 48.
  • 49.
  • 50.
  • 51.
  • 52.
  • 53.
  • 54.
  • 55.
  • 56.
  • 57.
  • 58.
  • 59.
  • 60.
  • 61.
  • 62.
  • 63.
  • 64.

第二步:diff.go 文件,校验准备工作,循环校验每张表,获取结构校验所需的表信息。

// StructEqual walks the downstream tables and records the structure check
// result of each one.
//
// Iteration starts at table 0, or at the table index stored in df.startRange
// when a start range is present. For every table for which
// common.AllTableExist accepts its TableLack value, compareStruct is called;
// otherwise the table keeps the defaults (not equal, skipped). The outcome is
// registered with the progress tracker and stored in the report. The first
// error returned by compareStruct aborts the loop and is returned traced.
func (df *Diff) StructEqual(ctx context.Context) error {
	tables := df.downstream.GetTables()

	first := 0
	if df.startRange != nil {
		first = df.startRange.ChunkRange.Index.TableIndex
	}

	for idx := first; idx < len(tables); idx++ {
		// Defaults apply when the table is not compared at all.
		equal, skip := false, true
		lack := tables[idx].TableLack

		if common.AllTableExist(lack) {
			var err error
			// Compare the structure of the table at this index.
			if equal, skip, err = df.compareStruct(ctx, idx); err != nil {
				return errors.Trace(err)
			}
		}

		progress.RegisterTable(dbutil.TableName(tables[idx].Schema, tables[idx].Table), !equal, skip, lack)
		df.report.SetTableStructCheckResult(tables[idx].Schema, tables[idx].Table, equal, skip, lack)
	}
	return nil
}

// compareStruct compares the structure of the table at tableIndex between
// upstream and downstream.
//
// It fetches the upstream structure info, hands it together with the
// downstream table's Info to utils.CompareStruct, and stores the resulting
// skip flag in the downstream table's IgnoreDataCheck field. When the
// upstream info cannot be fetched it returns (false, true, err) with the
// error traced.
func (df *Diff) compareStruct(ctx context.Context, tableIndex int) (isEqual bool, isSkip bool, err error) {
	upstreamInfos, err := df.upstream.GetSourceStructInfo(ctx, tableIndex)
	if err != nil {
		return false, true, errors.Trace(err)
	}

	target := df.downstream.GetTables()[tableIndex]
	// The arguments are the upstream and downstream table infos.
	isEqual, isSkip = utils.CompareStruct(upstreamInfos, target.Info)
	target.IgnoreDataCheck = isSkip

	return isEqual, isSkip, nil
}
  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 12.
  • 13.
  • 14.
  • 15.
  • 16.
  • 17.
  • 18.
  • 19.
  • 20.
  • 21.
  • 22.
  • 23.
  • 24.
  • 25.
  • 26.
  • 27.
  • 28.
  • 29.
  • 30.
  • 31.

第三步:util.go 文件,实现结构校验逻辑的函数。

// CompareStruct compares tables' columns and indices from upstream and downstream.
//
// Columns are checked first, position by position, for every upstream table:
// column count, column name, type compatibility (isCompatible) and column
// properties (sameProperties). The first column mismatch is logged and
// returns (false, true) immediately.
//
// Indices are then reconciled by name. An index that exists on both sides
// under the same name but with a different column count, column offset or
// column name is marked for deletion. An index that is missing from the
// downstream table or from at least one upstream table is "unilateral"; it is
// marked for deletion only if at least one index is shared by the downstream
// table and all upstream tables. Indices marked for deletion are removed from
// the Indices slices of downstreamTableInfo and of every upstream table info,
// so the arguments are modified in place.
//
// NOTE(review): when no index is shared by all sides, unilateral indices are
// only reported with a warning and are not added to the deletion set, so
// isEqual can still be true although the index sets differ — confirm this is
// intended.
//
// There are 2 return values:
//
//	isEqual	: result of comparing tables' columns and indices
//	isPanic	: the differences of tables' struct can not be ignored. Need to skip data comparing.
func CompareStruct(upstreamTableInfos []*model.TableInfo, downstreamTableInfo *model.TableInfo) (isEqual bool, isPanic bool) {
	// compare columns
	for _, upstreamTableInfo := range upstreamTableInfos {
		if len(upstreamTableInfo.Columns) != len(downstreamTableInfo.Columns) {
			// the numbers of columns are different, don't compare data
			log.Error("column num not equal",
				zap.String("upstream table", upstreamTableInfo.Name.O),
				zap.Int("column num", len(upstreamTableInfo.Columns)),
				zap.String("downstream table", downstreamTableInfo.Name.O),
				zap.Int("column num", len(downstreamTableInfo.Columns)),
			)
			return false, true
		}

		for i, column := range upstreamTableInfo.Columns {
			if column.Name.O != downstreamTableInfo.Columns[i].Name.O {
				// names at the same position are different, panic!
				log.Error("column name not equal",
					zap.String("upstream table", upstreamTableInfo.Name.O),
					zap.String("column name", column.Name.O),
					zap.String("downstream table", downstreamTableInfo.Name.O),
					zap.String("column name", downstreamTableInfo.Columns[i].Name.O),
				)
				return false, true
			}

			if !isCompatible(column.GetType(), downstreamTableInfo.Columns[i].GetType()) {
				// column types are not compatible, panic!
				log.Error("column type not compatible",
					zap.String("upstream table", upstreamTableInfo.Name.O),
					zap.String("column name", column.Name.O),
					zap.Uint8("column type", column.GetType()),
					zap.String("downstream table", downstreamTableInfo.Name.O),
					zap.String("column name", downstreamTableInfo.Columns[i].Name.O),
					zap.Uint8("column type", downstreamTableInfo.Columns[i].GetType()),
				)
				return false, true
			}

			// check the column properties with sameProperties
			if !sameProperties(column, downstreamTableInfo.Columns[i]) {
				// column properties are different, panic!
				log.Error("column properties not compatible",
					zap.String("upstream table", upstreamTableInfo.Name.O),
					zap.String("column name", column.Name.O),
					zap.Uint8("column type", column.GetType()),
					zap.String("downstream table", downstreamTableInfo.Name.O),
					zap.String("column name", downstreamTableInfo.Columns[i].Name.O),
					zap.Uint8("column type", downstreamTableInfo.Columns[i].GetType()),
				)
				return false, true
			}
		}
	}

	// compare indices
	// deleteIndicesSet holds the names of indices to remove from both sides.
	deleteIndicesSet := make(map[string]struct{})
	// unilateralIndicesSet holds the names of indices that are not present on every side.
	unilateralIndicesSet := make(map[string]struct{})
	// downstreamIndicesMap maps a downstream index name to the index and to the
	// number of upstream tables that have a matching index (cnt).
	downstreamIndicesMap := make(map[string]*struct {
		index *model.IndexInfo
		cnt   int
	})
	for _, index := range downstreamTableInfo.Indices {
		downstreamIndicesMap[index.Name.O] = &struct {
			index *model.IndexInfo
			cnt   int
		}{index, 0}
	}
	for _, upstreamTableInfo := range upstreamTableInfos {

	NextIndex:
		for _, upstreamIndex := range upstreamTableInfo.Indices {
			// already marked for deletion, nothing more to check
			if _, ok := deleteIndicesSet[upstreamIndex.Name.O]; ok {
				continue NextIndex
			}

			indexU, ok := downstreamIndicesMap[upstreamIndex.Name.O]
			if ok {
				if len(indexU.index.Columns) != len(upstreamIndex.Columns) {
					// different index, should be removed
					deleteIndicesSet[upstreamIndex.Name.O] = struct{}{}
					continue NextIndex
				}

				for i, indexColumn := range upstreamIndex.Columns {
					if indexColumn.Offset != indexU.index.Columns[i].Offset || indexColumn.Name.O != indexU.index.Columns[i].Name.O {
						// different index, should be removed
						deleteIndicesSet[upstreamIndex.Name.O] = struct{}{}
						continue NextIndex
					}
				}
				// this upstream table has a matching index
				indexU.cnt = indexU.cnt + 1
			} else {
				// the index exists upstream only
				unilateralIndicesSet[upstreamIndex.Name.O] = struct{}{}
			}
		}
	}

	existBilateralIndex := false
	for _, indexU := range downstreamIndicesMap {
		if _, ok := deleteIndicesSet[indexU.index.Name.O]; ok {
			continue
		}
		if indexU.cnt < len(upstreamTableInfos) {
			// Some upstreamInfos don't have this index.
			unilateralIndicesSet[indexU.index.Name.O] = struct{}{}
		} else {
			// there is an index the whole tables have,
			// so unilateral indices can be deleted.
			existBilateralIndex = true
		}
	}

	// delete indices
	// If there exist bilateral index, unilateral indices can be deleted.
	if existBilateralIndex {
		for indexName := range unilateralIndicesSet {
			deleteIndicesSet[indexName] = struct{}{}
		}
	} else {
		log.Warn("no index exists in both upstream and downstream", zap.String("table", downstreamTableInfo.Name.O))
	}
	if len(deleteIndicesSet) > 0 {
		// rebuild the downstream index list without the deleted indices
		newDownstreamIndices := make([]*model.IndexInfo, 0, len(downstreamTableInfo.Indices))
		for _, index := range downstreamTableInfo.Indices {
			if _, ok := deleteIndicesSet[index.Name.O]; !ok {
				newDownstreamIndices = append(newDownstreamIndices, index)
			} else {
				log.Debug("delete downstream index", zap.String("name", downstreamTableInfo.Name.O), zap.String("index", index.Name.O))
			}
		}
		downstreamTableInfo.Indices = newDownstreamIndices

		// do the same for every upstream table
		for _, upstreamTableInfo := range upstreamTableInfos {
			newUpstreamIndices := make([]*model.IndexInfo, 0, len(upstreamTableInfo.Indices))
			for _, index := range upstreamTableInfo.Indices {
				if _, ok := deleteIndicesSet[index.Name.O]; !ok {
					newUpstreamIndices = append(newUpstreamIndices, index)
				} else {
					log.Debug("delete upstream index", zap.String("name", upstreamTableInfo.Name.O), zap.String("index", index.Name.O))
				}
			}
			upstreamTableInfo.Indices = newUpstreamIndices
		}

	}

	// isEqual is false only when at least one index was marked for deletion.
	return len(deleteIndicesSet) == 0, false
}

// sameProperties reports whether two columns have matching properties.
//
// Only string-typed columns (TypeVarString, TypeString, TypeVarchar, decided
// by the type of c1) are inspected; every other type is treated as matching.
// For string columns, charset and collation differences are logged as
// warnings and otherwise ignored; the result depends solely on whether both
// columns declare the same length (Flen).
func sameProperties(c1, c2 *model.ColumnInfo) bool {
	tp := c1.GetType()
	if tp != mysql.TypeVarString && tp != mysql.TypeString && tp != mysql.TypeVarchar {
		return true
	}

	srcCharset, dstCharset := c1.FieldType.GetCharset(), c2.FieldType.GetCharset()
	if srcCharset != dstCharset {
		log.Warn("Ignoring character set differences",
			zap.String("column name", c1.Name.O),
			zap.String("charset source", srcCharset),
			zap.String("charset target", dstCharset),
		)
	}

	srcCollate, dstCollate := c1.FieldType.GetCollate(), c2.FieldType.GetCollate()
	if srcCollate != dstCollate {
		log.Warn("Ignoring collation differences",
			zap.String("column name", c1.Name.O),
			zap.String("collation source", srcCollate),
			zap.String("collation target", dstCollate),
		)
	}

	// The declared length is the only property that decides the result.
	srcLen, dstLen := c1.FieldType.GetFlen(), c2.FieldType.GetFlen()
	return srcLen == dstLen
}
  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 12.
  • 13.
  • 14.
  • 15.
  • 16.
  • 17.
  • 18.
  • 19.
  • 20.
  • 21.
  • 22.
  • 23.
  • 24.
  • 25.
  • 26.
  • 27.
  • 28.
  • 29.
  • 30.
  • 31.
  • 32.
  • 33.
  • 34.
  • 35.
  • 36.
  • 37.
  • 38.
  • 39.
  • 40.
  • 41.
  • 42.
  • 43.
  • 44.
  • 45.
  • 46.
  • 47.
  • 48.
  • 49.
  • 50.
  • 51.
  • 52.
  • 53.
  • 54.
  • 55.
  • 56.
  • 57.
  • 58.
  • 59.
  • 60.
  • 61.
  • 62.
  • 63.
  • 64.
  • 65.
  • 66.
  • 67.
  • 68.
  • 69.
  • 70.
  • 71.
  • 72.
  • 73.
  • 74.
  • 75.
  • 76.
  • 77.
  • 78.
  • 79.
  • 80.
  • 81.
  • 82.
  • 83.
  • 84.
  • 85.
  • 86.
  • 87.
  • 88.
  • 89.
  • 90.
  • 91.
  • 92.
  • 93.
  • 94.
  • 95.
  • 96.
  • 97.
  • 98.
  • 99.
  • 100.
  • 101.
  • 102.
  • 103.
  • 104.
  • 105.
  • 106.
  • 107.
  • 108.
  • 109.
  • 110.
  • 111.
  • 112.
  • 113.
  • 114.
  • 115.
  • 116.
  • 117.
  • 118.
  • 119.
  • 120.
  • 121.
  • 122.
  • 123.
  • 124.
  • 125.
  • 126.
  • 127.
  • 128.
  • 129.
  • 130.
  • 131.
  • 132.
  • 133.
  • 134.
  • 135.
  • 136.
  • 137.
  • 138.
  • 139.
  • 140.
  • 141.
  • 142.
  • 143.
  • 144.
  • 145.
  • 146.
  • 147.
  • 148.
  • 149.
  • 150.
  • 151.
  • 152.
  • 153.
  • 154.
  • 155.
  • 156.
  • 157.
  • 158.
  • 159.
  • 160.
  • 161.
  • 162.
  • 163.
  • 164.
  • 165.
  • 166.
  • 167.
  • 168.
  • 169.
  • 170.
  • 171.
  • 172.
  • 173.
  • 174.
  • 175.
  • 176.

03功能总结

根据对上述结构校验代码的分析,我们可以总结出 v6.5.7 版本的 sync_diff_inspector 工具能够校验的表结构项目包括如下几项:

  1. 列的数量
  2. 列的名称
  3. 列的类型
  4. 列的长度定义(仅针对字符串类型的列,即 sameProperties 中的 TypeVarString、TypeString、TypeVarchar)
  5. 索引的差异(同名索引的列数量、列偏移量 Offset 及列名)