合并mysql 请求
这个需求的背景是:把多张表合并成单表之后,请求量会非常大。合并前压力分散在各个表上,合并后全部落到同一张表上,相当于 QPS 的累加。如果在应用侧先把请求缓存起来并进行合并,那么 TiDB 端的压力就会小很多。
设计
mergeProcess
- 为了合并请求就需要缓存结果。缓存就很熟悉了。
"github.com/patrickmn/go-cache"
- 合并请求那肯定是要有方法去做。
// MergeHandler combines the value already cached for a key (oldData) with a
// newly inserted value (newData) and returns the value to keep in the cache.
type MergeHandler func(oldData, newData interface{}) interface{}
- 合并完请求最后处理的时候也是一个方法.
// ProcessHandler consumes one merged value together with the context it was
// inserted with; a non-nil error reports that processing failed.
type ProcessHandler func(context.Context, interface{}) error
-
合并完做处理的时候,你总不能停止服务吧?所以我们需要两个缓存:在一个缓存被处理的时候,由另一个缓存接收写入,两个缓存不断交替切换。
-
切换的时间点,一般都是两个维度,一个是时间上限,一个是数据量上限。
综合以上的考虑,最终的结构体如下:
// MergeProcess buffers inserted values in two caches and rotates between them
// so that one cache can be drained while the other keeps accepting inserts.
type MergeProcess struct {
waitMergeMaxTime int64 // maximum wait before merging; Run uses it as a number of seconds
dataMaxSize int64 // entry count of the current cache at which isNeedRotation reports true
firstDataCache, secondDataCache *DataCache // the two caches that are rotated
//handler ProcessHandler
mergeHandler MergeHandler // passed to DataCache.Insert to merge values that share a key
currentCache **DataCache // points at the cache that currently receives inserts
isRotating int32 // set to 1 while rotation runs, reset to 0 when it returns
sync.Mutex // held for the whole duration of rotation
}
这里把 ProcessHandler 去掉是因为做了第二次优化:ProcessHandler 不再绑定在指定的 worker 上,而是再抽象出一组 worker,在 Submit 任务之后由它们不停地执行 process。
MergeProcess 主要的工作是:
- 提供dataCache的封装,客户端无需关注当前是在哪一个cache 执行操作。插入数据功能是需要提供的。
- rotation 功能,切换 两个缓存,以及处理数据。
- 定时检测等待时间和数据量,到达阈值之后执行 rotation 功能。
核心代码如下:
// MergeHandler combines the value already cached for a key (oldData) with a
// newly inserted value (newData) and returns the value to keep in the cache.
type MergeHandler func(oldData, newData interface{}) interface{}
// CtxWithInterface pairs a cached value with the context it was inserted with,
// so both can later be handed to the process handler together.
type CtxWithInterface struct {
Ctx context.Context // context supplied by the caller of Insert
V interface{} // the (possibly merged) payload
}
// NewCtxWithInterface bundles ctx and v into a freshly allocated
// CtxWithInterface and returns a pointer to it.
func NewCtxWithInterface(ctx context.Context, v interface{}) *CtxWithInterface {
	wrapped := new(CtxWithInterface)
	wrapped.Ctx = ctx
	wrapped.V = v
	return wrapped
}
// MergeProcess buffers inserted values in two caches and rotates between them
// so that one cache can be drained while the other keeps accepting inserts.
type MergeProcess struct {
waitMergeMaxTime int64 // maximum wait before merging; Run uses it as a number of seconds
dataMaxSize int64 // entry count of the current cache at which isNeedRotation reports true
firstDataCache, secondDataCache *DataCache // the two caches that are rotated
//handler ProcessHandler
mergeHandler MergeHandler // passed to DataCache.Insert to merge values that share a key
currentCache **DataCache // points at the cache that currently receives inserts
isRotating int32 // set to 1 while rotation runs, reset to 0 when it returns
sync.Mutex // held for the whole duration of rotation
}
// String renders the merge settings, both caches and the rotation flag for
// debug logging.
func (mp *MergeProcess) String() string {
	// The second cache used to be printed under the "FirstDataMap" label as
	// well, which made the two caches indistinguishable in the output.
	return fmt.Sprintf("WaitMergeMaxTime=%d DataMaxSize=%d FirstDataMap=%s SecondDataMap=%s isRotating=%d",
		mp.waitMergeMaxTime, mp.dataMaxSize, mp.firstDataCache, mp.secondDataCache, mp.isRotating)
}
// getCurrentDataCache returns the cache that currentCache points at.
func (mp *MergeProcess) getCurrentDataCache() *DataCache {
	active := mp.currentCache
	return *active
}
// Insert wraps newData together with ctx and stores it under key in the cache
// that is current at the time of the call, merging with any existing value via
// the configured merge handler. It returns whatever DataCache.Insert returns.
func (mp *MergeProcess) Insert(ctx context.Context, key string, newData interface{}) error {
	target := mp.getCurrentDataCache()
	entry := NewCtxWithInterface(ctx, newData)
	return target.Insert(key, entry, mp.mergeHandler)
}
// rotation switches inserts over to the other cache and then drains the cache
// that was just retired. It holds the mutex for its whole duration, so only
// one rotation runs at a time.
func (mp *MergeProcess) rotation() {
	mp.Lock()
	defer mp.Unlock()
	startTime := utils.GetTimeStamp()
	defer func() {
		atomic.CompareAndSwapInt32(&mp.isRotating, 1, 0)
	}()
	if atomic.LoadInt32(&mp.isRotating) == 1 {
		utils.DebugPrintf("still rotation return")
		return
	}
	// start rotation
	atomic.StoreInt32(&mp.isRotating, 1)

	retired := mp.getCurrentDataCache()
	mp.swapCurrentCache()
	utils.DebugPrintf("oldcurrentCache=%+v currentCache=%+v same=%t", retired, mp.getCurrentDataCache(), retired == mp.getCurrentDataCache())

	// Drain the cache that was swapped OUT. Draining the new current cache
	// (as the code used to do) processes and flushes the cache that is
	// receiving inserts, while the retired data sits untouched until the next
	// rotation.
	if err := retired.HandleCacheV2(); err != nil {
		logger.LogErrorf("HandleCacheV2 err=%s", err)
	}
	utils.DebugPrintf("rotation finish !!!!!!!!!!!!!! useTime=%d mp=%s", utils.GetTimeStamp()-startTime, mp)
}
// swapCurrentCache repoints currentCache at the cache that is not in use:
// the second cache when the first one is current, the first one otherwise.
func (mp *MergeProcess) swapCurrentCache() {
	next := &mp.firstDataCache
	if mp.getCurrentDataCache() == mp.firstDataCache {
		next = &mp.secondDataCache
	}
	mp.currentCache = next
}
// isNeedRotation reports whether the current cache holds at least dataMaxSize
// entries.
func (mp *MergeProcess) isNeedRotation() bool {
	currentSet := mp.getCurrentDataCache()
	// count is updated with atomic.AddInt64 by concurrent inserts, so it must
	// be read atomically as well.
	return atomic.LoadInt64(&currentSet.count) >= mp.dataMaxSize
}
// Run is the rotation loop. It rotates every waitMergeMaxTime seconds and,
// checked once per second, whenever the current cache has reached
// dataMaxSize. It does not return on its own; if the loop panics the panic is
// logged and a fresh Run goroutine is started.
func (mp *MergeProcess) Run() {
	// Registered first so it runs last: by the time the replacement goroutine
	// is started, this invocation's tickers have already been stopped.
	defer func() {
		if err := recover(); err != nil {
			stack := utils.Stack(3)
			// The original format string had a fourth %s fed with a stray
			// `false`, which rendered as "%!s(bool=false)".
			logger.LogErrorf("[Recovery] %s %v panic recovered:\n%s", time.Now().Format("2006/01/02 - 15:04:05"), err, stack)
			go mp.Run()
		}
	}()

	// time.Tick tickers cannot be stopped, so every panic restart used to
	// leave two more running. Use explicit tickers and stop them on exit.
	// A nil channel never fires, which keeps time.Tick's behaviour of
	// disabling the time-based rotation for a non-positive interval.
	var maxRunC <-chan time.Time
	if interval := time.Duration(mp.waitMergeMaxTime) * time.Second; interval > 0 {
		maxRunTicker := time.NewTicker(interval)
		defer maxRunTicker.Stop()
		maxRunC = maxRunTicker.C
	}
	checkTicker := time.NewTicker(time.Second)
	defer checkTicker.Stop()

	for {
		select {
		case <-maxRunC:
			mp.rotation()
		case <-checkTicker.C:
			if mp.isNeedRotation() {
				utils.DebugPrint("isNeedRotation true start rotation")
				mp.rotation()
			}
		}
	}
}
// NewMergeProcess builds a MergeProcess with two caches that share worker,
// makes the first cache the current one and starts the rotation loop in its
// own goroutine before returning.
//
// waitMergeTime is the rotation interval in seconds and dataMaxSize the entry
// count that triggers a rotation; mergeHandler merges values inserted under
// the same key.
func NewMergeProcess(waitMergeTime, dataMaxSize int64, mergeHandler MergeHandler, worker *Worker) *MergeProcess {
	firstCache := NewDataCache("firstDataCache", worker)
	secondCache := NewDataCache("secondDataCache", worker)

	mp := new(MergeProcess)
	mp.waitMergeMaxTime = waitMergeTime
	mp.dataMaxSize = dataMaxSize
	mp.firstDataCache = firstCache
	mp.secondDataCache = secondCache
	mp.isRotating = 0
	mp.currentCache = &firstCache
	mp.mergeHandler = mergeHandler

	go mp.Run()
	return mp
}
dataCache
dataCache 的功能主要是管理数据、执行合并操作,以及在 rotation 的时候把具体的处理方法暴露给上一层使用,用来处理请求。
- 插入,合并数据。
- 处理数据。
package merge_utils
import (
"errors"
"fmt"
"github.com/patrickmn/go-cache"
"sync/atomic"
)
// DataCache holds the merged values of one rotation slot together with the
// worker that processes them.
type DataCache struct {
cache *cache.Cache // backing store, keyed by the insert key
localLock *local_lock.LocalLock // acquired per key by Insert around its read-merge-write
count int64 // number of distinct keys inserted since the last drain; updated atomically
name string // label used in debug output
worker *Worker // receives every cached item in HandleCacheV2
}
// String renders the cache name and its current entry count for debug logging.
func (dc *DataCache) String() string {
	// count is modified with atomic operations elsewhere, so read it
	// atomically here too; String is invoked from concurrent Insert calls.
	return fmt.Sprintf("name=%s count=%d", dc.name, atomic.LoadInt64(&dc.count))
}
// HandleCacheV2 hands every cached item to the worker, blocks until the worker
// signals completion, then empties the cache and resets the worker counters
// and the entry count. It always returns nil.
//
// NOTE(review): completion is signalled by Worker.count when finishSize equals
// totalSize, and totalSize grows while items are still being submitted here,
// so the signal can presumably fire before the last item is processed —
// confirm against the Worker implementation.
func (dc *DataCache) HandleCacheV2() error {
	startTime := utils.GetTimeStamp()

	// Decide emptiness from the very snapshot that is submitted. Checking
	// ItemCount() and then submitting Items() could wait forever on the
	// worker if the snapshot turned out to be empty, because nothing would
	// ever trigger the completion signal.
	allItem := dc.cache.Items()
	if len(allItem) == 0 {
		utils.DebugPrintf("HandleCacheV2 use_time=%d", utils.GetTimeStamp()-startTime)
		return nil
	}
	for _, item := range allItem {
		dc.worker.Submit(item.Object)
	}
	// wait finish
	dc.worker.waitUntilFinish()
	// clear all
	dc.cache.Flush()
	dc.worker.clear()
	atomic.StoreInt64(&dc.count, 0)
	utils.DebugPrintf("HandleCacheV2 use_time=%d", utils.GetTimeStamp()-startTime)
	return nil
}
// HandleCache runs processHandler sequentially over every cached item,
// deletes each item after it has been handled and resets the entry count.
// Failures do not stop the loop; their messages are concatenated and returned
// as a single error, or nil when every item succeeded.
func (dc *DataCache) HandleCache(processHandler ProcessHandler) error {
	// todo in this process can not insert data any more. can block? how? Concurrency run?
	allItem := dc.cache.Items()
	errMsg := ""
	for key, item := range allItem {
		// Insert stores *CtxWithInterface, so asserting the value type (as the
		// code used to do) panicked on every entry. Accept both forms and
		// report anything else instead of panicking.
		var entry *CtxWithInterface
		switch obj := item.Object.(type) {
		case *CtxWithInterface:
			entry = obj
		case CtxWithInterface:
			entry = &obj
		}
		if entry == nil {
			errMsg += fmt.Sprintf("err=unusable cache value of type %T key=%s value=%+v", item.Object, key, item.Object)
		} else if err := processHandler(entry.Ctx, entry.V); err != nil {
			errMsg += fmt.Sprintf("err=%s key=%s value=%+v", err, key, item.Object)
		}
		dc.cache.Delete(key)
	}
	atomic.StoreInt64(&dc.count, 0)
	if errMsg == "" {
		return nil
	}
	return errors.New(errMsg)
}
// Insert stores newData under key. When the key is new the value is stored
// as-is and the entry count is incremented. When the key already exists, both
// the cached and the new value must be *CtxWithInterface: their payloads are
// combined with mergeHandler and stored together with the new value's context.
//
// The per-key lock is held for the whole read-merge-write sequence. An error
// is returned (and the cache left untouched) when a merge is required but a
// value has the wrong type or mergeHandler is nil.
func (dc *DataCache) Insert(key string, newData interface{}, mergeHandler MergeHandler) error {
	// lock this key
	dc.localLock.Acquire(key)
	defer dc.localLock.Release(key)
	utils.DebugPrintf("setName=%s key=%s v=%+v", dc.name, key, newData)

	oldData, found := dc.cache.Get(key)
	if !found {
		atomic.AddInt64(&dc.count, 1)
		utils.DebugPrintf("Insert self=%s", dc)
		dc.cache.Set(key, newData, cache.NoExpiration)
		return nil
	}

	// Checked assertions: this function reports problems through its error
	// result, so a bad value must not panic the caller.
	oldTmp, ok := oldData.(*CtxWithInterface)
	if !ok || oldTmp == nil {
		return fmt.Errorf("merge key %q: cached value has unusable type %T", key, oldData)
	}
	newTmp, ok := newData.(*CtxWithInterface)
	if !ok || newTmp == nil {
		return fmt.Errorf("merge key %q: new value has unusable type %T", key, newData)
	}
	if mergeHandler == nil {
		return fmt.Errorf("merge key %q: merge handler is nil", key)
	}

	mergeData := mergeHandler(oldTmp.V, newTmp.V)
	dc.cache.Set(key, NewCtxWithInterface(newTmp.Ctx, mergeData), cache.NoExpiration)
	utils.DebugPrintf("merge Insert self=%s", dc)
	return nil
}
// NewDataCache creates an empty DataCache called name whose items will be
// processed by worker. The backing store is created with no default
// expiration and no cleanup interval, and the per-key lock is created under
// the same name.
func NewDataCache(name string, worker *Worker) *DataCache {
	store := cache.NewFrom(cache.NoExpiration, cache.NoExpiration, make(map[string]cache.Item))
	keyLock := local_lock.NewDefaultLocalLock(name)

	dc := new(DataCache)
	dc.name = name
	dc.cache = store
	dc.localLock = keyLock
	dc.worker = worker
	dc.count = 0
	return dc
}
worker
dataCache 的 HandleCache 流程是整个 dataCache 的核心。如果直接顺序遍历处理,耗时肯定过长。这个时候就需要引入 worker 的概念:由一组 worker 并发地消费缓存里面的数据。worker 的核心功能点也不多,包含以下几点:
- processHandler 处理函数。
- Submit,开放提交任务。
- Notify chan 用来通知这一组任务执行完毕。
- totalSize, finishSize 来表示任务总大小。以及已完成的数据量大小。 相等时便触发通知。
// Worker is a fixed group of goroutines that run handler on submitted items
// and signal on Notify once the finished count reaches the submitted count.
type Worker struct {
workerNumber int64 // go routine number will not quit
processDataChan chan *CtxWithInterface // data chan
handler ProcessHandler // invoked once per submitted item
chanSize int64 // buffer size of processDataChan
totalSize int64 // number of items submitted; updated atomically
finishSize int64 // number of items finished; updated atomically
Notify chan struct{} // receives one value when finishSize reaches totalSize
}
相关实现:
package merge_utils
import (
"context"
"fmt"
"sync/atomic"
"time"
)
//var debug = true
// ProcessHandler consumes one merged value together with the context it was
// inserted with; a non-nil error reports that processing failed.
type ProcessHandler func(context.Context, interface{}) error
// Worker is a fixed group of goroutines that run handler on submitted items
// and signal on Notify once the finished count reaches the submitted count.
type Worker struct {
workerNumber int64 // go routine number will not quit
processDataChan chan *CtxWithInterface // data chan
handler ProcessHandler // invoked once per submitted item
chanSize int64 // buffer size of processDataChan
totalSize int64 // number of items submitted; updated atomically
finishSize int64 // number of items finished; updated atomically
Notify chan struct{} // receives one value when finishSize reaches totalSize
}
// NewDataSetWorker creates a Worker with workerNumber goroutines reading from
// a channel buffered to chanSize items, starts those goroutines and returns
// the Worker. handler is called for every submitted item.
func NewDataSetWorker(workerNumber, chanSize int64, handler ProcessHandler) *Worker {
	w := &Worker{
		workerNumber:    workerNumber,
		chanSize:        chanSize,
		handler:         handler,
		Notify:          make(chan struct{}),
		finishSize:      0,
		totalSize:       0,
		processDataChan: make(chan *CtxWithInterface, chanSize),
	}
	// The goroutines are started here, so the returned Worker is ready to
	// accept Submit calls.
	w.Run()
	return w
}
// NewDefaultDataSetWorker creates a started Worker with 10 goroutines and a
// channel buffer of 100 items.
func NewDefaultDataSetWorker(handler ProcessHandler) *Worker {
	const (
		defaultWorkerNumber = 10
		defaultChanSize     = 100
	)
	return NewDataSetWorker(defaultWorkerNumber, defaultChanSize, handler)
}
// String renders the worker configuration and its progress counters for debug
// logging.
func (w *Worker) String() string {
	// totalSize and finishSize are updated atomically by Submit and count
	// while the worker goroutines log through String, so load them atomically.
	total := atomic.LoadInt64(&w.totalSize)
	finished := atomic.LoadInt64(&w.finishSize)
	return fmt.Sprintf("workerNumber=%d chanSize=%d handler=%s totalSize=%d finishSize=%d", w.workerNumber, w.chanSize,
		utils.GetFuncName(w.handler), total, finished)
}
// count records one finished item and sends the completion signal when the
// finished count reaches the submitted count.
func (w *Worker) count() {
	// Compare the value returned by the increment itself. With a separate
	// Load, two goroutines finishing at the same time could both observe
	// finishSize == totalSize and both notify, leaving one of them blocked on
	// the unbuffered Notify channel (or waking the next wait prematurely).
	finished := atomic.AddInt64(&w.finishSize, 1)
	if finished == atomic.LoadInt64(&w.totalSize) {
		w.notify()
	}
}
// notify sends one value on Notify. Notify is created unbuffered, so the send
// blocks until a receiver (waitUntilFinish) takes it.
func (w *Worker) notify() {
	utils.DebugPrintf("r34r3 rotation")
	var done struct{}
	w.Notify <- done
}
// waitUntilFinish blocks until one value is received from Notify, which count
// sends when the finished count reaches the submitted count.
func (w *Worker) waitUntilFinish() {
<-w.Notify
}
// clear resets the submitted and finished counters to zero, in that order.
func (w *Worker) clear() {
	for _, counter := range []*int64{&w.totalSize, &w.finishSize} {
		atomic.StoreInt64(counter, 0)
	}
}
// run is the body of one worker goroutine: it receives items from
// processDataChan forever, passes each to handler and counts it as finished.
// Handler errors are logged, not returned. If processing panics, the panic is
// logged, the item is still counted as finished, and a replacement goroutine
// is started so the number of workers stays constant.
func (w *Worker) run() {
	// worker can not be done
	defer func() {
		if err := recover(); err != nil {
			fmt.Printf("err=%+v", err)
			stack := utils.Stack(3)
			// The original format string had a fourth %s fed with a stray
			// `false`, which rendered as "%!s(bool=false)".
			logger.LogErrorf("[Recovery] %s %v panic recovered:\n%s", time.Now().Format("2006/01/02 - 15:04:05"), err, stack)
			// Count the item whose handling panicked; otherwise finishSize
			// could never reach totalSize.
			w.count()
			go w.run()
		}
	}()
	for {
		// A single-case select is just a receive.
		data := <-w.processDataChan
		utils.DebugPrintf("processDataChan w=%s", w)
		if err := w.handler(data.Ctx, data.V); err != nil {
			logger.LogErrorf("err=%s data=%+v", err, data)
		}
		// after done count down
		w.count()
		utils.DebugPrintf("w=%s", w)
	}
}
// Submit queues data for processing and counts it as submitted. data must be
// a *CtxWithInterface; any other type panics. The send blocks while the
// channel buffer is full.
func (w *Worker) Submit(data interface{}) {
	// Assert before counting: if the payload has the wrong type the panic must
	// not leave totalSize incremented for an item that was never queued,
	// because finishSize could then never catch up with it.
	item := data.(*CtxWithInterface)
	atomic.AddInt64(&w.totalSize, 1)
	w.processDataChan <- item
}
// Run starts workerNumber goroutines, each executing run, and returns without
// waiting for them.
func (w *Worker) Run() {
	for remaining := w.workerNumber; remaining > 0; remaining-- {
		go w.run()
	}
}