Golang实战：利用Atomic和轮询机制实现任务排队和并发流量控制

最新推荐文章于 2025-05-15 20:12:38 发布

liuwill

最新推荐文章于 2025-05-15 20:12:38 发布

阅读量1.5k

点赞数 35

分类专栏：开发技术文章标签： golang 后端架构

本文链接：https://blog.csdn.net/liuwill/article/details/140983299

版权

开发技术专栏收录该内容

12 篇文章

订阅专栏

在一次开发大模型应用的工程化过程中，我们碰到一个问题，开源的模型核心代码是用Python写的，有自己的一套并发管理和排队机制，而模型一次只能处理一个生成任务，生成的时间也很长，在A10上，需要几秒钟到几十秒处理一个请求，就会导致在Python的锁上排队的其他请求被不断的阻塞。

因为团队的主要开发语言是Golang，我们使用Golang开发了一个调度程序，大模型生成任务的请求先提交到Golang服务，经过排队和流量控制，再转发到运行在本机的Python程序处理。

专门为每个部署的Python应用实例，配对启动一个Golang调度程序，相当于在不修改Python源代码的情况下，配置了一套Agent，可以实现对Python程序的扩展，独立的实现很多功能，例如运行统计、数据上报、状态检查、还有请求和响应的转发处理，可以为架构带来更大的灵活性，还可以配合服务端的调度程序和配置，控制客户端的行为。

以上是采用这种方案的架构思考。下文重点介绍在客户端Agent的实现过程中，如何通过Atomic和轮询等机制，实现任务排队和无锁化、可控的流量控制。

实现思路

使用atomic包：通过atomic包实现对共享变量的原子操作，避免数据竞争。
轮询机制：通过轮询方式检查并处理队列中的请求，确保每次只处理一个任务。
超时机制：在轮询过程中计时，如果一个任务等待时间过长，直接返回错误，避免任务堆积。
队列管理：维护一个队列，记录当前排队的任务，如果队列满了，拒绝新请求

示例代码


package main

import (
  "context"
  "fmt"
  "net/http"
  "sync"
  "sync/atomic"
  "time"

  "github.com/google/uuid"
)

// Dispatch result codes handed to a dispatchFunc by dispatch.
// The values start at 1 (iota + 1), so the zero value of int is never a
// valid code.
const (
  // DispatchCode_Success: the task reached its turn and should now run.
  DispatchCode_Success = iota + 1
  // DispatchCode_TooManyRequest: the request was rejected up front because
  // too many dispatch calls were already in flight.
  DispatchCode_TooManyRequest
  // DispatchCode_WaitTooLong: the request gave up while waiting in the queue
  // (either it waited too long itself, or the currently running task has been
  // running for too long).
  DispatchCode_WaitTooLong
  // DispatchCode_Restarting is not produced by any code visible in this file.
  // NOTE(review): presumably reserved for a restart path; confirm.
  DispatchCode_Restarting
)

// Package-level scheduler state shared by all dispatch calls.
var (
  // dispatchList holds the IDs of waiting requests in arrival order;
  // index 0 is the next request allowed to run. Guarded by lock.
  dispatchList []string

  // lock protects dispatchList.
  lock    sync.RWMutex
  // counter is the number of dispatch calls that are currently queued or
  // running. Accessed only through sync/atomic.
  counter int32

  // lastRunTime is the start time of the running task in Unix milliseconds,
  // or 0 when no task is running. Accessed only through sync/atomic.
  lastRunTime int64
  // runStatus is 1 while a task is running and 0 otherwise.
  // Accessed only through sync/atomic.
  runStatus   int32
)

// dispatchFunc is the callback invoked by dispatch. code is one of the
// DispatchCode_* constants and tells the callback whether it may run the task
// or must report a rejection.
type dispatchFunc func(ctx context.Context, code int)

// Recover is meant to be used as `defer Recover(...)`. It first runs the given
// cleanup functions in order, then stops a panic in the deferring function
// from propagating and reports the panic value on standard output.
//
// It must be deferred directly: recover only has an effect when it is called
// by the deferred function itself, so wrapping the call in another closure
// (`defer func() { Recover() }()`) would disable it.
//
// Nil entries in cleanups are skipped.
func Recover(cleanups ...func()) {
  for _, cleanup := range cleanups {
    if cleanup == nil {
      // A nil cleanup would panic here, inside the recovery path itself.
      continue
    }
    cleanup()
  }

  if p := recover(); p != nil {
    // Do not swallow the panic silently: without a trace the failure is
    // invisible to operators.
    fmt.Println("recovered from panic:", p)
  }
}

// runFunc invokes fc with the given ctx and code. Recover is deferred, so a
// panic raised inside fc does not propagate to the caller of runFunc.
func runFunc(ctx context.Context, fc dispatchFunc, code int) {
  // Recover must be deferred directly for its recover() call to take effect.
  defer Recover()

  fc(ctx, code)
}

// addPendingTask puts dispatchId at the tail of the waiting queue.
// The write lock is held for the duration of the append. ctx is unused.
func addPendingTask(ctx context.Context, dispatchId string) {
  lock.Lock()
  dispatchList = append(dispatchList, dispatchId)
  lock.Unlock()
}

// checkPendingTask reports whether dispatchId is allowed to go next: either
// the waiting queue is empty, or dispatchId is the entry at its head.
// Only a read lock is taken. ctx is unused.
func checkPendingTask(ctx context.Context, dispatchId string) bool {
  lock.RLock()
  defer lock.RUnlock()

  return len(dispatchList) == 0 || dispatchList[0] == dispatchId
}

// removePendingTask deletes the first occurrence of dispatchId from the
// waiting queue, keeping the order of the remaining entries. It is a no-op
// when the ID is not queued. ctx is unused.
func removePendingTask(ctx context.Context, dispatchId string) {
  lock.Lock()
  defer lock.Unlock()

  for pos := 0; pos < len(dispatchList); pos++ {
    if dispatchList[pos] != dispatchId {
      continue
    }
    // Found it: close the gap and stop, the slice must not be scanned
    // further after it has been modified.
    dispatchList = append(dispatchList[:pos], dispatchList[pos+1:]...)
    return
  }
}

// dispatch queues the caller, polls until it is first in line and no other
// task is running, and then invokes fc. At most one fc runs with
// DispatchCode_Success at any time.
//
// fc is called exactly once, with one of the following codes:
//   - DispatchCode_TooManyRequest: too many dispatch calls are already in
//     flight; the request was never queued.
//   - DispatchCode_WaitTooLong: the request left the queue without running,
//     because it waited longer than five minutes, because the task currently
//     running has been running longer than five minutes, or because ctx was
//     cancelled while waiting.
//   - DispatchCode_Success: it is this request's turn; fc should do the work.
//
// ctx must be non-nil. Panics inside fc are contained by runFunc.
func dispatch(ctx context.Context, fc dispatchFunc) {
  const (
    // maxInFlight is the number of requests (queued + running) admitted at
    // once. It equals the capacity of the previous `counter > 3` pre-check.
    maxInFlight = 4
    // maxWait bounds both the time a request may wait and the age of the
    // running task beyond which waiters give up.
    maxWait = 5 * time.Minute
    // pollInterval is the base delay between polls; the actual delay cycles
    // through 1x, 2x and 3x of it.
    pollInterval = 100 * time.Millisecond
  )

  // Reserve the slot first and check afterwards. A separate Load followed by
  // a later Add lets concurrent arrivals all pass the check and exceed the
  // limit.
  if atomic.AddInt32(&counter, 1) > maxInFlight {
    atomic.AddInt32(&counter, -1)
    runFunc(ctx, fc, DispatchCode_TooManyRequest)
    return
  }
  defer atomic.AddInt32(&counter, -1)

  requestStartAt := time.Now()
  dispatchID := uuid.New().String()
  addPendingTask(ctx, dispatchID)

  // giveUp leaves the queue and reports the timeout to the callback.
  giveUp := func() {
    removePendingTask(ctx, dispatchID)
    runFunc(ctx, fc, DispatchCode_WaitTooLong)
  }

  for i := 0; ; i++ {
    if time.Since(requestStartAt) > maxWait {
      giveUp()
      return
    }

    // lastRunTime is 0 while nothing is running, so this only fires when the
    // task ahead of us appears to be stuck.
    if lastRunAt := atomic.LoadInt64(&lastRunTime); lastRunAt > 0 && time.Since(time.UnixMilli(lastRunAt)) > maxWait {
      giveUp()
      return
    }

    // Claim the run slot atomically. Testing runStatus here and setting it
    // only after leaving the loop would let the next waiter observe itself
    // at the head of the queue with runStatus still 0 and start a second
    // task concurrently.
    if checkPendingTask(ctx, dispatchID) && atomic.CompareAndSwapInt32(&runStatus, 0, 1) {
      break
    }

    // Sleep until the next poll, but stop waiting as soon as the caller is
    // gone so that abandoned requests do not occupy the queue.
    timer := time.NewTimer(pollInterval * time.Duration(1+i%3))
    select {
    case <-ctx.Done():
      timer.Stop()
      giveUp()
      return
    case <-timer.C:
    }
  }

  // Deferred calls run in reverse order: lastRunTime is cleared first, then
  // the run slot is released, then the in-flight counter is decremented.
  defer atomic.StoreInt32(&runStatus, 0)
  removePendingTask(ctx, dispatchID)

  defer atomic.StoreInt64(&lastRunTime, 0)
  atomic.StoreInt64(&lastRunTime, time.Now().UnixMilli())

  runFunc(ctx, fc, DispatchCode_Success)
}

// requestHandler is the HTTP entry point. It hands the request to dispatch
// and writes the response from inside the dispatch callback:
//   - DispatchCode_TooManyRequest -> 429 with a JSON error body
//   - DispatchCode_WaitTooLong    -> 408 with a JSON error body
//   - any other code              -> runs processRequest; 400 without body on
//     error, otherwise the result as a JSON response
//
// Panics are contained by the deferred Recover.
func requestHandler(w http.ResponseWriter, r *http.Request) {
  // Recover must be deferred directly for its recover() call to take effect.
  defer Recover()

  dispatch(r.Context(), func(ctx context.Context, code int) {
    switch code {
    case DispatchCode_TooManyRequest:
      // Headers have to be set before WriteHeader; without an explicit
      // Content-Type net/http sniffs the JSON body as text/plain.
      w.Header().Set("Content-Type", "application/json")
      w.WriteHeader(http.StatusTooManyRequests)
      _, _ = w.Write([]byte(`{"error": "too_many_requests", "code": 429}`))
      return
    case DispatchCode_WaitTooLong:
      w.Header().Set("Content-Type", "application/json")
      w.WriteHeader(http.StatusRequestTimeout)
      _, _ = w.Write([]byte(`{"error": "request_timeout", "code": 408}`))
      return
    }

    result, err := processRequest(ctx)
    if err != nil {
      w.WriteHeader(http.StatusBadRequest)
      return
    }

    w.Header().Set("Content-Type", "application/json")
    // A failed write means the client went away; nothing left to do.
    _, _ = w.Write(result)
  })
}

// processRequest stands in for the real, slow task: it prints a start
// message, sleeps for five seconds, prints a completion message and returns
// a nil payload with a nil error. ctx is not consulted.
func processRequest(ctx context.Context) ([]byte, error) {
  // Simulated duration of the time-consuming work.
  const simulatedWork = 5 * time.Second

  fmt.Println("Processing request")
  time.Sleep(simulatedWork)
  fmt.Println("Request processed")

  return nil, nil
}

// main registers requestHandler for every path on the default mux and serves
// HTTP on port 8080. ListenAndServe blocks; when it returns, the error is
// printed and the program ends.
func main() {
  const listenAddr = ":8080"

  http.HandleFunc("/", requestHandler)
  fmt.Println("Server started at :8080")

  err := http.ListenAndServe(listenAddr, nil)
  if err != nil {
    fmt.Println("Server failed:", err)
  }
}

回顾和总结

我们所做的工作基于这样一个事实：processRequest的执行，同时只能由一个请求调用，否则所有请求都会被阻塞挂起，然后按照FIFO顺序，逐个处理完，如果中间需要释放被挂起的请求，唯一的方法是重启进程。

最初的实现版本中，Agent的职能是数据采集、子进程管理和状态检查，辅助服务端的调度服务器，动态的选择执行大模型生成任务的节点。因此，我们采用了简单加锁的方法，在调用processRequest之前，要先加锁，然后执行完之后释放锁。这种方法在一段时间内运行得很平稳。

然而，一次突发的请求高峰打破了这种平衡：所有可以调度的GPU服务器都堆积了大量的请求，因为锁的存在，堆积的请求都被关在一个无法通过编程访问的队列中，程序层面什么也做不了，只能等待已经被阻塞的任务全部按顺序执行完。就算新增了机器，或者出现某个特别长的任务阻塞的情况，也无法针对后续的任务优化响应时间。

为了解决这个问题，我们认识到问题在于锁是阻塞的，被阻塞的线程在锁释放之前，无法进行任何操作；优化的思路，源于操作系统实现中早就存在的思想。

就是引入轮询和等待机制，将“死锁”变成“活锁”，如果请求来到的时候，队列中已经有太多的请求，那么可以直接拒绝再处理更多请求。

程序的核心是轮询和等待，也就是for循环：