这里主要分析下通过temporal的sdk创建一个workflow的流程
客户端代码
源码入口:temporal-go/internal/internal_workflow_client.go
// ExecuteWorkflow builds a StartWorkflowExecutionRequest from the interceptor
// input, sends it through the client's workflow service and wraps the
// resulting run in a WorkflowRun handle.
//
// It returns an error when the options carry no workflow ID, when encoding
// the arguments, memo, search attributes or propagated headers fails, or when
// the start RPC fails. A WorkflowExecutionAlreadyStarted error is tolerated
// (the run ID carried by the error is used instead) unless
// Options.WorkflowExecutionErrorWhenAlreadyStarted is set.
func (w *workflowClientInterceptor) ExecuteWorkflow(
	ctx context.Context,
	in *ClientExecuteWorkflowInput,
) (WorkflowRun, error) {
	// The workflow ID is expected to be filled in before interceptors run.
	workflowID := in.Options.ID
	if workflowID == "" {
		return nil, fmt.Errorf("no workflow ID in options")
	}

	// Local copies, so that the request can hold pointers to them.
	var (
		execTimeout = in.Options.WorkflowExecutionTimeout
		runTimeout  = in.Options.WorkflowRunTimeout
		taskTimeout = in.Options.WorkflowTaskTimeout
	)

	dc := WithContext(ctx, w.client.dataConverter)
	if dc == nil {
		dc = converter.GetDefaultDataConverter()
	}

	// Serialize everything that travels with the start request.
	payloads, err := encodeArgs(dc, in.Args)
	if err != nil {
		return nil, err
	}

	memo, err := getWorkflowMemo(in.Options.Memo, dc)
	if err != nil {
		return nil, err
	}

	searchAttrs, err := serializeSearchAttributes(in.Options.SearchAttributes)
	if err != nil {
		return nil, err
	}

	// Let the context propagators contribute the workflow headers
	// (tracing information and the like).
	hdr, err := headerPropagated(ctx, w.client.contextPropagators)
	if err != nil {
		return nil, err
	}

	req := &workflowservice.StartWorkflowExecutionRequest{
		Namespace:                w.client.namespace,
		RequestId:                uuid.New(),
		WorkflowId:               workflowID,
		WorkflowType:             &commonpb.WorkflowType{Name: in.WorkflowType},
		TaskQueue:                &taskqueuepb.TaskQueue{Name: in.Options.TaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL},
		Input:                    payloads,
		WorkflowExecutionTimeout: &execTimeout,
		WorkflowRunTimeout:       &runTimeout,
		WorkflowTaskTimeout:      &taskTimeout,
		Identity:                 w.client.identity,
		WorkflowIdReusePolicy:    in.Options.WorkflowIDReusePolicy,
		RetryPolicy:              convertToPBRetryPolicy(in.Options.RetryPolicy),
		CronSchedule:             in.Options.CronSchedule,
		Memo:                     memo,
		SearchAttributes:         searchAttrs,
		Header:                   hdr,
	}

	grpcCtx, cancel := newGRPCContext(
		ctx,
		grpcMetricsHandler(w.client.metricsHandler.WithTags(
			metrics.RPCTags(in.WorkflowType, metrics.NoneTagValue, in.Options.TaskQueue),
		)),
		defaultGrpcRetryParameters(ctx),
	)
	defer cancel()

	resp, err := w.client.workflowService.StartWorkflowExecution(grpcCtx, req)

	// Decide which run ID the handle refers to. An "already started" error is
	// accepted unless the caller explicitly asked for it to be surfaced.
	alreadyStarted, isAlreadyStarted := err.(*serviceerror.WorkflowExecutionAlreadyStarted)
	var runID string
	switch {
	case isAlreadyStarted && !in.Options.WorkflowExecutionErrorWhenAlreadyStarted:
		runID = alreadyStarted.RunId
	case err != nil:
		return nil, err
	default:
		runID = resp.RunId
	}

	// historyIter fetches the close event of a given run; the returned handle
	// stores it as iterFn.
	historyIter := func(fnCtx context.Context, fnRunID string) HistoryEventIterator {
		handler := w.client.metricsHandler.WithTags(
			metrics.RPCTags(in.WorkflowType, metrics.NoneTagValue, in.Options.TaskQueue),
		)
		return w.client.getWorkflowHistory(
			fnCtx,
			workflowID,
			fnRunID,
			true,
			enumspb.HISTORY_EVENT_FILTER_TYPE_CLOSE_EVENT,
			handler,
		)
	}

	currentRun := util.PopulatedOnceCell(runID)
	run := &workflowRunImpl{
		workflowType:     in.WorkflowType,
		workflowID:       workflowID,
		firstRunID:       runID,
		currentRunID:     &currentRun,
		iterFn:           historyIter,
		dataConverter:    w.client.dataConverter,
		failureConverter: w.client.failureConverter,
		registry:         w.client.registry,
	}
	return run, nil
}
上面代码的逻辑大致如下:
1. 通过数据转换器编码workflow方法参数
2. 向temporal服务器发送开始执行workflow的rpc请求
3. 返回workflowRun对象,包含一些运行时信息供调用方使用
服务端代码
源码入口:temporal/service/frontend/workflow_handler.go
// StartWorkflowExecution validates an incoming start-workflow request and
// forwards it to the history service through wh.historyClient.
//
// On success it returns a response carrying the run ID and the eager workflow
// task taken from the history service response. Otherwise it returns the
// first validation, namespace lookup or history client error encountered.
// The checks run in the order written, so that order decides which error a
// caller sees when several fields are invalid.
func (wh *WorkflowHandler) StartWorkflowExecution(ctx context.Context, request *workflowservice.StartWorkflowExecutionRequest) (_ *workflowservice.StartWorkflowExecutionResponse, retError error) {
// NOTE(review): presumably recovers a panic and reports it through retError — confirm against log.CapturePanic.
defer log.CapturePanic(wh.logger, &retError)
// A missing request is rejected before any field is inspected.
if request == nil {
return nil, errRequestNotSet
}
// Validate the workflow ID, retry policy, start delay and cron schedule.
if err := wh.validateWorkflowID(request.GetWorkflowId()); err != nil {
return nil, err
}
namespaceName := namespace.Name(request.GetNamespace())
if err := wh.validateRetryPolicy(namespaceName, request.RetryPolicy); err != nil {
return nil, err
}
if err := wh.validateWorkflowStartDelay(request.GetCronSchedule(), request.GetWorkflowStartDelay()); err != nil {
return nil, err
}
if err := backoff.ValidateSchedule(request.GetCronSchedule()); err != nil {
return nil, err
}
wh.logger.Debug("Received StartWorkflowExecution.", tag.WorkflowID(request.GetWorkflowId()))
// The workflow type must be present, with a non-empty name no longer than MaxIDLengthLimit.
if request.WorkflowType == nil || request.WorkflowType.GetName() == "" {
return nil, errWorkflowTypeNotSet
}
if len(request.WorkflowType.GetName()) > wh.config.MaxIDLengthLimit() {
return nil, errWorkflowTypeTooLong
}
if err := wh.validateTaskQueue(request.TaskQueue, namespaceName); err != nil {
return nil, err
}
if err := wh.validateStartWorkflowTimeouts(request); err != nil {
return nil, err
}
if request.GetRequestId() == "" {
// For easy direct API use, we default the request ID here but expect all
// SDKs and other auto-retrying clients to set it
request.RequestId = uuid.New()
}
// The request ID is bounded by the same MaxIDLengthLimit as the workflow type name.
if len(request.GetRequestId()) > wh.config.MaxIDLengthLimit() {
return nil, errRequestIDTooLong
}
// From here on `request` refers to the value returned by the unalias step, not the original argument.
// NOTE(review): presumably this maps search attribute aliases back to their stored names — confirm.
request, err := wh.unaliasStartWorkflowExecutionRequestSearchAttributes(request, namespaceName)
if err != nil {
return nil, err
}
if err = wh.validateSearchAttributes(request.GetSearchAttributes(), namespaceName); err != nil {
return nil, err
}
// NOTE(review): presumably fills in a default reuse policy only when none was specified — confirm.
enums.SetDefaultWorkflowIdReusePolicy(&request.WorkflowIdReusePolicy)
wh.logger.Debug("Start workflow execution request namespace.", tag.WorkflowNamespace(namespaceName.String()))
// Translate the namespace name into its ID through the namespace registry.
namespaceID, err := wh.namespaceRegistry.GetNamespaceID(namespaceName)
if err != nil {
return nil, err
}
wh.logger.Debug("Start workflow execution request namespaceID.", tag.WorkflowNamespaceID(namespaceID.String()))
// Hand the request to the history service, passing the namespace ID and the current UTC time.
resp, err := wh.historyClient.StartWorkflowExecution(ctx, common.CreateHistoryStartWorkflowRequest(namespaceID.String(), request, nil, time.Now().UTC()))
if err != nil {
return nil, err
}
return &workflowservice.StartWorkflowExecutionResponse{RunId: resp.GetRunId(), EagerWorkflowTask: resp.GetEagerWorkflowTask()}, nil
}
上面代码逻辑大致如下:
1. 对客户端参数的一些必要性校验
2. 向temporal的history服务发起请求
下面来看下frontend是如何通过history客户端把请求路由到history服务的
// StartWorkflowExecution sends the start request to a history service client
// chosen for the shard derived from the request's namespace ID and workflow
// ID. The call goes through c.executeWithRedirect, which is handed the shard
// ID and the operation to run.
//
// It returns the response of the call, or nil and the error reported by
// executeWithRedirect.
func (c *clientImpl) StartWorkflowExecution(
	ctx context.Context,
	request *historyservice.StartWorkflowExecutionRequest,
	opts ...grpc.CallOption,
) (*historyservice.StartWorkflowExecutionResponse, error) {
	shardID := c.shardIDFromWorkflowID(request.NamespaceId, request.GetStartRequest().GetWorkflowId())

	// result is filled in by the closure below each time it is invoked.
	var result *historyservice.StartWorkflowExecutionResponse
	call := func(opCtx context.Context, historyClient historyservice.HistoryServiceClient) error {
		// Each attempt gets its own context derived via c.createContext.
		callCtx, cancel := c.createContext(opCtx)
		defer cancel()

		var callErr error
		result, callErr = historyClient.StartWorkflowExecution(callCtx, request, opts...)
		return callErr
	}

	err := c.executeWithRedirect(ctx, shardID, call)
	if err != nil {
		return nil, err
	}
	return result, nil
}
// execute checks the shard ID, looks up the address for that shard through
// the history service resolver, and then runs op via redirectLoop starting
// at that address. Any error from the check or the lookup is returned as is.
func (r *basicRedirector) execute(ctx context.Context, shardID int32, op clientOperation) error {
	err := checkShardID(shardID)
	if err != nil {
		return err
	}

	target, err := shardLookup(r.historyServiceResolver, shardID)
	if err != nil {
		return err
	}

	return r.redirectLoop(ctx, target, op)
}
// redirectLoop runs op against the history client for address. When op fails
// with a ShardOwnershipLost error that names a non-empty owner host, the loop
// retries against that host; any other outcome (including success) is
// returned to the caller. The context is checked with common.IsValidContext
// before every attempt, and its error ends the loop.
func (r *basicRedirector) redirectLoop(ctx context.Context, address rpcAddress, op clientOperation) error {
	for {
		if ctxErr := common.IsValidContext(ctx); ctxErr != nil {
			return ctxErr
		}

		conn := r.connections.getOrCreateClientConn(address)
		opErr := op(ctx, conn.historyClient)

		var ownershipLost *serviceerrors.ShardOwnershipLost
		if errors.As(opErr, &ownershipLost) && len(ownershipLost.OwnerHost) > 0 {
			// TODO: consider emitting a metric for number of redirects
			address = rpcAddress(ownershipLost.OwnerHost)
			continue
		}

		// Success, an unrelated error, or an ownership-lost error without a
		// new owner: nothing left to redirect to.
		return opErr
	}
}
上面代码逻辑大致如下:
1. 根据namespaceId和workflowId计算出history服务的分片id(shardID)
2. 通过分片id找到具体的history服务器地址
3. 向具体的history服务发起执行workflow的rpc请求;如果对方返回ShardOwnershipLost错误并带有新的owner地址,则改向新地址重新发起请求
History服务代码
源码入口:temporal/service/history/handler.go
// StartWorkflowExecution resolves the shard for the request's namespace ID
// and workflow ID, obtains that shard's engine and delegates the start
// request to it. If the engine's response carries no clock, a new vector
// clock from the shard context is attached before returning.
//
// Every error is passed through h.convertError; an empty namespace ID yields
// the converted errNamespaceNotSet.
func (h *Handler) StartWorkflowExecution(ctx context.Context, request *historyservice.StartWorkflowExecutionRequest) (_ *historyservice.StartWorkflowExecutionResponse, retError error) {
	defer metrics.CapturePanic(h.logger, h.metricsHandler, &retError)
	// NOTE(review): presumably blocks until handler startup has finished — confirm where startWG is released.
	h.startWG.Wait()

	nsID := namespace.ID(request.GetNamespaceId())
	if nsID == "" {
		return nil, h.convertError(errNamespaceNotSet)
	}

	shard, err := h.controller.GetShardByNamespaceWorkflow(nsID, request.StartRequest.GetWorkflowId())
	if err != nil {
		return nil, h.convertError(err)
	}

	shardEngine, err := shard.GetEngine(ctx)
	if err != nil {
		return nil, h.convertError(err)
	}

	resp, err := shardEngine.StartWorkflowExecution(ctx, request)
	if err != nil {
		return nil, h.convertError(err)
	}

	// The engine already supplied a clock: nothing more to do.
	if resp.Clock != nil {
		return resp, nil
	}

	if resp.Clock, err = shard.NewVectorClock(); err != nil {
		return nil, h.convertError(err)
	}
	return resp, nil
}
这里只贴了部分代码,一系列的调用方法就不贴在这里了,history服务的大致逻辑就是:
对请求进行一些相关的处理,然后将workflow启动事件写入数据库,最后发送一个workflow任务的广播