GO-第三节:工厂模式+泛型
一、背景
- 基于Kubeflow, 创建内置的Job, 如PytorchJob、PaddleJob、XGBoostJob、TFJob等等
- 基于上述做工厂模式统一处理
- 通过接口传递枚举类型, 创建相对应的Job客户端实例, 然后调用其内部API函数; 以下示例演示如何按名称获取Job并打印其状态
- 要求go的版本1.18+
- 不对之处请指教, 老样子不废话直接开干
二、构建介绍
- 整体目录结构如下
❯ tree
.
├── bo
│   └── KubeJobBo.go
├── enum
│   └── framework.enum.go
├── go.mod
├── go.sum
├── kubeflow
│   ├── client
│   │   ├── kubeflow.job.client.service.go
│   │   ├── paddle.job.client.service.go
│   │   └── pytorch.job.client.service.go
│   └── factory
│       ├── kubeflow.factory.go
│       ├── paddle.factory.go
│       └── pytorch.factory.go
└── main.go
- 创建枚举
- 在enum包下创建枚举结构 framework.enum.go 文件, 内容如下
// FrameworkType
// Title FrameworkType
// Description Enumeration of the training framework types.
// swagger:enum FrameworkType(PYTORCH PADDLE)
type FrameworkType string

// Supported framework types.
const (
	// PYTORCH is the framework type for PyTorch.
	// swagger:enum PYTORCH 框架类型 - PYTORCH
	PYTORCH = FrameworkType("PYTORCH")
	// PADDLE is the framework type for Paddle.
	// swagger:enum PADDLE 框架类型 - PADDLE
	PADDLE = FrameworkType("PADDLE")
)

// Value
// Title Value
// Description Returns the enum's underlying string converted to lower case,
// e.g. PYTORCH.Value() yields "pytorch".
// Return string the lower-cased enum value
func (f FrameworkType) Value() string {
	raw := string(f)
	return strings.ToLower(raw)
}
- 创建训练框架客户端接口
- 在 kubeflow.job.client.service.go 中创建KubeflowJobClient泛型客户端接口
- 然后分别创建PaddleJobClient、PyTorchJobClient客户端, 实现KubeflowJobClient接口方法
package client
import (
"context"
v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
kfClientSetV1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"xincan.com.cn/demo/bo"
)
// KubeflowJobClient
// Title KubeflowJobClient
// Description Generic client interface for training-framework jobs; the type
// parameter T is the job resource type that Get returns a pointer to.
// Auth alittlexincan@163.com (2024/07/22 11:36)
type KubeflowJobClient[T any] interface {
// Get fetches the training job with the given name and returns it, or an error.
Get(ctx context.Context, name string, opts metav1.GetOptions) (*T, error)
}
// PaddleJobClient
// Title PaddleJobClient
// Description Client service struct for PaddleJob resources; its Get method
// matches the KubeflowJobClient interface instantiated with v1.PaddleJob.
// Auth alittlexincan@163.com (2024/07/22 11:36)
type PaddleJobClient struct {
// Client is the underlying PaddleJobInterface that requests are delegated to.
Client kfClientSetV1.PaddleJobInterface
}
// Get
// Title Get
// Description Fetches a PaddleJob by name by delegating to the wrapped Client.
// Auth alittlexincan@163.com (2024/07/22 11:36)
// Param ctx context.Context request context
// Param name string name of the resource
// Param opts metav1.GetOptions options for the get call
// Return *v1.PaddleJob the job returned by the wrapped client
// Return error the error returned by the wrapped client
func (c *PaddleJobClient) Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.PaddleJob, error) {
	paddleJobs := c.Client
	return paddleJobs.Get(ctx, name, opts)
}
// PyTorchJobClient
// Title PyTorchJobClient
// Description Client service struct for PyTorchJob resources; its Get method
// matches the KubeflowJobClient interface instantiated with v1.PyTorchJob.
// Auth alittlexincan@163.com (2024/07/22 11:36)
type PyTorchJobClient struct {
// Client is the underlying PyTorchJobInterface that requests are delegated to.
Client kfClientSetV1.PyTorchJobInterface
}
// Get
// Title Get
// Description Fetches a PyTorchJob by name by delegating to the wrapped Client.
// Auth alittlexincan@163.com (2024/07/22 11:36)
// Param ctx context.Context request context
// Param name string name of the resource
// Param opts metav1.GetOptions options for the get call
// Return *v1.PyTorchJob the job returned by the wrapped client
// Return error the error returned by the wrapped client
func (c *PyTorchJobClient) Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.PyTorchJob, error) {
	pytorchJobs := c.Client
	return pytorchJobs.Get(ctx, name, opts)
}
- 创建训练框架工厂接口
- 在 kubeflow.factory.go 中创建KubeflowFactory客户端工厂接口
// KubeflowFactory
// Title KubeflowFactory
// Description Factory interface for training-framework job clients.
// Auth alittlexincan@163.com (2024/07/22 11:36)
//
// NOTE(review): the PaddleJobClientFactory and PytorchJobClientFactory shown in
// this article declare CreateJobClient() with no parameters and return a client
// typed on the concrete job (e.g. client.KubeflowJobClient[v1.PaddleJob]), so
// neither of them satisfies this method signature — confirm the intended contract.
type KubeflowFactory interface {
// CreateJobClient builds a job client from a Kubeflow client and a namespace.
CreateJobClient(kubeflowClient *kfClientSetV1.KubeflowV1Client, namespace string) client.KubeflowJobClient[any]
}
- 创建PaddleJobClientFactory训练框架客户端工厂 (注意: 其CreateJobClient方法没有参数, 且返回client.KubeflowJobClient[v1.PaddleJob], 与上面KubeflowFactory接口声明的方法签名并不一致, 因此它并未真正实现该接口)
package factory
import (
v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
kfClientSetV1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v1"
"xincan.com.cn/demo/kubeflow/client"
)
// PaddleJobClientFactory
// Title PaddleJobClientFactory
// Description Factory struct that creates PaddleJob clients; it embeds
// *KubeflowJobFactory to reuse its Kubeflow client and namespace.
// Auth alittlexincan@163.com (2024/07/22 11:36)
type PaddleJobClientFactory struct {
*KubeflowJobFactory
}
// NewPaddleJobClientFactory
// Title NewPaddleJobClientFactory
// Description Builds a PaddleJobClientFactory around a base KubeflowJobFactory
// created from the given Kubeflow client and namespace.
// Auth alittlexincan@163.com (2024/07/22 11:36)
// Param client the Kubeflow client
// Param namespace the Kubernetes namespace
// Return *PaddleJobClientFactory the newly created factory
func NewPaddleJobClientFactory(client *kfClientSetV1.KubeflowV1Client, namespace string) *PaddleJobClientFactory {
	base := NewKubeflowJobFactory(client, namespace)
	return &PaddleJobClientFactory{KubeflowJobFactory: base}
}
// CreateJobClient
// Title CreateJobClient
// Description Creates a PaddleJob client bound to the factory's namespace.
// Auth alittlexincan@163.com (2024/07/22 11:36)
// Return client.KubeflowJobClient[v1.PaddleJob] the PaddleJob client
func (f *PaddleJobClientFactory) CreateJobClient() client.KubeflowJobClient[v1.PaddleJob] {
	paddleJobs := f.kubeflowClient.PaddleJobs(f.namespace)
	return &client.PaddleJobClient{Client: paddleJobs}
}
- 创建PytorchJobClientFactory训练框架客户端工厂 (注意: 其CreateJobClient方法没有参数, 且返回client.KubeflowJobClient[v1.PyTorchJob], 与KubeflowFactory接口声明的方法签名并不一致, 因此它并未真正实现该接口)
package factory
import (
v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
kfClientSetV1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v1"
"xincan.com.cn/demo/kubeflow/client"
)
// PytorchJobClientFactory
// Title PytorchJobClientFactory
// Description Factory struct that creates PyTorchJob clients; it embeds
// *KubeflowJobFactory to reuse its Kubeflow client and namespace.
// Auth alittlexincan@163.com (2024/07/22 11:36)
type PytorchJobClientFactory struct {
*KubeflowJobFactory
}
// NewPytorchJobClientFactory
// Title NewPytorchJobClientFactory
// Description Builds a PytorchJobClientFactory around a base KubeflowJobFactory
// created from the given Kubeflow client and namespace.
// Auth alittlexincan@163.com (2024/07/22 11:36)
// Param client the Kubeflow client
// Param namespace the Kubernetes namespace
// Return *PytorchJobClientFactory the newly created factory
func NewPytorchJobClientFactory(client *kfClientSetV1.KubeflowV1Client, namespace string) *PytorchJobClientFactory {
	base := NewKubeflowJobFactory(client, namespace)
	return &PytorchJobClientFactory{KubeflowJobFactory: base}
}
// CreateJobClient
// Title CreateJobClient
// Description Creates a PyTorchJob client bound to the factory's namespace.
// Auth alittlexincan@163.com (2024/07/22 11:36)
// Return client.KubeflowJobClient[v1.PyTorchJob] the PyTorchJob client
func (f *PytorchJobClientFactory) CreateJobClient() client.KubeflowJobClient[v1.PyTorchJob] {
	pytorchJobs := f.kubeflowClient.PyTorchJobs(f.namespace)
	return &client.PyTorchJobClient{Client: pytorchJobs}
}
- 创建基础工厂
- 在 kubeflow.factory.go 中继续创建基础工厂实现
package factory
import (
"fmt"
kfClientSetV1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v1"
"xincan.com.cn/demo/enum"
"xincan.com.cn/demo/kubeflow/client"
)
// KubeflowFactory
// Title KubeflowFactory
// Description Factory interface for training-framework job clients.
// Auth alittlexincan@163.com (2024/07/22 11:36)
//
// NOTE(review): the PaddleJobClientFactory and PytorchJobClientFactory shown in
// this article declare CreateJobClient() with no parameters and return a client
// typed on the concrete job (e.g. client.KubeflowJobClient[v1.PaddleJob]), so
// neither of them satisfies this method signature — confirm the intended contract.
type KubeflowFactory interface {
// CreateJobClient builds a job client from a Kubeflow client and a namespace.
CreateJobClient(kubeflowClient *kfClientSetV1.KubeflowV1Client, namespace string) client.KubeflowJobClient[any]
}
// KubeflowJobFactory
// Title KubeflowJobFactory
// Description Base factory holding the Kubeflow client and namespace from
// which the framework-specific job clients are built.
// Auth alittlexincan@163.com (2024/07/22 11:36)
type KubeflowJobFactory struct {
// kubeflowClient is the Kubeflow client used to obtain per-framework job interfaces.
kubeflowClient *kfClientSetV1.KubeflowV1Client
// namespace is the Kubernetes namespace passed to the job interfaces.
namespace string
}
// NewKubeflowJobFactory
// Title NewKubeflowJobFactory
// Description Instantiates a KubeflowJobFactory that stores the given
// Kubeflow client and namespace.
// Auth alittlexincan@163.com (2024/07/22 11:36)
// Param client the Kubeflow client
// Param namespace the Kubernetes namespace
// Return *KubeflowJobFactory the newly created factory
func NewKubeflowJobFactory(client *kfClientSetV1.KubeflowV1Client, namespace string) *KubeflowJobFactory {
	jobFactory := new(KubeflowJobFactory)
	jobFactory.kubeflowClient = client
	jobFactory.namespace = namespace
	return jobFactory
}
// jobClientMap maps each supported framework type to a constructor that builds
// the matching job client from a factory's Kubeflow client and namespace.
// The constructors return any because the concrete client type differs per
// framework; GetTypedJobClient narrows the result with a type assertion.
var jobClientMap = map[enum.FrameworkType]func(*KubeflowJobFactory) any{
	enum.PYTORCH: func(f *KubeflowJobFactory) any {
		pytorchJobs := f.kubeflowClient.PyTorchJobs(f.namespace)
		return &client.PyTorchJobClient{Client: pytorchJobs}
	},
	enum.PADDLE: func(f *KubeflowJobFactory) any {
		paddleJobs := f.kubeflowClient.PaddleJobs(f.namespace)
		return &client.PaddleJobClient{Client: paddleJobs}
	},
}
// GetJobClient
// Title GetJobClient
// Description Returns the job client that matches the given framework type.
// client.KubeflowJobClient is a generic type and cannot be named without a
// type argument, so the client is returned as any; callers narrow it with a
// type assertion such as jobClient.(client.KubeflowJobClient[v1.PyTorchJob]).
// Auth alittlexincan@163.com (2024/07/22 11:36)
// Param frameworkType enum.FrameworkType the training framework type
// Return any the framework-specific job client (nil on error)
// Return error non-nil when the framework type is not supported
func (f *KubeflowJobFactory) GetJobClient(frameworkType enum.FrameworkType) (any, error) {
	switch frameworkType {
	case enum.PYTORCH:
		return &client.PyTorchJobClient{
			Client: f.kubeflowClient.PyTorchJobs(f.namespace),
		}, nil
	case enum.PADDLE:
		return &client.PaddleJobClient{
			Client: f.kubeflowClient.PaddleJobs(f.namespace),
		}, nil
	default:
		return nil, fmt.Errorf("unsupported framework type: %s", frameworkType)
	}
}
// GetTypedJobClient
// Title GetTypedJobClient
// Description Looks up the constructor registered for the framework type in
// jobClientMap, builds the client, and narrows it to client.KubeflowJobClient[T].
// Auth alittlexincan@163.com (2024/07/22 11:36)
// Param factory *KubeflowJobFactory the factory handed to the constructor
// Param frameworkType enum.FrameworkType the training framework type
// Return client.KubeflowJobClient[T] the typed job client (nil on error)
// Return error non-nil when the framework type is not registered or the built
// client does not implement client.KubeflowJobClient[T]
func GetTypedJobClient[T any](factory *KubeflowJobFactory, frameworkType enum.FrameworkType) (client.KubeflowJobClient[T], error) {
	build, registered := jobClientMap[frameworkType]
	if !registered {
		return nil, fmt.Errorf("unsupported framework type: %s", frameworkType)
	}

	typed, matches := build(factory).(client.KubeflowJobClient[T])
	if !matches {
		return nil, fmt.Errorf("type mismatch for framework type: %s", frameworkType)
	}
	return typed, nil
}
- 主函数测试调用
package main
import (
"context"
"fmt"
v1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
kfClientSetV1 "github.com/kubeflow/training-operator/pkg/client/clientset/versioned/typed/kubeflow.org/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/clientcmd"
"xincan.com.cn/demo/enum"
"xincan.com.cn/demo/kubeflow/client"
"xincan.com.cn/demo/kubeflow/factory"
)
// initKubeflow
// Title initKubeflow
// Description Builds a Kubeflow v1 client from the kubeconfig file at
// ./.kube/1148032689266233344. It panics with a descriptive error when the
// kubeconfig cannot be loaded, instead of passing a nil configuration on to
// NewForConfigOrDie.
// Auth alittlexincan@163.com (2024/07/22 11:36)
// Return *kfClientSetV1.KubeflowV1Client the Kubeflow client
func initKubeflow() *kfClientSetV1.KubeflowV1Client {
	const kubeConfigPath = "./.kube/1148032689266233344"

	kubeConfig, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath)
	if err != nil {
		// The function has no error return, and the next call is an "OrDie"
		// constructor, so failing loudly here keeps the behavior consistent.
		panic(fmt.Errorf("loading kubeconfig %q: %w", kubeConfigPath, err))
	}
	return kfClientSetV1.NewForConfigOrDie(kubeConfig)
}
func createJob() {
// 初始化kubeflow客户端
kubeflowV1Client := initKubeflow()
// 根据kubeflow客户端、命名空间, 创建训练框架客户端工厂
jobFactory := factory.NewKubeflowJobFactory(kubeflowV1Client, "troila")
第一种方式 ///
fmt.Println("================== 第一种方式 " + enum.PYTORCH.Value() + " =====================")
// 根据训练框架类型获取pytorch客户端
pytorchClient, _ := factory.GetTypedJobClient[v1.PyTorchJob](jobFactory, enum.PYTORCH)
// 根据指定的客户端获取内置函数
pytorch, _ := pytorchClient.Get(context.TODO(), "pytorch-1203457869012345678", metav1.GetOptions{})
for _, value := range pytorch.Status.Conditions {
fmt.Println(value.Type)
}
fmt.Println("================== 第一种方式 " + enum.PYTORCH.Value() + " =====================")
paddleClient, _ := factory.GetTypedJobClient[v1.PaddleJob](jobFactory, enum.PADDLE)
paddle, _ := paddleClient.Get(context.TODO(), "pytorch-1203457869012345679", metav1.GetOptions{})
for _, value := range pytorch.Status.Conditions {
fmt.Println(value.Type)
}
第二种方式 ///
fmt.Println("================== 第二种方式 " + enum.PYTORCH.Value() + " =====================")
// 根据训练框架类型获取pytorch客户端
pytorchJobClient, _ := jobFactory.GetJobClient(enum.PYTORCH)
// 转化为指定的泛型客户端, 实则是确定了用那个客户端
pytorchJob := pytorchJobClient.(client.KubeflowJobClient[v1.PyTorchJob])
// 根据指定的客户端获取内置函数
pytorch, _ = pytorchJob.Get(context.TODO(), "pytorch-1203457869012345678", metav1.GetOptions{})
for _, value := range pytorch.Status.Conditions {
fmt.Println(value.Type)
}
fmt.Println("================== 第二种方式 " + enum.PADDLE.Value() + " =====================")
paddleJobClient, _ := jobFactory.GetJobClient(enum.PADDLE)
job := paddleJobClient.(client.KubeflowJobClient[v1.PaddleJob])
paddle, _ = job.Get(context.TODO(), "paddle-1203457869012345679", metav1.GetOptions{})
for _, value := range paddle.Status.Conditions {
fmt.Println(value.Type)
}
第三种方式 ///
fmt.Println("================== 第三种方式 " + enum.PYTORCH.Value() + " =====================")
// 根据kubeflow客户端、命名空间, 创建指定的训练框架客户端工厂
pytorchFactory := factory.NewPytorchJobClientFactory(kubeflowV1Client, "troila")
// 通过指定的工厂创建训练框架客户端, 然后获取内置函数
pytorch, _ = pytorchFactory.CreateJobClient().Get(context.TODO(), "pytorch-1203457869012345678", metav1.GetOptions{})
for _, value := range pytorch.Status.Conditions {
fmt.Println(value.Type)
}
fmt.Println("================== 第三种方式 " + enum.PADDLE.Value() + " =====================")
paddleFactory := factory.NewPaddleJobClientFactory(kubeflowV1Client, "troila")
paddle, _ = paddleFactory.CreateJobClient().Get(context.TODO(), "paddle-1203457869012345679", metav1.GetOptions{})
for _, value := range paddle.Status.Conditions {
fmt.Println(value.Type)
}
}
// main is the program entry point; it runs the createJob demonstration.
func main() {
createJob()
}
- 结果验证
// 控制台打印结果
GOROOT=/Users/xincan/software/go/go-1.22.5 #gosetup
GOPATH=/Users/xincan/workspace/goworkspace #gosetup
/Users/xincan/software/go/go-1.22.5/bin/go build -o /Users/xincan/Library/Caches/JetBrains/GoLand2023.3/tmp/GoLand/___1demo xincan.com.cn/demo #gosetup
/Users/xincan/Library/Caches/JetBrains/GoLand2023.3/tmp/GoLand/___1demo
================== 第一种方式 pytorch =====================
Created
Running
Succeeded
================== 第一种方式 pytorch =====================
Created
Running
Succeeded
================== 第二种方式 pytorch =====================
Created
Running
Succeeded
================== 第二种方式 paddle =====================
Created
Running
Succeeded
================== 第三种方式 pytorch =====================
Created
Running
Succeeded
================== 第三种方式 paddle =====================
Created
Running
Succeeded
Process finished with the exit code 0
// 服务器集群打印结果
root@node1:~# kubectl -n troila get PytorchJob,PaddleJob
NAME STATE AGE
pytorchjob.kubeflow.org/pytorch-1203457869012345678 Succeeded 22h
NAME STATE AGE
paddlejob.kubeflow.org/paddle-1203457869012345679 Running 8s
root@node1:~# kubectl -n troila get pod