kubernetes 离线安装 kubeflow

环境: centos7.6,kubernetes 1.18.9,kubeflow 1.1.0

不推荐使用 kubernetes 1.18.9 部署 kubeflow 1.1.0,问题太多。即使官方说没有发现问题,本实验中 seldon 仍然始终部署失败,估计是 apiVersion 的问题;seldon 官方要求 kubernetes <= 1.17。
部署 kubeflow 还是使用 kubernetes 1.15 版本吧,具体参考官网推荐 https://www.kubeflow.org/docs/started/k8s/overview/ ,整体步骤差不多。

1、解决镜像依赖问题

192.168.1.11 nexus 代理阿里云 docker registry:
https://registry.cn-hangzhou.aliyuncs.com

192.168.1.11 nexus 代理 quay docker registry:
https://quay.io

使用 docker group registry

/etc/hosts 中添加 gcr.io、quay.io 解析

192.168.1.11 gcr.io
192.168.1.11 quay.io

/etc/docker/daemon.json

{
    "insecure-registries": [
        "192.168.1.11",
        "gcr.io",
        "quay.io"
    ],
    "registry-mirrors": [
        "https://192.168.1.11"
    ]

}

2、安装 storageclass(本实验用 ceph rbd)

[root@node1 kubeflow]# cat storageclass.yml 
---
apiVersion: v1
kind: Secret
metadata:
  name: ceph-admin-secret
  namespace: kube-system
type: "kubernetes.io/rbd"
data:
  # ceph auth get-key client.admin | base64
  key:  "QVFCdTc0eGZNcUE1Sxxxx6Snl1TGRCdjFLQ0ZFazVMWk9pR1E9PQo="
---
apiVersion: v1
kind: Secret
metadata:
  name: ceph-secret
  namespace: kube-system
type: "kubernetes.io/rbd"
data:
  # ceph auth add client.kube mon 'allow r' osd 'allow rwx pool=kube'
  # ceph auth get-key client.kube | base64
  key: "QVFCdTc0eGZNcUE1Sxxxx6Snl1TGRCdjFLQ0ZFazVMWk9pR1E9PQo="


---
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: "ceph-rbd-storage"
  annotations:
    storageclass.kubernetes.io/is-default-class: "true"
provisioner: ceph.com/rbd
allowVolumeExpansion: true
parameters:
  monitors: "192.168.1.80,192.168.1.81,192.168.1.82,192.168.1.83,192.168.1.84"
  pool: "infra_k8s"
  adminId: "admin"
  adminSecretNamespace: kube-system
  adminSecretName: ceph-admin-secret
  userId: "infra_k8s"
  userSecretName: ceph-secret
  userSecretNamespace: kube-system
  imageFormat: "2"
  imageFeatures: layering

[root@node1 kubeflow]# cat rbac.yml 
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: rbd-provisioner
  namespace: kube-system

---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: rbd-provisioner
rules:
- apiGroups: [""]
  resources: ["persistentvolumes"]
  verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups: [""]
  resources: ["persistentvolumeclaims"]
  verbs: ["get", "list", "watch", "update"]
- apiGroups: ["storage.k8s.io"]
  resources: ["storageclasses"]
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources: ["events"]
  verbs: ["create", "update", "patch"]
- apiGroups: [""]
  resources: ["services"]
  resourceNames: ["kube-dns","coredns"]
  verbs: ["list", "get"]
- apiGroups: [""]
  resources: ["endpoints"]
  verbs: ["get", "list", "watch", "create", "update", "patch"]

---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: rbd-provisioner
subjects:
- kind: ServiceAccount
  name: rbd-provisioner
  namespace: kube-system
roleRef:
  kind: ClusterRole
  name: rbd-provisioner
  apiGroup: rbac.authorization.k8s.io

---

apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: rbd-provisioner
  namespace: kube-system
rules:
- apiGroups: [""]
  resources: ["secrets"]
  verbs: ["get"]
- apiGroups: [""]
  resources: ["endpoints"]
  verbs: ["get", "list", "watch", "create", "update", "patch"]

---

apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: rbd-provisioner
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: rbd-provisioner
subjects:
- kind: ServiceAccount
  name: rbd-provisioner
  namespace: kube-system

[root@node1 kubeflow]# cat rbd-provisioner-deployment.yml 
apiVersion: apps/v1
kind: Deployment
metadata:
  name: rbd-provisioner
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: rbd-provisioner
  replicas: 1
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: rbd-provisioner
    spec:
      containers:
      - name: rbd-provisioner
        image: "external_storage/rbd-provisioner:v2.1.1-k8s1.11"
        env:
        - name: PROVISIONER_NAME
          value: ceph.com/rbd
      serviceAccount: rbd-provisioner
kubectl apply -f rbac.yml 
kubectl apply -f storageclass.yml 
kubectl apply -f rbd-provisioner-deployment.yml

注意:需要给每个节点安装 ceph-common,否则会在 attach/detach volume 部分报错。因为 kubelet 需要使用到 rbd 二进制文件来 attach 和 detach rbd image。参考 https://github.com/kubernetes/kubernetes/issues/38923#issuecomment-313054666

Events:
  Type     Reason                  Age              From                     Message
  ----     ------                  ----             ----                     -------
  Normal   Scheduled               22s              default-scheduler        Successfully assigned default/pod-pvc to node5
  Normal   SuccessfulAttachVolume  22s              attachdetach-controller  AttachVolume.Attach succeeded for volume "pvc-a2baddf7-5de6-45e0-bf45-4ca1accece9a"
  Warning  FailedMount             2s (x4 over 6s)  kubelet                  MountVolume.WaitForAttach failed for volume "pvc-a2baddf7-5de6-45e0-bf45-4ca1accece9a" : fail to check rbd image status with: (executable file not found in $PATH), rbd output: ()

测试 rbd

[root@node1 kubeflow]# cat pod-pvc.yml 
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: default 
  name: myclaim
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: ceph-rbd-storage
  resources:
    requests:
      storage: 1Gi

---
apiVersion: v1
kind: Pod
metadata:
  name: pod-pvc
  namespace: default
  labels:
    foo: bar
  annotations:
    key1: value1
    key2: |
      multi
      line
      value
spec:
  containers:
  - name: main
    image: busybox
    command: ["sleep", "9999999"]
    resources:
      requests:
        cpu: 15m
        memory: 100Ki
      limits:
        cpu: 100m
        memory: 4Mi
    volumeMounts:
    - name: mypvc
      mountPath: /etc/downward

  volumes:
    - name: mypvc
      persistentVolumeClaim:
        claimName: myclaim
kubectl apply -f pod-pvc.yml 

3、部署 kubeflow

下载 kfctl_v1.1.0-0-g9a3621e_linux.tar.gz,解压并将 kfctl 移动到 /usr/local/bin/

tar -zxvf kfctl_v1.1.0-0-g9a3621e_linux.tar.gz
mv kfctl /usr/local/bin/
mkdir /root/kubeflow
cd /root/kubeflow

修改环境变量

export PATH=$PATH:/usr/local/bin
export KF_NAME=kubeflow
export BASE_DIR=/root/kubeflow
export KF_DIR=${BASE_DIR}/${KF_NAME}
export CONFIG_FILE=${KF_DIR}/kfctl_istio_dex.yaml

下载 kfctl_istio_dex.yaml

修改 kfctl_istio_dex.yaml,uri 改为 nexus 中上传的 manifests-1.1-branch.tar.gz uri。

  repos:
  - name: manifests
    uri: http://192.168.1.11/repository/raw-file/kubeflow/manifests-1.1-branch.tar.gz
  version: v1.1-branch

部署 kubeflow

kfctl apply -V -f kfctl_istio_dex.yaml
[root@node1 ~]# kb get csr
NAME                    AGE    SIGNERNAME                     REQUESTOR                                                             CONDITION
cache-server.kubeflow   3m4s   kubernetes.io/legacy-unknown   system:serviceaccount:kubeflow:kubeflow-pipelines-cache-deployer-sa   Pending
[root@node1 ~]# kb certificate approve cache-server.kubeflow
certificatesigningrequest.certificates.k8s.io/cache-server.kubeflow approved

cache-deployer-deployment 会报错

[root@node1 kubeflow]# kb logs -f cache-deployer-deployment-7bf4fc59f4-twdgn -c main -n kubeflow
...
Error from server (Forbidden): certificatesigningrequests.certificates.k8s.io "cache-server.kubeflow" is forbidden: user not permitted to approve requests with signerName "kubernetes.io/legacy-unknown"

执行 kb edit clusterrole kubeflow-pipelines-cache-deployer-clusterrole(kb 应为 kubectl 的别名),在 rules 中增加下面配置

- apiGroups:
  - certificates.k8s.io
  resources:
  - signers
  resourceNames:
  - kubernetes.io/legacy-unknown
  verbs:
  - approve

等 cache-deployer-deployment pod 重启,就有权限创建 secret webhook-server-tls

删除 kubeflow (测试)

kfctl delete -V -f kfctl_istio_dex.yaml

参考 https://www.kubeflow.org/docs/started/k8s/kfctl-istio-dex/

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值