python项目微服务化并部署在k8s上

ERROR_LESS

已于 2022-04-27 10:59:47 修改

阅读量5.2k

点赞数 3

分类专栏： k8s docker flask 文章标签： docker kubernetes

于 2022-04-25 11:04:14 首次发布

本文链接：https://blog.csdn.net/qq_47058489/article/details/122111549

版权

docker 同时被 3 个专栏收录

28 篇文章 2 订阅

订阅专栏

k8s

25 篇文章 5 订阅

订阅专栏

flask

2 篇文章 0 订阅

订阅专栏

0 前提

创建tensorflow-gpu虚拟环境，参考这篇博客

1 尝试运行demo

# 进入之前创建好的tf虚拟环境
root@master:/home/hqc# source activate tf

# 进入含源码所在地址
(tf) root@master:/home/hqc# cd 自然基金项目/Federated/
(tf) root@master:/home/hqc/自然基金项目/Federated# 

# 运行程序
(tf) root@master:/home/hqc/自然基金项目/Federated# python main.py
# 会报错，依次conda install 对应包 即可！

# conda install 对应包 
(tf) root@master:/home/hqc/自然基金项目/Federated# conda install matplotlib
(tf) root@master:/home/hqc/自然基金项目/Federated# conda install numpy

# 再次运行程序
(tf) root@master:/home/hqc/自然基金项目/Federated# python main.py
	...
	...
	32/32 [==============================] - 0s 805us/step - loss: 0.0610 - accuracy: 0.9840
	Sever: 轮次: 99,准确率: 0.9840，共测试了10000张图片 
	Epoch 1/3
	7/7 [==============================] - 0s 1ms/step - loss: 0.0515 - accuracy: 0.9799
	Epoch 2/3
	7/7 [==============================] - 0s 2ms/step - loss: 0.0153 - accuracy: 1.0000
	Epoch 3/3
	7/7 [==============================] - 0s 1ms/step - loss: 0.0073 - accuracy: 1.0000
	Epoch 1/3
	6/6 [==============================] - 0s 1ms/step - loss: 0.0122 - accuracy: 1.0000
	Epoch 2/3
	6/6 [==============================] - 0s 2ms/step - loss: 0.0053 - accuracy: 1.0000
	Epoch 3/3
	6/6 [==============================] - 0s 1ms/step - loss: 0.0038 - accuracy: 1.0000
	Epoch 1/3
	16/16 [==============================] - 0s 1ms/step - loss: 0.0101 - accuracy: 0.9980
	Epoch 2/3
	16/16 [==============================] - 0s 1ms/step - loss: 0.0037 - accuracy: 1.0000
	Epoch 3/3
	16/16 [==============================] - 0s 1ms/step - loss: 0.0017 - accuracy: 1.0000
	Epoch 1/3
	1/1 [==============================] - 0s 2ms/step - loss: 0.0122 - accuracy: 1.0000
	Epoch 2/3
	1/1 [==============================] - 0s 2ms/step - loss: 0.0065 - accuracy: 1.0000
	Epoch 3/3
	1/1 [==============================] - 0s 1ms/step - loss: 0.0038 - accuracy: 1.0000
	32/32 [==============================] - 0s 775us/step - loss: 0.0595 - accuracy: 0.9800
	Sever: 轮次: 100,准确率: 0.9800，共测试了10000张图片 
	QStandardPaths: wrong ownership on runtime directory /run/user/1000, 1000 instead of 0

# 成功！

最终结果图

2 使用flask+html将demo微服务化

from flask import Flask,render_template
from flask import jsonify
import random

import matplotlib.pyplot as plt
# this module is used to draw a picture
# usually connected to 'numpy' module
# its usage just like matlab

import numpy as np
# this module is used to calculate or transform arrays and lists

from tensorflow.keras import datasets, layers, models
# tensorflow.keras is a high lever module for python API
# 'from tensorflow.keras import datasets' is used to download datasets
# 'import layers' is used to customize the layers of neural network
# 'import models' is used to customize the whole model of neural network

# Flask application object; the route handler below registers on it.
app = Flask(__name__)

# Number of federated training rounds. Each client's training data is also
# split into this many chunks, one chunk per round.
BATCH = 100

# the images source
class DataSource(object):
    """MNIST data kept by the server.

    Exposes ``train_images`` / ``train_labels`` (first 15000 training
    samples) and ``test_images`` / ``test_labels`` (the 10000 test samples).
    """

    def __init__(self):
        mnist_train, mnist_test = datasets.mnist.load_data()
        raw_train_x, train_y = mnist_train
        raw_test_x, test_y = mnist_test

        # 60000 training and 10000 test images; add a trailing channel axis.
        shaped_train_x = raw_train_x.reshape((60000, 28, 28, 1))
        shaped_test_x = raw_test_x.reshape((10000, 28, 28, 1))

        # Map the pixel values into the range 0 - 1.
        train_x = shaped_train_x / 255.0
        test_x = shaped_test_x / 255.0

        self.train_images = train_x[0:15000]
        self.train_labels = train_y[0:15000]
        self.test_images = test_x[0:10000]
        self.test_labels = test_y[0:10000]


def random_num_with_fix_total(maxvalue, num):
    """Generate ``num`` random integers whose sum is ``maxvalue``.

    ``num - 1`` distinct cut points are sampled from ``1 .. maxvalue - 1``.
    Together with the outer boundaries 0 and ``maxvalue`` they are sorted,
    and the gaps between neighbouring boundaries are returned in order.

    maxvalue: total of the generated sequence
    num: how many integers to generate
    """
    cut_points = random.sample(range(1, maxvalue), k=num - 1)
    boundaries = sorted([0, maxvalue] + cut_points)
    # Pair every boundary with its successor; each gap is one part.
    return [right - left for left, right in zip(boundaries, boundaries[1:])]


class _ClientDataSource(object):
    """Training data of one federated client, split into one chunk per round.

    Every client owns a contiguous shard of ``SHARD_SIZE`` MNIST training
    samples, selected by ``SHARD_INDEX``. The shard is kept in ``TI`` / ``TL``
    and cut into ``BATCH`` consecutive chunks of random size, stored in
    ``train_images`` / ``train_labels`` so that entry ``i`` holds the data
    used in round ``i``.
    """

    # Number of training samples owned by each client.
    SHARD_SIZE = 15000
    # Zero-based position of the client's shard inside the training set.
    SHARD_INDEX = 0
    # Whether the instance also exposes the scaled test set.
    KEEP_TEST_SET = False

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
        # 60000 training and 10000 test images; add a trailing channel axis.
        train_images = train_images.reshape((60000, 28, 28, 1))
        test_images = test_images.reshape((10000, 28, 28, 1))

        # Select this client's shard, then map its pixel values into 0 - 1.
        shard_start = self.SHARD_INDEX * self.SHARD_SIZE
        shard_stop = shard_start + self.SHARD_SIZE
        self.TI = train_images[shard_start:shard_stop] / 255.0
        self.TL = train_labels[shard_start:shard_stop]

        # One object slot per round, each holding a differently sized chunk.
        self.train_images = np.empty(BATCH, dtype=object)
        self.train_labels = np.empty(BATCH, dtype=object)
        begin = 0
        rand_count = random_num_with_fix_total(self.SHARD_SIZE, BATCH)
        # Iterate over the generated sizes so the number of chunks always
        # follows BATCH instead of a separate hard-coded constant.
        for count, size in enumerate(rand_count):
            self.train_images[count] = self.TI[begin:begin + size]
            self.train_labels[count] = self.TL[begin:begin + size]
            begin += size

        if self.KEEP_TEST_SET:
            self.test_images = (test_images / 255.0)[0:10000]
            self.test_labels = test_labels[0:10000]


class DataSource1(_ClientDataSource):
    """Client 1: training samples 0 - 14999; also keeps the test set."""

    SHARD_INDEX = 0
    KEEP_TEST_SET = True


class DataSource2(_ClientDataSource):
    """Client 2: training samples 15000 - 29999."""

    SHARD_INDEX = 1


class DataSource3(_ClientDataSource):
    """Client 3: training samples 30000 - 44999."""

    # Previously this client reused client 2's shard (15000 - 29999).
    SHARD_INDEX = 2


class DataSource4(_ClientDataSource):
    """Client 4: training samples 45000 - 59999."""

    # Previously this client reused client 2's shard (15000 - 29999).
    SHARD_INDEX = 3


# Define as LeNet
class CNN(object):
    """Wrapper that builds a small convolutional classifier as ``self.model``."""

    def __init__(self):
        # Three 3x3 convolutions (the first two followed by 2x2 max pooling),
        # then a 64-unit dense layer and a 10-way softmax output.
        stack = (
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
            layers.MaxPool2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPool2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(10, activation='softmax'),
        )
        network = models.Sequential()
        for layer in stack:
            network.add(layer)
        # network.summary() would print the network structure.
        self.model = network


# FedAvg Function
def FedAvg(num_clients=4):
    """Average the weight files written by the clients.

    Reads ``Client1Weight.npy`` ... ``Client<num_clients>Weight.npy`` from the
    current working directory and returns their element-wise mean.

    num_clients: number of client weight files to average (default 4)
    Returns: numpy array holding the averaged weights
    Raises: ValueError if ``num_clients`` is smaller than 1
    """
    if num_clients < 1:
        raise ValueError("num_clients must be at least 1")
    weight_sum = None
    for client_id in range(1, num_clients + 1):
        # NOTE: allow_pickle=True unpickles the file content, so these files
        # must only ever be ones written by this program itself.
        client_weight = np.load("Client%dWeight.npy" % client_id, allow_pickle=True)
        weight_sum = client_weight if weight_sum is None else weight_sum + client_weight
    return weight_sum / num_clients


# EKF Function
def EKF(cnn, weight_in):
    """Load ``weight_in`` into ``cnn.model`` and return the same ``cnn``.

    cnn: wrapper object whose ``model`` attribute offers ``set_weights``
    weight_in: weights handed to ``set_weights`` unchanged
    """
    target_model = cnn.model
    target_model.set_weights(weight_in)
    return cnn


# Create Models:LeNet
cnn_sever = CNN()
cnn1 = CNN()
cnn2 = CNN()
cnn3 = CNN()
cnn4 = CNN()
# Prepare Client Data
data_sever = DataSource()
data1 = DataSource1()
data2 = DataSource2()
data3 = DataSource3()
data4 = DataSource4()

# Compile Client and Sever Model
cnn_sever.model.compile(optimizer='adam',
                        loss='sparse_categorical_crossentropy',
                        metrics=['accuracy'])
cnn1.model.compile(optimizer='adam',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])
cnn2.model.compile(optimizer='adam',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])
cnn3.model.compile(optimizer='adam',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])
cnn4.model.compile(optimizer='adam',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])
storage_acc = []
weight = cnn_sever.model.get_weights()
np.save("SeverWeight", weight)
# All Clint Train
for i in range(BATCH):
    # Client Model Update(Downloads From Sever)
    weight = np.load("SeverWeight.npy", allow_pickle=True)
    #    cnn1.model.set_weights(weight)
    #    cnn2.model.set_weights(weight)
    #    cnn3.model.set_weights(weight)
    #    cnn4.model.set_weights(weight)
    cnn1 = EKF(cnn1, weight)
    cnn2 = EKF(cnn2, weight)
    cnn3 = EKF(cnn3, weight)
    cnn4 = EKF(cnn4, weight)
    # Client Model Fit
    cnn1.model.fit(data1.train_images[i], data1.train_labels[i], epochs=3)
    cnn2.model.fit(data2.train_images[i], data2.train_labels[i], epochs=3)
    cnn3.model.fit(data3.train_images[i], data3.train_labels[i], epochs=3)
    cnn4.model.fit(data4.train_images[i], data4.train_labels[i], epochs=3)
    # FedAvg

    weight_CNN1 = np.array(cnn1.model.get_weights())
    weight_CNN2 = np.array(cnn2.model.get_weights())
    weight_CNN3 = np.array(cnn3.model.get_weights())
    weight_CNN4 = np.array(cnn4.model.get_weights())


    np.save("Client1Weight", weight_CNN1)
    np.save("Client2Weight", weight_CNN2)
    np.save("Client3Weight", weight_CNN3)
    np.save("Client4Weight", weight_CNN4)
    weight = FedAvg()
    # Uploads to Sever
    cnn_sever.model.set_weights(weight)
    np.save("SeverWeight", weight)
    test_loss, test_acc = cnn_sever.model.evaluate(data_sever.test_images[0:1000], data_sever.test_labels[0:1000])
    print("Sever: 轮次: %d,准确率: %.4f，共测试了%d张图片 " % (i + 1, test_acc, len(data_sever.test_labels)))
    storage_acc = np.append(storage_acc, test_acc)
# Show Acc
x = np.array(range(100))
plt.plot(x, storage_acc)
plt.savefig('./static/acc.png')

@app.route('/')
def index():
    """Render ``index.html`` with the final server weights as text."""
    weight_text = str(weight)
    return render_template('index.html', weight=weight_text)

if __name__ == '__main__':
    # Bind to every network interface so the server is reachable from
    # outside the container, not only from inside it.
    app.run(host = '0.0.0.0')

关联的HTML文件：

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>RESULT</title>
</head>
<body>
    the picture of accuracy is here :
    <br>
    <br>
    <!-- url_for resolves the static URL; width/height take plain pixel numbers. -->
    <img src="{{ url_for('static', filename='acc.png') }}" alt="accuracy per training round" width="1080" height="720">
    <br>
    <br>
    the server's weight is :
    <br>
    <br>
    {{weight}}

</body>
</html>

注意：

导入Flask,render_template模块
一定要注意文件结构：图片要放在static文件夹下，html文件要放在templates文件夹下，不能都放在一个文件夹下（原因：Flask默认从templates目录查找模板、从static目录提供静态文件，放错位置会导致render_template找不到模板或图片无法访问）
最后的host必须为host = '0.0.0.0'，否则Flask只监听容器内部的127.0.0.1，docker run 之后无法从容器外部（包括公网）访问

3 容器镜像化

Dockerfile文件：

# Base image: the official Python image (no tag given, so the default tag is used).
FROM python
# Create the application directory layout and install the Python dependencies.
RUN mkdir -p /Federated \
    && mkdir -p /Federated/templates \
    && mkdir -p /Federated/static \
    && pip install tensorflow \
    && pip install matplotlib \
    && pip install flask
# Copy the page template and the application script into the image.
COPY ./templates/index.html /Federated/templates
COPY main.py /Federated/
# Run from the application directory so relative paths in main.py resolve there.
WORKDIR /Federated
# Port the container is documented to listen on.
EXPOSE 5000
# Prints a marker line during the build.
RUN /bin/bash -c 'echo init ok'
# Start the application when the container starts.
CMD ["python", "main.py"]

注意：得自己pip安装所需要的包

4 创建deployment和service

federated-deployment.yaml文件：

# Deployment that runs the federated demo image.
apiVersion: apps/v1
kind: Deployment
metadata:
  creationTimestamp: null
  labels:
    app: federated-deployment
  name: federated-deployment
spec:
  # Number of pod copies to keep running.
  replicas: 2
  selector:
    # Must match the labels in the pod template below.
    matchLabels:
      app: federated-deployment
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: federated-deployment
    spec:
      containers:
      - image: registry.cn-beijing.aliyuncs.com/hqc-k8s/federated:v1.0
        name: federated
        resources: {}
        ports:
        # Port declared for the container.
        - containerPort: 5000
        # Pull the image only when it is not already present on the node.
        imagePullPolicy: IfNotPresent
status: {}

federated-service.yaml文件：

# NodePort Service in front of the federated-deployment pods.
apiVersion: v1 # note: unlike the Deployment, this must not be 'apps/v1'
kind: Service
metadata:
  name: federated-deployment
  labels:
    app: federated-deployment
spec:
  ports:
  # port: Service port; targetPort: port on the pod; nodePort: port on every node.
  - port: 80
    targetPort: 5000
    nodePort: 30001
    protocol: TCP
  # Traffic goes to pods that carry this label.
  selector:
    app: federated-deployment
  type: NodePort

5 结果

1 命令行结果

root@master:/home/hqc/自然基金项目/Federated# kubectl get all
NAME                                         READY   STATUS        RESTARTS   AGE
pod/federated-deployment-5d5cfb4c7c-5bq9r    1/1     Running       0          12m
pod/federated-deployment-5d5cfb4c7c-mcfg2    1/1     Running       0          12m

NAME                            TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)        AGE
service/federated-deployment    NodePort    10.107.96.50     <none>        80:30001/TCP   8m41s

NAME                                    READY   UP-TO-DATE   AVAILABLE   AGE
deployment.apps/federated-deployment    2/2     2            2           12m

NAME                                               DESIRED   CURRENT   READY   AGE
replicaset.apps/federated-deployment-5d5cfb4c7c    2         2         2       12m

2 dashboard结果

在这里插入图片描述

3 运行结果

在这里插入图片描述

6 升级微服务（添加选择下载成果文件功能）

1 main.py

添加了下载功能
bootstrap优化html模块
flash闪现消息模块

from flask import Flask,render_template
from flask import jsonify
import random

import matplotlib.pyplot as plt
# this module is used to draw a picture
# usually connected to 'numpy' module
# its usage just like matlab

import numpy as np
# this module is used to calculate or transform arrays and lists

from tensorflow.keras import datasets, layers, models
# tensorflow.keras is a high lever module for python API
# 'from tensorflow.keras import datasets' is used to download datasets
# 'import layers' is used to customize the layers of neural network
# 'import models' is used to customize the whole model of neural network

from flask_bootstrap import Bootstrap # 继承"bootstrap/base.html"模板
import os # 处理路径相关

from flask import flash # 用于提示
from flask import request # 访问请求需要用到
#from flask import redirect # 重定向
#from flask import url_for # 获取url
from flask import send_from_directory # 用于下载文件

#from werkzeug.utils import secure_filename # 当用户输入恶意字符时，对服务器进行保护

app = Flask(__name__)
bootstrap = Bootstrap(app)
# Required so that templates can extend "bootstrap/base.html".

app.secret_key = os.environ.get('FLASK_SECRET_KEY', 'sdafdsdfdasaf')
# flash() stores its messages in the signed session, which needs a secret
# key. Set FLASK_SECRET_KEY in the environment for real deployments; the
# built-in fallback is public and only keeps local test runs working.

# Number of federated training rounds. Each client's training data is also
# split into this many chunks, one chunk per round.
BATCH = 100

# the images source
class DataSource(object):
    """MNIST data kept by the server.

    Exposes ``train_images`` / ``train_labels`` (first 15000 training
    samples) and ``test_images`` / ``test_labels`` (the 10000 test samples).
    """

    def __init__(self):
        mnist_train, mnist_test = datasets.mnist.load_data()
        raw_train_x, train_y = mnist_train
        raw_test_x, test_y = mnist_test

        # 60000 training and 10000 test images; add a trailing channel axis.
        shaped_train_x = raw_train_x.reshape((60000, 28, 28, 1))
        shaped_test_x = raw_test_x.reshape((10000, 28, 28, 1))

        # Map the pixel values into the range 0 - 1.
        train_x = shaped_train_x / 255.0
        test_x = shaped_test_x / 255.0

        self.train_images = train_x[0:15000]
        self.train_labels = train_y[0:15000]
        self.test_images = test_x[0:10000]
        self.test_labels = test_y[0:10000]


def random_num_with_fix_total(maxvalue, num):
    """Generate ``num`` random integers whose sum is ``maxvalue``.

    ``num - 1`` distinct cut points are sampled from ``1 .. maxvalue - 1``.
    Together with the outer boundaries 0 and ``maxvalue`` they are sorted,
    and the gaps between neighbouring boundaries are returned in order.

    maxvalue: total of the generated sequence
    num: how many integers to generate
    """
    cut_points = random.sample(range(1, maxvalue), k=num - 1)
    boundaries = sorted([0, maxvalue] + cut_points)
    # Pair every boundary with its successor; each gap is one part.
    return [right - left for left, right in zip(boundaries, boundaries[1:])]


class _ClientDataSource(object):
    """Training data of one federated client, split into one chunk per round.

    Every client owns a contiguous shard of ``SHARD_SIZE`` MNIST training
    samples, selected by ``SHARD_INDEX``. The shard is kept in ``TI`` / ``TL``
    and cut into ``BATCH`` consecutive chunks of random size, stored in
    ``train_images`` / ``train_labels`` so that entry ``i`` holds the data
    used in round ``i``.
    """

    # Number of training samples owned by each client.
    SHARD_SIZE = 15000
    # Zero-based position of the client's shard inside the training set.
    SHARD_INDEX = 0
    # Whether the instance also exposes the scaled test set.
    KEEP_TEST_SET = False

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
        # 60000 training and 10000 test images; add a trailing channel axis.
        train_images = train_images.reshape((60000, 28, 28, 1))
        test_images = test_images.reshape((10000, 28, 28, 1))

        # Select this client's shard, then map its pixel values into 0 - 1.
        shard_start = self.SHARD_INDEX * self.SHARD_SIZE
        shard_stop = shard_start + self.SHARD_SIZE
        self.TI = train_images[shard_start:shard_stop] / 255.0
        self.TL = train_labels[shard_start:shard_stop]

        # One object slot per round, each holding a differently sized chunk.
        self.train_images = np.empty(BATCH, dtype=object)
        self.train_labels = np.empty(BATCH, dtype=object)
        begin = 0
        rand_count = random_num_with_fix_total(self.SHARD_SIZE, BATCH)
        # Iterate over the generated sizes so the number of chunks always
        # follows BATCH instead of a separate hard-coded constant.
        for count, size in enumerate(rand_count):
            self.train_images[count] = self.TI[begin:begin + size]
            self.train_labels[count] = self.TL[begin:begin + size]
            begin += size

        if self.KEEP_TEST_SET:
            self.test_images = (test_images / 255.0)[0:10000]
            self.test_labels = test_labels[0:10000]


class DataSource1(_ClientDataSource):
    """Client 1: training samples 0 - 14999; also keeps the test set."""

    SHARD_INDEX = 0
    KEEP_TEST_SET = True


class DataSource2(_ClientDataSource):
    """Client 2: training samples 15000 - 29999."""

    SHARD_INDEX = 1


class DataSource3(_ClientDataSource):
    """Client 3: training samples 30000 - 44999."""

    # Previously this client reused client 2's shard (15000 - 29999).
    SHARD_INDEX = 2


class DataSource4(_ClientDataSource):
    """Client 4: training samples 45000 - 59999."""

    # Previously this client reused client 2's shard (15000 - 29999).
    SHARD_INDEX = 3


# Define as LeNet
class CNN(object):
    """Wrapper that builds a small convolutional classifier as ``self.model``."""

    def __init__(self):
        # Three 3x3 convolutions (the first two followed by 2x2 max pooling),
        # then a 64-unit dense layer and a 10-way softmax output.
        stack = (
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
            layers.MaxPool2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPool2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(10, activation='softmax'),
        )
        network = models.Sequential()
        for layer in stack:
            network.add(layer)
        # network.summary() would print the network structure.
        self.model = network


# FedAvg Function
def FedAvg(num_clients=4):
    """Average the weight files written by the clients.

    Reads ``Client1Weight.npy`` ... ``Client<num_clients>Weight.npy`` from the
    current working directory and returns their element-wise mean.

    num_clients: number of client weight files to average (default 4)
    Returns: numpy array holding the averaged weights
    Raises: ValueError if ``num_clients`` is smaller than 1
    """
    if num_clients < 1:
        raise ValueError("num_clients must be at least 1")
    weight_sum = None
    for client_id in range(1, num_clients + 1):
        # NOTE: allow_pickle=True unpickles the file content, so these files
        # must only ever be ones written by this program itself.
        client_weight = np.load("Client%dWeight.npy" % client_id, allow_pickle=True)
        weight_sum = client_weight if weight_sum is None else weight_sum + client_weight
    return weight_sum / num_clients


# EKF Function
def EKF(cnn, weight_in):
    """Load ``weight_in`` into ``cnn.model`` and return the same ``cnn``.

    cnn: wrapper object whose ``model`` attribute offers ``set_weights``
    weight_in: weights handed to ``set_weights`` unchanged
    """
    target_model = cnn.model
    target_model.set_weights(weight_in)
    return cnn


# Create Models:LeNet
def _pack_weights(layer_weights):
    """Store per-layer weight arrays of different shapes in one object array.

    Recent NumPy versions refuse to build such a ragged array implicitly, so
    an object array is filled slot by slot instead.
    """
    packed = np.empty(len(layer_weights), dtype=object)
    for slot, layer_weight in enumerate(layer_weights):
        packed[slot] = layer_weight
    return packed


# Create the server model and the four client models.
cnn_sever = CNN()
cnn1 = CNN()
cnn2 = CNN()
cnn3 = CNN()
cnn4 = CNN()
# Prepare the server data and the client data.
data_sever = DataSource()
data1 = DataSource1()
data2 = DataSource2()
data3 = DataSource3()
data4 = DataSource4()

# Compile the server model and every client model with the same settings.
for _net in (cnn_sever, cnn1, cnn2, cnn3, cnn4):
    _net.model.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])

storage_acc = []
weight = _pack_weights(cnn_sever.model.get_weights())
np.save("SeverWeight", weight)

# Only the first 1000 test images are evaluated after each round; the
# progress message reports the size of this subset.
eval_images = data_sever.test_images[0:1000]
eval_labels = data_sever.test_labels[0:1000]

# Train all clients, one federated round per iteration.
for i in range(BATCH):
    # Client model update (download the current weights from the server).
    weight = np.load("SeverWeight.npy", allow_pickle=True)
    cnn1 = EKF(cnn1, weight)
    cnn2 = EKF(cnn2, weight)
    cnn3 = EKF(cnn3, weight)
    cnn4 = EKF(cnn4, weight)
    # Each client trains on its chunk for this round.
    cnn1.model.fit(data1.train_images[i], data1.train_labels[i], epochs=3)
    cnn2.model.fit(data2.train_images[i], data2.train_labels[i], epochs=3)
    cnn3.model.fit(data3.train_images[i], data3.train_labels[i], epochs=3)
    cnn4.model.fit(data4.train_images[i], data4.train_labels[i], epochs=3)

    # Save every client's weights so FedAvg can read them back.
    weight_CNN1 = _pack_weights(cnn1.model.get_weights())
    weight_CNN2 = _pack_weights(cnn2.model.get_weights())
    weight_CNN3 = _pack_weights(cnn3.model.get_weights())
    weight_CNN4 = _pack_weights(cnn4.model.get_weights())
    np.save("Client1Weight", weight_CNN1)
    np.save("Client2Weight", weight_CNN2)
    np.save("Client3Weight", weight_CNN3)
    np.save("Client4Weight", weight_CNN4)
    weight = FedAvg()
    # Upload the averaged weights to the server and evaluate them.
    cnn_sever.model.set_weights(weight)
    np.save("SeverWeight", weight)
    test_loss, test_acc = cnn_sever.model.evaluate(eval_images, eval_labels)
    print("Sever: 轮次: %d,准确率: %.4f，共测试了%d张图片 " % (i + 1, test_acc, len(eval_labels)))
    storage_acc = np.append(storage_acc, test_acc)

# @app.route('/c1')
# def weight_CNN1():
#     return str(weight_CNN1)
#
# @app.route('/c2')
# def weight_CNN2():
#     return ('the CNN2 client\'s weight is : \n' , str(weight_CNN2))
#
# @app.route('/c3')
# def weight_CNN3():
#     return ('the CNN3 client\'s weight is : \n' , str(weight_CNN3))
#
# @app.route('/c4')
# def weight_CNN4():
#     return ('the CNN4 client\'s weight is : \n' , str(weight_CNN4))

# Plot the accuracy of every round and save it as a downloadable image.
# The x axis follows the number of recorded values, so it stays correct when
# the number of rounds is changed.
x = np.arange(len(storage_acc))
plt.plot(x, storage_acc)
plt.savefig('./acc.png')

# @app.route('/')
# def index():
#     return render_template('index.html', weight = str(weight))

@app.route('/', methods=['GET', 'POST'])
# GET only fetches data from the server.
# POST changes something on the server; uploads always need POST.
def download_file():
    """Render the download page listing the ``.png`` and ``.npy`` files.

    Only entries of the current working directory whose extension is
    ``.png`` or ``.npy`` are passed to the template.
    """
    shown_suffixes = ('.png', '.npy')
    entries = [
        name
        for name in os.listdir('.')
        if os.path.splitext(name)[1] in shown_suffixes
    ]
    return render_template('download.html', entries=entries)


@app.route('/downloads/<filename>', methods=['GET', 'POST'])
def downloaded_file(filename):
    """Send one result file from the working directory as a download.

    filename: name of the requested file, taken from the URL
    Returns: the file response, sent as an attachment
    Raises: a 404 error when the extension is not ``.png`` / ``.npy`` or the
        file does not exist
    """
    from flask import abort  # local import: not among the file-level imports

    # Only the result files shown on the download page may be fetched;
    # without this check the application source could be downloaded too.
    if os.path.splitext(filename)[1] not in ('.png', '.npy'):
        abort(404)
    # Serve from the working directory, the same place the download page
    # lists. as_attachment=True makes the browser download the file instead
    # of showing it in a new tab.
    response = send_from_directory(os.path.abspath('.'), filename, as_attachment=True)
    # Flash only after the file was found, so a failed request does not
    # leave a success message behind.
    flash('Download Successfully ! ! !', 'success')
    return response


if __name__ == '__main__':
    # Bind to every network interface so the server is reachable from
    # outside the container, not only from inside it.
    app.run(host = '0.0.0.0')
    # Disabled alternative: serve through livereload instead of app.run.
    # from livereload import Server  # used to refresh this page again
    #
    # server = Server(app.wsgi_app)
    # server.watch('**/*.*')
    # server.serve()

2 download.html

{% extends "bootstrap/base.html" %}
{# Download page: shows flashed messages and a list of downloadable files. #}
{% block title %}DOWNLOAD PAGE{% endblock %}

{% block content %}
    <div class="container">
        {# Render every flashed message as an alert styled by its category. #}
        {% with messages = get_flashed_messages(with_categories=true) %}
            {% if messages %}
                {% for category, message in messages %}
                    <div class="alert alert-{{ category }}">
                        <button type="button" class="close" data-dismiss="alert" aria-hidden="true">×</button>
                        {{ message }}
                    </div>
                {% endfor %}
            {% endif %}
        {% endwith %}
        <h2>You Can Download The Following Files</h2>
        <h5 style="color:red">(just click it ☝ ~)</h5>
        <ol>
        {# One link per entry, pointing at the downloaded_file view. #}
        {% for entry in entries %}
        <li><a href="{{url_for('downloaded_file',filename = entry)}}">{{entry}}</a>
        {% endfor %}
        </ol>
    </div>
{% endblock %}

3 Dockerfile 和 requirement.txt

# Build on top of the previously built application image.
FROM federated:v1.2
# Copy every file of the build context into the image's working directory.
# (Dockerfile comments must be on their own line; text after an instruction
# is parsed as extra arguments.)
COPY . .
# Keep the working directory inherited from the base image.
WORKDIR .
# Install the Python dependencies listed in requirement.txt.
RUN pip install -r requirement.txt
EXPOSE 5000
RUN /bin/bash -c 'echo init ok'
CMD ["python", "main.py"]

requirement.txt：
可用tensorflow==2.4.1的形式指定版本进行pip install

Flask
tensorflow
matplotlib
flask_bootstrap
numpy

4 deployment 和 service

deployment.yaml:

# Deployment that runs the upgraded federated image (v1.3).
apiVersion: apps/v1
kind: Deployment
metadata:
  creationTimestamp: null
  labels:
    app: federated-deployment
  name: federated-deployment
spec:
  # Number of pod copies to keep running.
  replicas: 2
  selector:
    # Must match the labels in the pod template below.
    matchLabels:
      app: federated-deployment
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: federated-deployment
    spec:
      containers:
      - image: registry.cn-beijing.aliyuncs.com/hqc-k8s/federated:v1.3
        name: federated
        resources: {}
        ports:
        # Port declared for the container.
        - containerPort: 5000
        # Pull the image only when it is not already present on the node.
        imagePullPolicy: IfNotPresent
status: {}

service.yaml:

# NodePort Service in front of the federated-deployment pods.
apiVersion: v1 # note: unlike the Deployment, this must not be 'apps/v1'
kind: Service
metadata:
  name: federated-service
  labels:
    app: federated-service
spec:
  ports:
  # port: Service port; targetPort: port on the pod; nodePort: port on every node.
  - port: 80
    targetPort: 5000
    nodePort: 30000
    protocol: TCP
  # The selector must equal the pod label set by the Deployment template
  # (app: federated-deployment); otherwise the Service has no endpoints and
  # access through node IP + port fails.
  selector:
    app: federated-deployment
  type: NodePort

5 结果

控制器和服务等全部正常
在这里插入图片描述

5.1 出错

但是发现只是一瞬间running，过一会就一直重启。ip+port的方式更是没法成功。
查看日志发现：

root@master:/home/hqc/自然基金项目/Federated# kubectl logs federated-deployment-68f5c7fb8d-2zqpf -n default
# federated-deployment-68f5c7fb8d-2zqpf 是Pod名（不是容器名）

在这里插入图片描述
从日志发现是内部程序的原因，不是集群的问题：程序无法访问网址下载所需数据集。
为啥本地docker run可以成功运行，创建deployment后不能访问呢？？

以为的原因：

看这报错好像是外网的问题，但实际上不是。
肯定是service.yaml文件出错了，修改后仍然不行。

5.2 解决

最后发现不知为啥master的IP地址变成了192.168.43.49，好离谱。
想设置静态IP，按照网上大家都行的方法尝试发现没法固定IP，遂作罢。这个工作以后再弄。
直接修改hosts文件为192.168.43.49。
vim /etc/hosts
重启节点，惊喜发现所有都running。
在这里插入图片描述
创建deployment和service之后要等一段时间运行结束，再进行ip+port方式访问。

成功！！！