0 前提
创建tensorflow-gpu虚拟环境,参考这篇博客
1 尝试运行demo
# 进入之前创建好的tf虚拟环境
root@master:/home/hqc# source activate tf
# 进入含源码所在地址
(tf) root@master:/home/hqc# cd 自然基金项目/Federated/
(tf) root@master:/home/hqc/自然基金项目/Federated#
# 运行程序
(tf) root@master:/home/hqc/自然基金项目/Federated# python main.py
# 会报错,依次conda install 对应包 即可!
# conda install 对应包
(tf) root@master:/home/hqc/自然基金项目/Federated# conda install matplotlib
(tf) root@master:/home/hqc/自然基金项目/Federated# conda install numpy
# 再次运行程序
(tf) root@master:/home/hqc/自然基金项目/Federated# python main.py
...
...
32/32 [==============================] - 0s 805us/step - loss: 0.0610 - accuracy: 0.9840
Sever: 轮次: 99,准确率: 0.9840,共测试了10000张图片
Epoch 1/3
7/7 [==============================] - 0s 1ms/step - loss: 0.0515 - accuracy: 0.9799
Epoch 2/3
7/7 [==============================] - 0s 2ms/step - loss: 0.0153 - accuracy: 1.0000
Epoch 3/3
7/7 [==============================] - 0s 1ms/step - loss: 0.0073 - accuracy: 1.0000
Epoch 1/3
6/6 [==============================] - 0s 1ms/step - loss: 0.0122 - accuracy: 1.0000
Epoch 2/3
6/6 [==============================] - 0s 2ms/step - loss: 0.0053 - accuracy: 1.0000
Epoch 3/3
6/6 [==============================] - 0s 1ms/step - loss: 0.0038 - accuracy: 1.0000
Epoch 1/3
16/16 [==============================] - 0s 1ms/step - loss: 0.0101 - accuracy: 0.9980
Epoch 2/3
16/16 [==============================] - 0s 1ms/step - loss: 0.0037 - accuracy: 1.0000
Epoch 3/3
16/16 [==============================] - 0s 1ms/step - loss: 0.0017 - accuracy: 1.0000
Epoch 1/3
1/1 [==============================] - 0s 2ms/step - loss: 0.0122 - accuracy: 1.0000
Epoch 2/3
1/1 [==============================] - 0s 2ms/step - loss: 0.0065 - accuracy: 1.0000
Epoch 3/3
1/1 [==============================] - 0s 1ms/step - loss: 0.0038 - accuracy: 1.0000
32/32 [==============================] - 0s 775us/step - loss: 0.0595 - accuracy: 0.9800
Sever: 轮次: 100,准确率: 0.9800,共测试了10000张图片
QStandardPaths: wrong ownership on runtime directory /run/user/1000, 1000 instead of 0
# 成功!
2 使用flask+html将demo微服务化
from flask import Flask,render_template
from flask import jsonify
import random
import matplotlib.pyplot as plt
# this module is used to draw a picture
# usually connected to 'numpy' module
# its usage just like matlab
import numpy as np
# this module is used to calculate or transform arrays and lists
from tensorflow.keras import datasets, layers, models
# tensorflow.keras is TensorFlow's high-level Python API
# 'from tensorflow.keras import datasets' is used to download datasets
# 'import layers' is used to customize the layers of neural network
# 'import models' is used to customize the whole model of neural network
# Flask application object; the route defined later in this file is registered on it.
app = Flask(__name__)

# Number of federated rounds. The same value is used as the number of data
# shards each client's training set is cut into (one shard per round).
BATCH = 100
# the images source
class DataSource(object):
    """MNIST data held by the server: a training subset and the test split."""

    def __init__(self):
        train_split, test_split = datasets.mnist.load_data()
        raw_train_x, train_y = train_split
        raw_test_x, test_y = test_split

        # 60000 training and 10000 test images: add a trailing channel axis
        # and scale the pixel values into the 0-1 range.
        train_x = raw_train_x.reshape((60000, 28, 28, 1)) / 255.0
        test_x = raw_test_x.reshape((10000, 28, 28, 1)) / 255.0

        # Keep only the first 15000 training samples; keep the whole test split.
        self.train_images = train_x[0:15000]
        self.train_labels = train_y[0:15000]
        self.test_images = test_x[0:10000]
        self.test_labels = test_y[0:10000]
def random_num_with_fix_total(maxvalue, num):
    """Split ``maxvalue`` into ``num`` random positive integers.

    ``num - 1`` distinct cut points are drawn from ``1 .. maxvalue - 1``.  The
    gaps between consecutive cut points, with 0 and ``maxvalue`` as the outer
    bounds, are the returned parts, so they always add up to ``maxvalue``.

    maxvalue: the total the returned integers sum to.
    num: how many integers to return.
    """
    cut_points = random.sample(range(1, maxvalue), k=num - 1)
    # Adding both ends turns the cut points into num + 1 ordered boundaries.
    bounds = sorted(cut_points + [0, maxvalue])
    return [upper - lower for lower, upper in zip(bounds, bounds[1:])]
class DataSource1(object):
    """Data for client 1: MNIST training samples 0-14999 cut into BATCH shards.

    ``train_images[r]`` / ``train_labels[r]`` hold the randomly sized shard
    used in round ``r``.  The full test split is kept as well.
    """

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
        # 60000 training and 10000 test images: add a channel axis and scale
        # the pixel values into the 0-1 range.
        train_images = train_images.reshape((60000, 28, 28, 1)) / 255.0
        test_images = test_images.reshape((10000, 28, 28, 1)) / 255.0

        self.TI, self.TL = train_images[0:15000], train_labels[0:15000]
        self.train_images = np.empty(BATCH, dtype=object)
        self.train_labels = np.empty(BATCH, dtype=object)

        # Iterate over the shard sizes themselves instead of a hard-coded
        # range(100), so the loop stays correct when BATCH is changed.
        begin = 0
        for count, size in enumerate(random_num_with_fix_total(len(self.TI), BATCH)):
            end = begin + size
            self.train_images[count] = self.TI[begin:end]
            self.train_labels[count] = self.TL[begin:end]
            begin = end

        self.test_images, self.test_labels = test_images[0:10000], test_labels[0:10000]
class DataSource2(object):
    """Data for client 2: MNIST training samples 15000-29999 cut into BATCH shards.

    ``train_images[r]`` / ``train_labels[r]`` hold the randomly sized shard
    used in round ``r``.
    """

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
        # Add a channel axis and scale the pixel values into the 0-1 range.
        train_images = train_images.reshape((60000, 28, 28, 1)) / 255.0
        test_images = test_images.reshape((10000, 28, 28, 1)) / 255.0

        self.TI, self.TL = train_images[15000:30000], train_labels[15000:30000]
        self.train_images = np.empty(BATCH, dtype=object)
        self.train_labels = np.empty(BATCH, dtype=object)

        # Iterate over the shard sizes themselves instead of a hard-coded
        # range(100), so the loop stays correct when BATCH is changed.
        begin = 0
        for count, size in enumerate(random_num_with_fix_total(len(self.TI), BATCH)):
            end = begin + size
            self.train_images[count] = self.TI[begin:end]
            self.train_labels[count] = self.TL[begin:end]
            begin = end
class DataSource3(object):
    """Data for client 3: MNIST training samples 30000-44999 cut into BATCH shards.

    ``train_images[r]`` / ``train_labels[r]`` hold the randomly sized shard
    used in round ``r``.
    """

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
        # Add a channel axis and scale the pixel values into the 0-1 range.
        train_images = train_images.reshape((60000, 28, 28, 1)) / 255.0
        test_images = test_images.reshape((10000, 28, 28, 1)) / 255.0

        # This class previously reused 15000:30000, the same range as
        # DataSource2, so two clients trained on identical images.  Give this
        # client its own quarter of the training set.
        # NOTE(review): assumes disjoint client data is the intent - confirm.
        self.TI, self.TL = train_images[30000:45000], train_labels[30000:45000]
        self.train_images = np.empty(BATCH, dtype=object)
        self.train_labels = np.empty(BATCH, dtype=object)

        # Iterate over the shard sizes themselves instead of a hard-coded
        # range(100), so the loop stays correct when BATCH is changed.
        begin = 0
        for count, size in enumerate(random_num_with_fix_total(len(self.TI), BATCH)):
            end = begin + size
            self.train_images[count] = self.TI[begin:end]
            self.train_labels[count] = self.TL[begin:end]
            begin = end
class DataSource4(object):
    """Data for client 4: MNIST training samples 45000-59999 cut into BATCH shards.

    ``train_images[r]`` / ``train_labels[r]`` hold the randomly sized shard
    used in round ``r``.
    """

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
        # Add a channel axis and scale the pixel values into the 0-1 range.
        train_images = train_images.reshape((60000, 28, 28, 1)) / 255.0
        test_images = test_images.reshape((10000, 28, 28, 1)) / 255.0

        # This class previously reused 15000:30000, the same range as
        # DataSource2, so two clients trained on identical images.  Give this
        # client the last quarter of the training set.
        # NOTE(review): assumes disjoint client data is the intent - confirm.
        self.TI, self.TL = train_images[45000:60000], train_labels[45000:60000]
        self.train_images = np.empty(BATCH, dtype=object)
        self.train_labels = np.empty(BATCH, dtype=object)

        # Iterate over the shard sizes themselves instead of a hard-coded
        # range(100), so the loop stays correct when BATCH is changed.
        begin = 0
        for count, size in enumerate(random_num_with_fix_total(len(self.TI), BATCH)):
            end = begin + size
            self.train_images[count] = self.TI[begin:end]
            self.train_labels[count] = self.TL[begin:end]
            begin = end
# Define as LeNet
class CNN(object):
    """Wrapper around a small Keras CNN for 28x28x1 inputs and 10 classes.

    The (uncompiled) network is exposed as ``self.model``.
    """

    def __init__(self):
        # Three 3x3 convolution stages (the first two followed by 2x2
        # max-pooling), then a dense head ending in a 10-way softmax.
        self.model = models.Sequential([
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
            layers.MaxPool2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPool2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(10, activation='softmax'),
        ])
        # Call self.model.summary() here to print the network structure.
# FedAvg Function
def FedAvg(weight_files=None):
    """Average the clients' saved weights with equal weighting.

    weight_files: optional sequence of ``.npy`` paths, one per client.  Each
        file is expected to hold an object array of weight tensors as saved by
        the training loop.  Defaults to ``Client1Weight.npy`` ...
        ``Client4Weight.npy`` in the working directory.

    Returns an object array with the element-wise mean over all clients.
    Raises ValueError if ``weight_files`` is empty.
    """
    if weight_files is None:
        weight_files = ["Client%dWeight.npy" % k for k in range(1, 5)]

    # NOTE(security): allow_pickle=True unpickles the file content, which can
    # execute arbitrary code.  Only load weight files this program wrote itself.
    client_weights = [np.load(path, allow_pickle=True) for path in weight_files]
    if not client_weights:
        raise ValueError("FedAvg needs at least one client weight file")

    # Object arrays add element-wise, i.e. tensor by tensor.
    total = client_weights[0]
    for extra in client_weights[1:]:
        total = total + extra
    return total / len(client_weights)
# EKF Function
def EKF(cnn, weight_in):
    """Load ``weight_in`` into ``cnn.model`` and return the same ``cnn`` object.

    Despite the name, the function only calls ``set_weights``; no filtering is
    applied to the weights.
    """
    target_model = cnn.model
    target_model.set_weights(weight_in)
    return cnn
# Create Models:LeNet
import os  # only needed here, to make sure the output directory exists


def _as_object_array(weights):
    """Pack a list of differently shaped weight tensors into a 1-D object array.

    ``np.array(list_of_ragged_arrays)`` (and ``np.save`` on such a list)
    raises ValueError on NumPy >= 1.24, so the array is filled item by item.
    """
    packed = np.empty(len(weights), dtype=object)
    for k, tensor in enumerate(weights):
        packed[k] = tensor
    return packed


# Create models: one server model and four client models, same architecture.
cnn_sever = CNN()
cnn1 = CNN()
cnn2 = CNN()
cnn3 = CNN()
cnn4 = CNN()

# Prepare server and client data.
data_sever = DataSource()
data1 = DataSource1()
data2 = DataSource2()
data3 = DataSource3()
data4 = DataSource4()

# Compile the server and client models with identical settings.
for net in (cnn_sever, cnn1, cnn2, cnn3, cnn4):
    net.model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

storage_acc = []
# Publish the server's initial weights so every client starts from them.
weight = _as_object_array(cnn_sever.model.get_weights())
np.save("SeverWeight", weight)

# Federated rounds: download -> local training -> upload -> average -> evaluate.
for i in range(BATCH):
    # Clients download the current server weights.
    # NOTE(security): allow_pickle=True is only acceptable because the file
    # was written by this very script.
    weight = np.load("SeverWeight.npy", allow_pickle=True)
    cnn1 = EKF(cnn1, weight)
    cnn2 = EKF(cnn2, weight)
    cnn3 = EKF(cnn3, weight)
    cnn4 = EKF(cnn4, weight)

    # Each client trains on its shard for this round.
    cnn1.model.fit(data1.train_images[i], data1.train_labels[i], epochs=3)
    cnn2.model.fit(data2.train_images[i], data2.train_labels[i], epochs=3)
    cnn3.model.fit(data3.train_images[i], data3.train_labels[i], epochs=3)
    cnn4.model.fit(data4.train_images[i], data4.train_labels[i], epochs=3)

    # Clients upload their weights as files; FedAvg() reads them back.
    weight_CNN1 = _as_object_array(cnn1.model.get_weights())
    weight_CNN2 = _as_object_array(cnn2.model.get_weights())
    weight_CNN3 = _as_object_array(cnn3.model.get_weights())
    weight_CNN4 = _as_object_array(cnn4.model.get_weights())
    np.save("Client1Weight", weight_CNN1)
    np.save("Client2Weight", weight_CNN2)
    np.save("Client3Weight", weight_CNN3)
    np.save("Client4Weight", weight_CNN4)
    weight = FedAvg()

    # The server adopts the averaged weights and publishes them.
    cnn_sever.model.set_weights(weight)
    np.save("SeverWeight", weight)

    # Evaluate on the first 1000 test images and report that same count
    # (the message previously reported the size of the whole test split).
    eval_images = data_sever.test_images[0:1000]
    eval_labels = data_sever.test_labels[0:1000]
    test_loss, test_acc = cnn_sever.model.evaluate(eval_images, eval_labels)
    print("Sever: 轮次: %d,准确率: %.4f,共测试了%d张图片 " % (i + 1, test_acc, len(eval_labels)))
    storage_acc = np.append(storage_acc, test_acc)

# Plot accuracy per round; the x axis follows the number of recorded rounds
# instead of a hard-coded 100.
x = np.arange(len(storage_acc))
plt.plot(x, storage_acc)
# savefig does not create missing directories.
os.makedirs('./static', exist_ok=True)
plt.savefig('./static/acc.png')
@app.route('/')
def index():
    """Render index.html, passing the module-level ``weight`` as text."""
    weight_text = str(weight)
    return render_template('index.html', weight=weight_text)
if __name__ == '__main__':
    # Listen on all interfaces; with the default loopback-only binding the
    # server cannot be reached from outside the container.
    app.run(host='0.0.0.0')
关联的HTML文件:
<!DOCTYPE html>
<!-- Result page rendered by the Flask view: shows the accuracy plot and the server weights. -->
<html lang="en">
<head>
<meta charset="UTF-8">
<title>RESULT</title>
</head>
<body>
the picture of accuracy is here :
<br>
<br>
<!-- acc.png is written into the static folder by main.py once training has finished. -->
<img src="/static/acc.png" width="1080px" height="720px">
<br>
<br>
the sever's weight is :
<br>
<br>
<!-- Template variable "weight": the string passed to render_template by the view. -->
{{weight}}
</body>
</html>
注意:
- 导入Flask,render_template模块
- 一定要注意文件结构:图片要放在static文件夹下,html文件要放在templates文件夹下,不能都放在一个文件夹下(原因:Flask默认从templates目录查找模板、从static目录提供静态文件,放在别处就找不到,程序无法正常运行)
- 最后的host必须为 host = '0.0.0.0',否则docker run之后无法从容器外部(包括公网)访问
3 容器镜像化
Dockerfile文件:
# Image for the Flask + TensorFlow federated-learning demo.
# NOTE(review): the base image tag and the pip packages are unpinned, so a rebuild may pull different versions.
FROM python
# Create the application directory layout (templates/ and static/) and install the Python dependencies.
RUN mkdir -p /Federated \
&& mkdir -p /Federated/templates \
&& mkdir -p /Federated/static \
&& pip install tensorflow \
&& pip install matplotlib \
&& pip install flask
# Copy the page template and the application into the image.
COPY ./templates/index.html /Federated/templates
COPY main.py /Federated/
WORKDIR /Federated
# Port 5000 matches the containerPort / targetPort used in the Kubernetes manifests.
EXPOSE 5000
# Build-time log message only; it has no effect on the image content.
RUN /bin/bash -c 'echo init ok'
# main.py trains first and then starts the web server.
CMD ["python", "main.py"]
注意:得自己pip安装所需要的包
4 创建deployment和service
federated-deployment.yaml
文件:
# Deployment running two replicas of the federated demo container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: federated-deployment
  labels:
    app: federated-deployment
  creationTimestamp: null
spec:
  replicas: 2
  strategy: {}
  # The selector must match the Pod template labels below.
  selector:
    matchLabels:
      app: federated-deployment
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: federated-deployment
    spec:
      containers:
        - name: federated
          image: registry.cn-beijing.aliyuncs.com/hqc-k8s/federated:v1.0
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 5000
          resources: {}
status: {}
federated-service.yaml
文件:
# NodePort Service exposing the Pods labelled app=federated-deployment.
# Note: a Service uses apiVersion "v1", not "apps/v1" like the Deployment.
apiVersion: v1
kind: Service
metadata:
  name: federated-deployment
  labels:
    app: federated-deployment
spec:
  type: NodePort
  selector:
    app: federated-deployment
  ports:
    - protocol: TCP
      port: 80          # port of the Service inside the cluster
      targetPort: 5000  # port the container listens on
      nodePort: 30001   # port opened on every node
5 结果
1 命令行结果
root@master:/home/hqc/自然基金项目/Federated# kubectl get all
NAME READY STATUS RESTARTS AGE
pod/federated-deployment-5d5cfb4c7c-5bq9r 1/1 Running 0 12m
pod/federated-deployment-5d5cfb4c7c-mcfg2 1/1 Running 0 12m
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/federated-deployment NodePort 10.107.96.50 <none> 80:30001/TCP 8m41s
NAME READY UP-TO-DATE AVAILABLE AGE
deployment.apps/federated-deployment 2/2 2 2 12m
NAME DESIRED CURRENT READY AGE
replicaset.apps/federated-deployment-5d5cfb4c7c 2 2 2 12m
2 dashboard结果
3 运行结果
6 升级微服务(添加选择下载成果文件功能)
1 main.py
- 添加了下载功能
- bootstrap优化html模块
- flash闪现消息模块
from flask import Flask,render_template
from flask import jsonify
import random
import matplotlib.pyplot as plt
# this module is used to draw a picture
# usually connected to 'numpy' module
# its usage just like matlab
import numpy as np
# this module is used to calculate or transform arrays and lists
from tensorflow.keras import datasets, layers, models
# tensorflow.keras is TensorFlow's high-level Python API
# 'from tensorflow.keras import datasets' is used to download datasets
# 'import layers' is used to customize the layers of neural network
# 'import models' is used to customize the whole model of neural network
from flask_bootstrap import Bootstrap # 继承"bootstrap/base.html"模板
import os # 处理路径相关
from flask import flash # 用于提示
from flask import request # 访问请求需要用到
#from flask import redirect # 重定向
#from flask import url_for # 获取url
from flask import send_from_directory # 用于下载文件
#from werkzeug.utils import secure_filename # 当用户输入恶意字符时,对服务器进行保护
app = Flask(__name__)
# Registering Flask-Bootstrap is what allows templates to extend "bootstrap/base.html".
bootstrap = Bootstrap(app)
# flash() needs a secret key, otherwise it raises an error.
# NOTE(security): the literal fallback is public (it is part of the source);
# set FLASK_SECRET_KEY to a private random value for any real deployment, and
# use the same value in every replica.
app.secret_key = os.environ.get('FLASK_SECRET_KEY', 'sdafdsdfdasaf')
# Number of federated rounds. The same value is used as the number of data
# shards each client's training set is cut into (one shard per round).
BATCH = 100
# the images source
class DataSource(object):
    """MNIST data held by the server: a training subset and the test split."""

    def __init__(self):
        train_split, test_split = datasets.mnist.load_data()
        raw_train_x, train_y = train_split
        raw_test_x, test_y = test_split

        # 60000 training and 10000 test images: add a trailing channel axis
        # and scale the pixel values into the 0-1 range.
        train_x = raw_train_x.reshape((60000, 28, 28, 1)) / 255.0
        test_x = raw_test_x.reshape((10000, 28, 28, 1)) / 255.0

        # Keep only the first 15000 training samples; keep the whole test split.
        self.train_images = train_x[0:15000]
        self.train_labels = train_y[0:15000]
        self.test_images = test_x[0:10000]
        self.test_labels = test_y[0:10000]
def random_num_with_fix_total(maxvalue, num):
    """Split ``maxvalue`` into ``num`` random positive integers.

    ``num - 1`` distinct cut points are drawn from ``1 .. maxvalue - 1``.  The
    gaps between consecutive cut points, with 0 and ``maxvalue`` as the outer
    bounds, are the returned parts, so they always add up to ``maxvalue``.

    maxvalue: the total the returned integers sum to.
    num: how many integers to return.
    """
    cut_points = random.sample(range(1, maxvalue), k=num - 1)
    # Adding both ends turns the cut points into num + 1 ordered boundaries.
    bounds = sorted(cut_points + [0, maxvalue])
    return [upper - lower for lower, upper in zip(bounds, bounds[1:])]
class DataSource1(object):
    """Data for client 1: MNIST training samples 0-14999 cut into BATCH shards.

    ``train_images[r]`` / ``train_labels[r]`` hold the randomly sized shard
    used in round ``r``.  The full test split is kept as well.
    """

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
        # 60000 training and 10000 test images: add a channel axis and scale
        # the pixel values into the 0-1 range.
        train_images = train_images.reshape((60000, 28, 28, 1)) / 255.0
        test_images = test_images.reshape((10000, 28, 28, 1)) / 255.0

        self.TI, self.TL = train_images[0:15000], train_labels[0:15000]
        self.train_images = np.empty(BATCH, dtype=object)
        self.train_labels = np.empty(BATCH, dtype=object)

        # Iterate over the shard sizes themselves instead of a hard-coded
        # range(100), so the loop stays correct when BATCH is changed.
        begin = 0
        for count, size in enumerate(random_num_with_fix_total(len(self.TI), BATCH)):
            end = begin + size
            self.train_images[count] = self.TI[begin:end]
            self.train_labels[count] = self.TL[begin:end]
            begin = end

        self.test_images, self.test_labels = test_images[0:10000], test_labels[0:10000]
class DataSource2(object):
    """Data for client 2: MNIST training samples 15000-29999 cut into BATCH shards.

    ``train_images[r]`` / ``train_labels[r]`` hold the randomly sized shard
    used in round ``r``.
    """

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
        # Add a channel axis and scale the pixel values into the 0-1 range.
        train_images = train_images.reshape((60000, 28, 28, 1)) / 255.0
        test_images = test_images.reshape((10000, 28, 28, 1)) / 255.0

        self.TI, self.TL = train_images[15000:30000], train_labels[15000:30000]
        self.train_images = np.empty(BATCH, dtype=object)
        self.train_labels = np.empty(BATCH, dtype=object)

        # Iterate over the shard sizes themselves instead of a hard-coded
        # range(100), so the loop stays correct when BATCH is changed.
        begin = 0
        for count, size in enumerate(random_num_with_fix_total(len(self.TI), BATCH)):
            end = begin + size
            self.train_images[count] = self.TI[begin:end]
            self.train_labels[count] = self.TL[begin:end]
            begin = end
class DataSource3(object):
    """Data for client 3: MNIST training samples 30000-44999 cut into BATCH shards.

    ``train_images[r]`` / ``train_labels[r]`` hold the randomly sized shard
    used in round ``r``.
    """

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
        # Add a channel axis and scale the pixel values into the 0-1 range.
        train_images = train_images.reshape((60000, 28, 28, 1)) / 255.0
        test_images = test_images.reshape((10000, 28, 28, 1)) / 255.0

        # This class previously reused 15000:30000, the same range as
        # DataSource2, so two clients trained on identical images.  Give this
        # client its own quarter of the training set.
        # NOTE(review): assumes disjoint client data is the intent - confirm.
        self.TI, self.TL = train_images[30000:45000], train_labels[30000:45000]
        self.train_images = np.empty(BATCH, dtype=object)
        self.train_labels = np.empty(BATCH, dtype=object)

        # Iterate over the shard sizes themselves instead of a hard-coded
        # range(100), so the loop stays correct when BATCH is changed.
        begin = 0
        for count, size in enumerate(random_num_with_fix_total(len(self.TI), BATCH)):
            end = begin + size
            self.train_images[count] = self.TI[begin:end]
            self.train_labels[count] = self.TL[begin:end]
            begin = end
class DataSource4(object):
    """Data for client 4: MNIST training samples 45000-59999 cut into BATCH shards.

    ``train_images[r]`` / ``train_labels[r]`` hold the randomly sized shard
    used in round ``r``.
    """

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
        # Add a channel axis and scale the pixel values into the 0-1 range.
        train_images = train_images.reshape((60000, 28, 28, 1)) / 255.0
        test_images = test_images.reshape((10000, 28, 28, 1)) / 255.0

        # This class previously reused 15000:30000, the same range as
        # DataSource2, so two clients trained on identical images.  Give this
        # client the last quarter of the training set.
        # NOTE(review): assumes disjoint client data is the intent - confirm.
        self.TI, self.TL = train_images[45000:60000], train_labels[45000:60000]
        self.train_images = np.empty(BATCH, dtype=object)
        self.train_labels = np.empty(BATCH, dtype=object)

        # Iterate over the shard sizes themselves instead of a hard-coded
        # range(100), so the loop stays correct when BATCH is changed.
        begin = 0
        for count, size in enumerate(random_num_with_fix_total(len(self.TI), BATCH)):
            end = begin + size
            self.train_images[count] = self.TI[begin:end]
            self.train_labels[count] = self.TL[begin:end]
            begin = end
# Define as LeNet
class CNN(object):
    """Wrapper around a small Keras CNN for 28x28x1 inputs and 10 classes.

    The (uncompiled) network is exposed as ``self.model``.
    """

    def __init__(self):
        # Three 3x3 convolution stages (the first two followed by 2x2
        # max-pooling), then a dense head ending in a 10-way softmax.
        self.model = models.Sequential([
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
            layers.MaxPool2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPool2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(10, activation='softmax'),
        ])
        # Call self.model.summary() here to print the network structure.
# FedAvg Function
def FedAvg(weight_files=None):
    """Average the clients' saved weights with equal weighting.

    weight_files: optional sequence of ``.npy`` paths, one per client.  Each
        file is expected to hold an object array of weight tensors as saved by
        the training loop.  Defaults to ``Client1Weight.npy`` ...
        ``Client4Weight.npy`` in the working directory.

    Returns an object array with the element-wise mean over all clients.
    Raises ValueError if ``weight_files`` is empty.
    """
    if weight_files is None:
        weight_files = ["Client%dWeight.npy" % k for k in range(1, 5)]

    # NOTE(security): allow_pickle=True unpickles the file content, which can
    # execute arbitrary code.  Only load weight files this program wrote itself.
    client_weights = [np.load(path, allow_pickle=True) for path in weight_files]
    if not client_weights:
        raise ValueError("FedAvg needs at least one client weight file")

    # Object arrays add element-wise, i.e. tensor by tensor.
    total = client_weights[0]
    for extra in client_weights[1:]:
        total = total + extra
    return total / len(client_weights)
# EKF Function
def EKF(cnn, weight_in):
    """Load ``weight_in`` into ``cnn.model`` and return the same ``cnn`` object.

    Despite the name, the function only calls ``set_weights``; no filtering is
    applied to the weights.
    """
    target_model = cnn.model
    target_model.set_weights(weight_in)
    return cnn
# Create Models:LeNet
def _as_object_array(weights):
    """Pack a list of differently shaped weight tensors into a 1-D object array.

    ``np.array(list_of_ragged_arrays)`` (and ``np.save`` on such a list)
    raises ValueError on NumPy >= 1.24, so the array is filled item by item.
    """
    packed = np.empty(len(weights), dtype=object)
    for k, tensor in enumerate(weights):
        packed[k] = tensor
    return packed


# Create models: one server model and four client models, same architecture.
cnn_sever = CNN()
cnn1 = CNN()
cnn2 = CNN()
cnn3 = CNN()
cnn4 = CNN()

# Prepare server and client data.
data_sever = DataSource()
data1 = DataSource1()
data2 = DataSource2()
data3 = DataSource3()
data4 = DataSource4()

# Compile the server and client models with identical settings.
for net in (cnn_sever, cnn1, cnn2, cnn3, cnn4):
    net.model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

storage_acc = []
# Publish the server's initial weights so every client starts from them.
weight = _as_object_array(cnn_sever.model.get_weights())
np.save("SeverWeight", weight)

# Federated rounds: download -> local training -> upload -> average -> evaluate.
for i in range(BATCH):
    # Clients download the current server weights.
    # NOTE(security): allow_pickle=True is only acceptable because the file
    # was written by this very script.
    weight = np.load("SeverWeight.npy", allow_pickle=True)
    cnn1 = EKF(cnn1, weight)
    cnn2 = EKF(cnn2, weight)
    cnn3 = EKF(cnn3, weight)
    cnn4 = EKF(cnn4, weight)

    # Each client trains on its shard for this round.
    cnn1.model.fit(data1.train_images[i], data1.train_labels[i], epochs=3)
    cnn2.model.fit(data2.train_images[i], data2.train_labels[i], epochs=3)
    cnn3.model.fit(data3.train_images[i], data3.train_labels[i], epochs=3)
    cnn4.model.fit(data4.train_images[i], data4.train_labels[i], epochs=3)

    # Clients upload their weights as files; FedAvg() reads them back.
    weight_CNN1 = _as_object_array(cnn1.model.get_weights())
    weight_CNN2 = _as_object_array(cnn2.model.get_weights())
    weight_CNN3 = _as_object_array(cnn3.model.get_weights())
    weight_CNN4 = _as_object_array(cnn4.model.get_weights())
    np.save("Client1Weight", weight_CNN1)
    np.save("Client2Weight", weight_CNN2)
    np.save("Client3Weight", weight_CNN3)
    np.save("Client4Weight", weight_CNN4)
    weight = FedAvg()

    # The server adopts the averaged weights and publishes them.
    cnn_sever.model.set_weights(weight)
    np.save("SeverWeight", weight)

    # Evaluate on the first 1000 test images and report that same count
    # (the message previously reported the size of the whole test split).
    eval_images = data_sever.test_images[0:1000]
    eval_labels = data_sever.test_labels[0:1000]
    test_loss, test_acc = cnn_sever.model.evaluate(eval_images, eval_labels)
    print("Sever: 轮次: %d,准确率: %.4f,共测试了%d张图片 " % (i + 1, test_acc, len(eval_labels)))
    storage_acc = np.append(storage_acc, test_acc)
# @app.route('/c1')
# def weight_CNN1():
# return str(weight_CNN1)
#
# @app.route('/c2')
# def weight_CNN2():
# return ('the CNN2 client\'s weight is : \n' , str(weight_CNN2))
#
# @app.route('/c3')
# def weight_CNN3():
# return ('the CNN3 client\'s weight is : \n' , str(weight_CNN3))
#
# @app.route('/c4')
# def weight_CNN4():
# return ('the CNN4 client\'s weight is : \n' , str(weight_CNN4))
# Show Acc
# Plot accuracy per round.  The x axis follows the number of recorded rounds
# instead of a hard-coded 100, so it stays in step with BATCH.
x = np.arange(len(storage_acc))
plt.plot(x, storage_acc)
# Saved into the working directory, where the download page looks for .png files.
plt.savefig('./acc.png')
# @app.route('/')
# def index():
# return render_template('index.html', weight = str(weight))
@app.route('/', methods=['GET', 'POST'])
def download_file():
    """Render download.html with the .png and .npy entries of the working directory."""
    wanted_suffixes = ('.png', '.npy')
    # os.listdir returns files and folders alike; only the suffix is checked.
    entries = [
        name for name in os.listdir('.')
        if os.path.splitext(name)[1] in wanted_suffixes
    ]
    return render_template('download.html', entries=entries)
@app.route('/downloads/<filename>', methods=['GET', 'POST'])
def downloaded_file(filename):
    """Send ``filename`` from the working directory as an attachment.

    Only the file types offered on the download page (.png / .npy) are
    served; any other name gets a 404, so the route cannot be used to fetch
    source files or other content of the directory.  A missing file also
    results in a 404, raised by ``send_from_directory``.
    """
    from flask import abort  # local import: abort is not among the file-level imports

    if os.path.splitext(filename)[1] not in ('.png', '.npy'):
        abort(404)

    # as_attachment=True makes the browser download the file instead of
    # displaying it in a new tab.
    response = send_from_directory('./', filename, as_attachment=True)
    # Flash only after the file was found; the message is shown the next
    # time the download page is rendered.
    flash('Download Successfully ! ! !', 'success')
    return response
if __name__ == '__main__':
    # Listen on all interfaces so the server is reachable from outside the
    # container, not only via loopback.
    app.run(host='0.0.0.0')
# from livereload import Server # 用于再次刷新本页面
#
# server = Server(app.wsgi_app)
# server.watch('**/*.*')
# server.serve()
2 download.html
{% extends "bootstrap/base.html" %}
{# Download page: shows flashed messages, then one link per entry passed in by the view. #}
{% block title %}DOWNLOAD PAGE{% endblock %}
{% block content %}
<div class="container">
{# Flashed messages; the category is used as the suffix of the alert CSS class. #}
{% with messages = get_flashed_messages(with_categories=true) %}
{% if messages %}
{% for category, message in messages %}
<div class="alert alert-{{ category }}">
<button type="button" class="close" data-dismiss="alert" aria-hidden="true">×</button>
{{ message }}
</div>
{% endfor %}
{% endif %}
{% endwith %}
<h2>You Can Download The Following Files</h2>
<h5 style="color:red">(just click it ☝ ~)</h5>
<ol>
{# Each link targets the downloaded_file view with the entry as its filename. #}
{% for entry in entries %}
<li><a href="{{url_for('downloaded_file',filename = entry)}}">{{entry}}</a>
{% endfor %}
</ol>
</div>
{% endblock %}
3 Dockerfile 和 requirement.txt
# Build on the previous image of this project.
FROM federated:v1.2
# Copy every file of the build context into the image's current working directory.
# (Comments must be on their own line: a trailing "# ..." after an instruction
# is not a comment in a Dockerfile and is parsed as part of the arguments.)
COPY . .
# Keep the current directory as the working directory.
WORKDIR .
RUN pip install -r requirement.txt
# Port 5000 matches the containerPort / targetPort used in the Kubernetes manifests.
EXPOSE 5000
# Build-time log message only; it has no effect on the image content.
RUN /bin/bash -c 'echo init ok'
CMD ["python", "main.py"]
requirement.txt:
可用 tensorflow==2.4.1 的形式指定版本进行pip install
Flask
tensorflow
matplotlib
flask_bootstrap
numpy
4 deployment 和 service
deployment.yaml:
# Deployment running two replicas of the federated demo container (image v1.3).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: federated-deployment
  labels:
    app: federated-deployment
  creationTimestamp: null
spec:
  replicas: 2
  strategy: {}
  # The selector must match the Pod template labels below.
  selector:
    matchLabels:
      app: federated-deployment
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: federated-deployment
    spec:
      containers:
        - name: federated
          image: registry.cn-beijing.aliyuncs.com/hqc-k8s/federated:v1.3
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 5000
          resources: {}
status: {}
service.yaml:
# NodePort Service for the federated demo.
# Note: a Service uses apiVersion "v1", not "apps/v1" like the Deployment.
apiVersion: v1
kind: Service
metadata:
  name: federated-service
  labels:
    app: federated-service
spec:
  type: NodePort
  # The selector has to match the labels of the Pods, which the Deployment
  # template sets to app=federated-deployment.  With the previous value
  # (app=federated-service) the Service selected no Pod and had no endpoints.
  selector:
    app: federated-deployment
  ports:
    - protocol: TCP
      port: 80          # port of the Service inside the cluster
      targetPort: 5000  # port the container listens on
      nodePort: 30000   # port opened on every node
5 结果
控制器和服务等全部正常
5.1 出错
但是发现Pod只是一瞬间处于Running状态,过一会儿就一直重启,通过 ip+port 的方式更是无法访问。
查看日志发现:
root@master:/home/hqc/自然基金项目/Federated# kubectl logs federated-deployment-68f5c7fb8d-2zqpf -n default
# federated-deployment-68f5c7fb8d-2zqpf 是Pod名(不是容器名)
发现是内部程序的原因,不是集群的问题,程序无法访问网址下载所需数据集。
为啥本地docker run可以成功运行,创建deployment后不能访问呢??
以为的原因:
- 看这报错好像是外网的问题,但实际上不是。
- 肯定是service.yaml文件出错了,修改后仍然不行。
5.2 解决
最后发现不知为啥master的IP地址变成了192.168.43.49,好离谱。
想设置静态IP,按照网上大家都行的方法尝试发现没法固定IP,遂作罢。这个工作以后再弄。
直接把hosts文件中master对应的IP修改为192.168.43.49。
vim /etc/hosts
重启节点,惊喜发现所有都running。
创建deployment和service之后,要等一段时间让程序运行结束(训练完成后Web服务才会启动),再通过 ip+port 方式访问。
成功!!!