实现同样一个功能,笔者的代码运行需要 11 秒,而同学的代码运行不到 1 秒,但两者的实现逻辑是类似的,所以需要使用性能分析工具(line_profiler)逐行定位瓶颈。
安装
- 命令行安装:
pip install line_profiler
- 本地下载后安装:
https://www.lfd.uci.edu/~gohlke/pythonlibs/#line_profiler
根据平台选择对应whl文件,然后本地安装。
修改代码
先来一个demo,do_stuff是我们的目标,要测试这个函数每一行的耗时。
from line_profiler import LineProfiler
import random
def do_other_stuff(numbers):
    """Return the sum of *numbers*.

    Helper for the profiling demo: it is registered with the profiler in
    addition to ``do_stuff`` so that its own line timings are reported too.
    The caller in the demo ignores the returned value.
    """
    return sum(numbers)
def do_stuff(numbers):
    """Run the demo workload whose per-line cost is being profiled.

    Calls ``do_other_stuff`` and then builds two throwaway lists. Nothing is
    returned: the lists exist only so that each statement shows up as its
    own line in the profiler report.
    """
    do_other_stuff(numbers)
    # Iterate over the values directly instead of indexing via range(len()).
    scaled = [n / 43 for n in numbers]
    greetings = ['hello' + str(n) for n in numbers]
numbers = [random.randint(1, 100) for _ in range(1000)]  # 1000 random ints in [1, 100]
profiler = LineProfiler()  # collects the per-line timings
profiler.add_function(do_other_stuff)  # add additional function to profile
profiled_do_stuff = profiler(do_stuff)  # wrap the target so its calls are traced
profiled_do_stuff(numbers)  # run the workload once under the profiler
profiler.print_stats()  # print the per-line report
所以实际上只有最后五行(创建 LineProfiler、注册额外要分析的函数、包装并调用目标函数、打印统计结果)以及开头的 from line_profiler import LineProfiler 是为了性能分析而添加进来的。
在笔者的问题中,是加载mnist数据集:
import os
import time
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import random
from line_profiler import LineProfiler
import time
# WORK1: --------------BEGIN-------------------
# 构建数据平衡采样方法:make_batch
# 参数等都可以自定义
# 返回值为(input_a, input_b), label
# input_a形状为(batch_size,28,28),input_b形状为(batch_size,28,28),label形状为(batch_size,)
def make_batch(batch_size, dataset):
    """Sample one class-balanced batch of image pairs.

    For each of the 10 classes, ``batch_size // 20`` positive pairs and the
    same number of negative pairs are drawn:

    * positive pair: two different samples of the same class, label 0;
    * negative pair: one sample of the class and one sample of any other
      class, label 1.

    The batch therefore holds ``20 * (batch_size // 20)`` pairs, which equals
    ``batch_size`` only when ``batch_size`` is a multiple of 20.

    Args:
        batch_size: requested number of pairs.
        dataset: two-element sequence ``(images, labels)``; labels are
            compared against the integers 0..9.

    Returns:
        ``((input_a, input_b), label)`` where ``input_a`` and ``input_b`` are
        arrays of paired images (all positives first, then all negatives)
        and ``label`` is a float64 array of 0.0 / 1.0.

    Raises:
        ValueError: from ``random.sample`` when a class has fewer than two
            samples (or no sample of another class exists) while at least
            one pair per class is requested.
    """
    label = []
    input_a = []
    input_b = []
    # NOTE: np.array copies its input on every call. For a large dataset this
    # copy is the hotspot that the line-profiler run of this version reports.
    x1 = np.array(dataset[0])
    y1 = np.array(dataset[1])
    cls_num = batch_size // 20  # pairs per class, for positives and negatives each
    cls_idx_same = [np.where(y1 == i)[0] for i in range(10)]
    cls_idx_diff = [np.where(y1 != i)[0] for i in range(10)]
    # Positive pairs: two distinct indices from the same class.
    for class_num in range(10):
        for _ in range(cls_num):
            choose_two = random.sample(cls_idx_same[class_num].tolist(), 2)
            input_a.append(x1[choose_two[0]])
            input_b.append(x1[choose_two[1]])
            label.append(0)
    # Negative pairs: one index from the class, one from any other class.
    for class_num in range(10):
        for _ in range(cls_num):
            choose_same = random.sample(cls_idx_same[class_num].tolist(), 1)
            choose_diff = random.sample(cls_idx_diff[class_num].tolist(), 1)
            input_a.append(x1[choose_same[0]])
            input_b.append(x1[choose_diff[0]])
            label.append(1)
    input_a = np.array(input_a)
    input_b = np.array(input_b)
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    label = np.array(label).astype(np.float64)
    return (input_a, input_b), label
if __name__ == "__main__":
    path = './dataset/mnist.npz'
    # Use the archive as a context manager so it is closed even if a key
    # lookup fails.
    with np.load(path) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']
    # The test data is also normalized by 255; do not change the normalization.
    x_train = x_train / 255.
    x_test = x_test / 255.
    # Shuffle images and labels with the same permutation.
    idx_shuffle = np.arange(len(x_train))
    np.random.shuffle(idx_shuffle)
    x_train = x_train[idx_shuffle]
    y_train = y_train[idx_shuffle]
    # Keep the first 80% of the shuffled data as the training split.
    slice_08 = int(len(x_train)*0.8)
    train_set = [x_train[:slice_08], y_train[:slice_08]]
    # Alternative: convert once here instead of inside make_batch.
    # train_set = [np.array(x_train[:slice_08]),
    #              np.array(y_train[:slice_08])]
    # val_set = [x_test, y_test]
    lasttime = time.time()
    # Un-profiled timing loop:
    # for i in range(100):
    #     make_batch(64, train_set)
    lp = LineProfiler()
    lp_wrapper = lp(make_batch)  # wrap make_batch so one call is traced line by line
    lp_wrapper(64, train_set)
    lp.print_stats()
    print(time.time()-lasttime)
来运行分析一下运行一次的耗时:
可以发现 np.array(dataset[0]) 耗时非常严重:np.array 每次调用都会把整个训练集复制一遍,而 make_batch 每取一个 batch 就会被调用一次,所以这个转换不适合放在 make_batch 内部。改动这个部分,在调用 make_batch 之前(循环外)提前转换好格式,可以节约很长时间。
import os
import time
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import random
from line_profiler import LineProfiler
import time
# WORK1: --------------BEGIN-------------------
# 构建数据平衡采样方法:make_batch
# 参数等都可以自定义
# 返回值为(input_a, input_b), label
# input_a形状为(batch_size,28,28),input_b形状为(batch_size,28,28),label形状为(batch_size,)
def make_batch(batch_size, dataset):
    """Sample one class-balanced batch of image pairs.

    For each of the 10 classes, ``batch_size // 20`` positive pairs and the
    same number of negative pairs are drawn:

    * positive pair: two different samples of the same class, label 0;
    * negative pair: one sample of the class and one sample of any other
      class, label 1.

    The batch therefore holds ``20 * (batch_size // 20)`` pairs, which equals
    ``batch_size`` only when ``batch_size`` is a multiple of 20.

    Args:
        batch_size: requested number of pairs.
        dataset: two-element sequence ``(images, labels)``; labels are
            compared against the integers 0..9. Arrays are used as-is
            (no copy); other sequences are converted.

    Returns:
        ``((input_a, input_b), label)`` where ``input_a`` and ``input_b`` are
        arrays of paired images (all positives first, then all negatives)
        and ``label`` is a float64 array of 0.0 / 1.0.

    Raises:
        ValueError: from ``random.sample`` when a class has fewer than two
            samples (or no sample of another class exists) while at least
            one pair per class is requested.
    """
    # np.asarray returns an existing ndarray unchanged, so nothing is copied
    # per call, while plain lists are still accepted.
    images = np.asarray(dataset[0])
    targets = np.asarray(dataset[1])
    pairs_per_class = batch_size // 20  # for positives and negatives each
    input_a, input_b, label = [], [], []

    # random.sample needs a real sequence; convert each class's index array
    # to a list once instead of once per drawn pair.
    same_pools = [np.where(targets == c)[0].tolist() for c in range(10)]

    # Positive pairs: two distinct indices from the same class.
    for c in range(10):
        for _ in range(pairs_per_class):
            first, second = random.sample(same_pools[c], 2)
            input_a.append(images[first])
            input_b.append(images[second])
            label.append(0)

    # Negative pairs: one index from the class, one from any other class.
    for c in range(10):
        if pairs_per_class <= 0:
            break
        diff_pool = np.where(targets != c)[0].tolist()
        for _ in range(pairs_per_class):
            same_idx = random.sample(same_pools[c], 1)[0]
            diff_idx = random.sample(diff_pool, 1)[0]
            input_a.append(images[same_idx])
            input_b.append(images[diff_idx])
            label.append(1)

    input_a = np.array(input_a)
    input_b = np.array(input_b)
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    label = np.array(label).astype(np.float64)
    return (input_a, input_b), label
if __name__ == "__main__":
    path = './dataset/mnist.npz'
    # Use the archive as a context manager so it is closed even if a key
    # lookup fails.
    with np.load(path) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']
    # The test data is also normalized by 255; do not change the normalization.
    x_train = x_train / 255.
    x_test = x_test / 255.
    # Shuffle images and labels with the same permutation.
    idx_shuffle = np.arange(len(x_train))
    np.random.shuffle(idx_shuffle)
    x_train = x_train[idx_shuffle]
    y_train = y_train[idx_shuffle]
    # Keep the first 80% of the shuffled data as the training split and
    # convert it to arrays once, here, rather than on every make_batch call.
    slice_08 = int(len(x_train)*0.8)
    train_set = [np.array(x_train[:slice_08]), np.array(y_train[:slice_08])]
    # val_set = [x_test, y_test]
    lasttime = time.time()
    # Un-profiled timing loop:
    # for i in range(100):
    #     make_batch(64, train_set)
    lp = LineProfiler()
    lp_wrapper = lp(make_batch)  # wrap make_batch so one call is traced line by line
    lp_wrapper(64, train_set)
    lp.print_stats()
    print(time.time()-lasttime)
运行结果如下:
这样瓶颈就转移到其他地方了。