TensorFlow in Practice 6: Advanced AlexNet (ImageNet Recognition)

In the previous post on AlexNet principles we built a simple AlexNet model with TensorFlow; this time we will run an experiment that uses a pretrained AlexNet to recognize images from the ImageNet classes.

Reference blog: TensorFlow Learning -- AlexNet Implementation & Image Recognition

Open-source code: tf_alexnet (http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/)

File structure:

       1) bvlc_alexnet.npy: the pretrained AlexNet weight file

       2) caffe_classes.py: the class_names label list

       3) main.py: the main program
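
Before reading main.py, it helps to know how the weight file is organized: bvlc_alexnet.npy stores a pickled dict that maps each layer name ('conv1' ... 'fc8') to a list [weights, biases], which is the layout the load method below walks through. A minimal inspection sketch (assuming the file sits in the working directory):

import numpy as np

#load the pickled dict of layer name -> [weights, biases]
#allow_pickle is required on recent numpy versions
weights = np.load('bvlc_alexnet.npy', encoding = 'bytes', allow_pickle = True).item()
for layer, params in weights.items():
	#e.g. conv1 -> shapes [(11, 11, 3, 96), (96,)]
	print(layer, [p.shape for p in params])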

main.py code:

#alexnet application for image sets identification
#source http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/
#needed files:	bvlc_alexnet.npy -- the weights; they need to be in the working directory
#				caffe_classes.py -- the classes, in the same order as the outputs of the network

import tensorflow as tf
import numpy as np

#define convLayer to generate the different conv layers
def convLayer(x, k_height, k_width, stride_x, stride_y, feature_num, name, padding = 'SAME', groups = 1):
	'''
	description: generate a conv layer
	Args:		x: input data [image or previous conv result]
				k_height: kernel height
				k_width: kernel width
				stride_x: stride along x
				stride_y: stride along y
				feature_num: number of output feature maps [kernel count]
				name: tf variable scope name
				padding: padding mode [default: 'SAME']
				groups: number of convolution groups from the two-GPU alexnet design [default: 1]
	Returns:	relu(x*w + b)
	'''
	#get the number of input channels
	channel = int(x.get_shape()[-1])
	#define the conv op as an anonymous function
	conv = lambda image, kernel : tf.nn.conv2d(image, kernel, strides = [1, stride_x, stride_y, 1], padding = padding)
	#define name variable for weights and biases
	with tf.variable_scope(name) as scope:
		w = tf.get_variable('w', shape = [k_height, k_width, channel // groups, feature_num])
		b = tf.get_variable('b', shape = [feature_num])
		#split the input and the kernels along the channel axis into `groups` sub-tensors
		x_ = tf.split(value = x, num_or_size_splits = groups, axis = 3)
		w_ = tf.split(value = w, num_or_size_splits = groups, axis = 3)
		#extract feature map for x and w
		feature_map = [conv(v_x, v_w) for v_x, v_w in zip(x_, w_)]
		#merge feature map
		merge_feature_map = tf.concat(axis = 3, values = feature_map)
		#add the bias
		out = tf.nn.bias_add(merge_feature_map, b)
		#relu activation (bias_add preserves the shape, so no reshape is needed)
		return tf.nn.relu(out, name = scope.name)
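
#note on groups: groups = 2 reproduces alexnet's original two-GPU split. For conv2,
#the 27x27x96 input is split into two 27x27x48 halves, each convolved with its own
#5x5x48x128 kernels, and the two 27x27x128 outputs are concatenated back to 256 channels.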

#define fcLayer to generate the fully connected layers
def fcLayer(x, in_dim, out_dim, relu_flag, name):
	'''
	description: generate a fully connected layer
	Args:	x: the input data
			in_dim: input dimension
			out_dim: output dimension
			relu_flag: whether to apply the relu activation
			name: tf variable scope name
	Returns: x*w + b, relu-activated when relu_flag is True
	'''
	with tf.variable_scope(name) as scope:
		#get the named variables w and b
		w = tf.get_variable('w', shape = [in_dim, out_dim], dtype = 'float')
		b = tf.get_variable('b', shape = [out_dim], dtype = 'float')
		#calculate (x*w + b)
		out = tf.nn.xw_plus_b(x, w, b, name = scope.name)
		if relu_flag:
			#return the relu-activated result
			return tf.nn.relu(out)
		return out

#define the alexnet model class to set up the alexnet network
class alexnet(object):
	'''
	description: set up the alexnet network
	functions:	__init__ -- set parameters and initialize the network
				nn -- the cnn architecture
				load -- load the pretrained weights
	'''
	#define __init__ to initialize the parameters of the alexnet model
	def __init__(self, x, keep_pro, class_num, model_path = 'bvlc_alexnet.npy'):
		'''
		description: initialize parameters for the alexnet model
		Args:		x: input data [image]
					keep_pro: dropout keep probability of the neural units
					class_num: number of classes
					model_path: the pretrained weight file
		'''
		self.X = x
		self.KEEPPRO = keep_pro
		self.CLASSNUM = class_num
		self.MODELPATH = model_path
		self.nn()

	#define nn function to set up the alexnet network
	def nn(self):
		'''
		description: set up the alexnet network
		Args:	self parameters
		Returns:	None
		'''
		#conv 1 layer
		conv_1 = convLayer(self.X, 11, 11, 4, 4, 96, 'conv1', 'VALID')
		#max pooling layer
		pool_1 = tf.nn.max_pool(conv_1, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool_1')
		#lrn operation
		lrn_1 = tf.nn.lrn(pool_1, depth_radius = 2, alpha = 2e-05, beta = 0.75, bias = 1.0, name = 'lrn1')
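		#note: these lrn hyper-parameters follow the tf_alexnet reference code that produced the weights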
		#conv 2 layer
		conv_2 = convLayer(lrn_1, 5, 5, 1, 1, 256, 'conv2', groups = 2)
		#max pooling layer
		pool_2 = tf.nn.max_pool(conv_2, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool2')
		#lrn operation
		lrn_2 = tf.nn.lrn(pool_2, depth_radius = 2, alpha = 2e-05, beta = 0.75, bias = 1.0, name = 'lrn2')
		#conv 3 layer
		conv_3 = convLayer(lrn_2, 3, 3, 1, 1, 384, 'conv3')
		#conv 4 layer
		conv_4 = convLayer(conv_3, 3, 3, 1, 1, 384, 'conv4', groups = 2)
		#conv 5 layer
		conv_5 = convLayer(conv_4, 3, 3, 1, 1, 256, 'conv5', groups = 2)
		#max pooling layer
		pool_5 = tf.nn.max_pool(conv_5, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool5')
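		#note: with a 227x227 input the spatial size shrinks as 227 -> 55 (conv1) -> 27 (pool1)
		#-> 13 (pool2) -> 6 (pool5), so pool_5 is 6x6x256 and is flattened below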
		#fully connected layer 1
		fc_in = tf.reshape(pool_5, [-1, 256*6*6])
		fc_1 = fcLayer(fc_in, 256*6*6, 4096, True, 'fc6')
		#dropout operation
		dropout_1 = tf.nn.dropout(fc_1, self.KEEPPRO)
		#fully connected layer 2
		fc_2 = fcLayer(dropout_1, 4096, 4096, True, 'fc7')
		#dropout operation
		dropout_2 = tf.nn.dropout(fc_2, self.KEEPPRO)
		#fully connected layer 3 (no relu: fc8 produces the logits fed to softmax)
		self.fc_3 = fcLayer(dropout_2, 4096, self.CLASSNUM, False, 'fc8')
	#define load function to load alexnet model
	def load(self, sess):
		'''
		description: load the pretrained alexnet weights
		Args:	sess: tf session
		Returns:	None
		'''
		#the weight file is a pickled dict: layer name -> [weights, biases]
		file_dict = np.load(self.MODELPATH, encoding = 'bytes', allow_pickle = True).item()
		for name in file_dict:
			#the empty list can hold layer names to skip, e.g. when fine-tuning
			if name not in []:
				with tf.variable_scope(name, reuse = True):
					for parameter in file_dict[name]:
						#1-D arrays are biases, everything else is a weight tensor
						if len(parameter.shape) == 1:
							sess.run(tf.get_variable('b', trainable = False).assign(parameter))
						else:
							sess.run(tf.get_variable('w', trainable = False).assign(parameter))

#test model
import os
import cv2
import caffe_classes

if __name__ == '__main__':
	#set parameters; keep_pro = 1 disables dropout at inference time
	keep_pro = 1
	class_num = 1000
	test_path = "testimage"
	#read test image
	testImg = []
	for i in os.listdir(test_path):
		testImg.append(cv2.imread(test_path + '/' + i))

	#per-channel (BGR) mean to subtract from the input image
	img_mean = np.array([104, 117, 124], np.float32)
	
	#define input placeholder
	x = tf.placeholder('float', [1, 227, 227, 3])
	
	#load alexnet model
	model = alexnet(x, keep_pro, class_num)
	
	#the fc8 logits tensor
	score = model.fc_3
	print(score)
	
	#define softmax variable
	softmax = tf.nn.softmax(score)
	
	with tf.Session() as sess:
		sess.run(tf.global_variables_initializer())
		#load model
		model.load(sess)
		for i, img in enumerate(testImg):
			#resize the image and subtract the mean value
			test = cv2.resize(img.astype(np.float32), (227, 227)) - img_mean
			#add the batch dimension expected by the placeholder
			test = test.reshape((1, 227, 227, 3))
			#take the index with the highest softmax probability
			max_index = np.argmax(sess.run(softmax, feed_dict = {x: test}))
			#look up the most probable class name
			res = caffe_classes.class_names[max_index]
			print(res)
			#set font
			font = cv2.FONT_HERSHEY_SIMPLEX
			#display the class name (cv2 points are (x, y), so the width index comes first)
			cv2.putText(img, res, (int(img.shape[1] / 3), int(img.shape[0] / 3)), font, 1, (0, 0, 252), 2)
			#show result
			cv2.imshow('test', img)
			cv2.waitKey(0)
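
The listing above prints only the top-1 label. As a quick sanity check it can be extended to a top-5 readout, the metric usually reported on ImageNet; a minimal sketch that reuses the softmax tensor, sess, test and caffe_classes names from main.py, placed inside the same per-image loop:

#probabilities for the current test image
probs = sess.run(softmax, feed_dict = {x: test})[0]
#indices of the five highest-probability classes, best first
top5 = np.argsort(probs)[::-1][:5]
for idx in top5:
	print('%s: %.4f' % (caffe_classes.class_names[idx], probs[idx]))

Note that the listing targets TensorFlow 1.x (tf.placeholder, tf.Session); under TensorFlow 2 it should still run after replacing the import with import tensorflow.compat.v1 as tf and calling tf.disable_v2_behavior().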

Experimental results:

(result screenshots omitted: each test image is shown with its predicted class name overlaid)

practice makes perfect! 
