U-net源码分析(Keras版本)

转载:https://blog.csdn.net/mieleizhi0522/article/details/82217677
源码地址:https://github.com/zhixuhao/unet

首先是论文讲解:原文此处附有论文讲解文章的链接(转载后链接已失效)。

源码地址:https://github.com/FENGShuanglang/unet   

环境:尽量用python3去运行,因为我用了python2.7,弄了两天都不行,测试输出一直是全灰!

源码文件夹目录:

这里主要讲解data.py,  model.py,   main.py三个文件(也只要这三个python文件)

先看一下main.py,按照main.py文件的运行顺序去查找每个函数的意义:


   
   
  1. from model import *
  2. from data import * #导入这两个文件中的所有函数
  3. #os.environ[“CUDA_VISIBLE_DEVICES”] = “0”
  4. data_gen_args = dict(rotation_range= 0.2,
  5. width_shift_range= 0.05,
  6. height_shift_range= 0.05,
  7. shear_range= 0.05,
  8. zoom_range= 0.05,
  9. horizontal_flip= True,
  10. fill_mode= ‘nearest’) #数据增强时的变换方式的字典
  11. myGene = trainGenerator( 2, ‘data/membrane/train’, ‘image’, ‘label’,data_gen_args,save_to_dir = None)
  12. #得到一个生成器,以batch=2的速率无限生成增强后的数据
  13. model = unet()
  14. model_checkpoint = ModelCheckpoint( ‘unet_membrane.hdf5’, monitor= ‘loss’,verbose= 1, save_best_only= True)
  15. #回调函数,第一个是保存模型路径,第二个是检测的值,检测Loss是使它最小,第三个是只保存在验证集上性能最好的模型
  16. model.fit_generator(myGene,steps_per_epoch= 300,epochs= 1,callbacks=[model_checkpoint])
  17. #steps_per_epoch指的是每个epoch有多少个batch_size,也就是训练集总样本数除以batch_size的值
  18. #上面一行是利用生成器进行batch_size数量的训练,样本和标签通过myGene传入
  19. testGene = testGenerator( “data/membrane/test”)
  20. results = model.predict_generator(testGene, 30,verbose= 1)
  21. #30是step,steps: 在停止之前,来自 generator 的总步数 (样本批次)。 可选参数 Sequence:如果未指定,将使用len(generator) 作为步数。
  22. #上面的返回值是:预测值的 Numpy 数组。
  23. saveResult( “data/membrane/test”,results) #保存结果

data.py文件:


   
   
  1. from __future__ import print_function
  2. from keras.preprocessing.image import ImageDataGenerator
  3. import numpy as np
  4. import os
  5. import glob
  6. import skimage.io as io
  7. import skimage.transform as trans
  8. Sky = [ 128, 128, 128]
  9. Building = [ 128, 0, 0]
  10. Pole = [ 192, 192, 128]
  11. Road = [ 128, 64, 128]
  12. Pavement = [ 60, 40, 222]
  13. Tree = [ 128, 128, 0]
  14. SignSymbol = [ 192, 128, 128]
  15. Fence = [ 64, 64, 128]
  16. Car = [ 64, 0, 128]
  17. Pedestrian = [ 64, 64, 0]
  18. Bicyclist = [ 0, 128, 192]
  19. Unlabelled = [ 0, 0, 0]
  20. COLOR_DICT = np.array([Sky, Building, Pole, Road, Pavement,
  21. Tree, SignSymbol, Fence, Car, Pedestrian, Bicyclist, Unlabelled])
  22. def adjustData(img,mask,flag_multi_class,num_class):
  23. if(flag_multi_class): #此程序中不是多类情况,所以不考虑这个
  24. img = img / 255
  25. mask = mask[:,:,:, 0] if(len(mask.shape) == 4) else mask[:,:, 0]
  26. #if else的简洁写法,一行表达式,为真时放在前面,不明白mask.shape=4的情况是什么,由于有batch_size,所以mask就有3维[batch_size,wigth,heigh],估计mask[:,:,0]是写错了,应该写成[0,:,:],这样可以得到一片图片,
  27. new_mask = np.zeros(mask.shape + (num_class,))
  28. #np.zeros里面是shape元组,此目的是将数据厚度扩展到num_class层,以在层的方向实现one-hot结构
  29. for i in range(num_class):
  30. #for one pixel in the image, find the class in mask and convert it into one-hot vector
  31. #index = np.where(mask == i)
  32. #index_mask = (index[0],index[1],index[2],np.zeros(len(index[0]),dtype = np.int64) + i) if (len(mask.shape) == 4) else (index[0],index[1],np.zeros(len(index[0]),dtype = np.int64) + i)
  33. #new_mask[index_mask] = 1
  34. new_mask[mask == i,i] = 1 #将平面的mask的每类,都单独变成一层,
  35. new_mask = np.reshape(new_mask,(new_mask.shape[ 0],new_mask.shape[ 1]*new_mask.shape[ 2],new_mask.shape[ 3])) if flag_multi_class else np.reshape(new_mask,(new_mask.shape[ 0]*new_mask.shape[ 1],new_mask.shape[ 2]))
  36. mask = new_mask
  37. elif(np.max(img) > 1):
  38. img = img / 255
  39. mask = mask / 255
  40. mask[mask > 0.5] = 1
  41. mask[mask <= 0.5] = 0
  42. return (img,mask)
  43. #上面这个函数主要是对训练集的数据和标签的像素值进行归一化
  44. def trainGenerator(batch_size,train_path,image_folder,mask_folder,aug_dict,image_color_mode = "grayscale",
  45. mask_color_mode = "grayscale",image_save_prefix = "image",mask_save_prefix = "mask",
  46. flag_multi_class = False,num_class = 2,save_to_dir = None,target_size = (256,256),seed = 1):
  47. '''
  48. can generate image and mask at the same time
  49. use the same seed for image_datagen and mask_datagen to ensure the transformation for image and mask is the same
  50. if you want to visualize the results of generator, set save_to_dir = "your path"
  51. '''
  52. image_datagen = ImageDataGenerator(**aug_dict)
  53. mask_datagen = ImageDataGenerator(**aug_dict)
  54. image_generator = image_datagen.flow_from_directory( #https://blog.csdn.net/nima1994/article/details/80626239
  55. train_path, #训练数据文件夹路径
  56. classes = [image_folder], #类别文件夹,对哪一个类进行增强
  57. class_mode = None, #不返回标签
  58. color_mode = image_color_mode, #灰度,单通道模式
  59. target_size = target_size, #转换后的目标图片大小
  60. batch_size = batch_size, #每次产生的(进行转换的)图片张数
  61. save_to_dir = save_to_dir, #保存的图片路径
  62. save_prefix = image_save_prefix, #生成图片的前缀,仅当提供save_to_dir时有效
  63. seed = seed)
  64. mask_generator = mask_datagen.flow_from_directory(
  65. train_path,
  66. classes = [mask_folder],
  67. class_mode = None,
  68. color_mode = mask_color_mode,
  69. target_size = target_size,
  70. batch_size = batch_size,
  71. save_to_dir = save_to_dir,
  72. save_prefix = mask_save_prefix,
  73. seed = seed)
  74. train_generator = zip(image_generator, mask_generator) #组合成一个生成器
  75. for (img,mask) in train_generator:
  76. #由于batch是2,所以一次返回两张,即img是一个2张灰度图片的数组,[2,256,256]
  77. img,mask = adjustData(img,mask,flag_multi_class,num_class) #返回的img依旧是[2,256,256]
  78. yield (img,mask)
  79. #每次分别产出两张图片和标签,不懂yield的请看https://blog.csdn.net/mieleizhi0522/article/details/82142856
  80. #上面这个函数主要是产生一个数据增强的图片生成器,方便后面使用这个生成器不断生成图片
  81. def testGenerator(test_path,num_image = 30,target_size = (256,256),flag_multi_class = False,as_gray = True):
  82. for i in range(num_image):
  83. img = io.imread(os.path.join(test_path, "%d.png"%i),as_gray = as_gray)
  84. img = img / 255
  85. img = trans.resize(img,target_size)
  86. img = np.reshape(img,img.shape+( 1,)) if ( not flag_multi_class) else img
  87. img = np.reshape(img,( 1,)+img.shape)
  88. #将测试图片扩展一个维度,与训练时的输入[2,256,256]保持一致
  89. yield img
  90. #上面这个函数主要是对测试图片进行规范,使其尺寸和维度上和训练图片保持一致
  91. def geneTrainNpy(image_path,mask_path,flag_multi_class = False,num_class = 2,image_prefix = "image",mask_prefix = "mask",image_as_gray = True,mask_as_gray = True):
  92. image_name_arr = glob.glob(os.path.join(image_path, "%s*.png"%image_prefix))
  93. #相当于文件搜索,搜索某路径下与字符匹配的文件https://blog.csdn.net/u010472607/article/details/76857493/
  94. image_arr = []
  95. mask_arr = []
  96. for index,item in enumerate(image_name_arr): #enumerate是枚举,输出[(0,item0),(1,item1),(2,item2)]
  97. img = io.imread(item,as_gray = image_as_gray)
  98. img = np.reshape(img,img.shape + ( 1,)) if image_as_gray else img
  99. mask = io.imread(item.replace(image_path,mask_path).replace(image_prefix,mask_prefix),as_gray = mask_as_gray)
  100. #重新在mask_path文件夹下搜索带有mask字符的图片(标签图片)
  101. mask = np.reshape(mask,mask.shape + ( 1,)) if mask_as_gray else mask
  102. img,mask = adjustData(img,mask,flag_multi_class,num_class)
  103. image_arr.append(img)
  104. mask_arr.append(mask)
  105. image_arr = np.array(image_arr)
  106. mask_arr = np.array(mask_arr) #转换成array
  107. return image_arr,mask_arr
  108. #该函数主要是分别在训练集文件夹下和标签文件夹下搜索图片,然后扩展一个维度后以array的形式返回,是为了在没用数据增强时的读取文件夹内自带的数据
  109. def labelVisualize(num_class,color_dict,img):
  110. img = img[:,:, 0] if len(img.shape) == 3 else img
  111. img_out = np.zeros(img.shape + ( 3,))
  112. #变成RGB空间,因为其他颜色只能再RGB空间才会显示
  113. for i in range(num_class):
  114. img_out[img == i,:] = color_dict[i]
  115. #为不同类别涂上不同的颜色,color_dict[i]是与类别数有关的颜色,img_out[img == i,:]是img_out在img中等于i类的位置上的点
  116. return img_out / 255
  117. #上面函数是给出测试后的输出之后,为输出涂上不同的颜色,多类情况下才起作用,两类的话无用
  118. def saveResult(save_path,npyfile,flag_multi_class = False,num_class = 2):
  119. for i,item in enumerate(npyfile):
  120. img = labelVisualize(num_class,COLOR_DICT,item) if flag_multi_class else item[:,:, 0]
  121. #多类的话就图成彩色,非多类(两类)的话就是黑白色
  122. io.imsave(os.path.join(save_path, "%d_predict.png"%i),img)

这里要说明一下,由于在预测的时候模型是直接输出的,下面模型的输出是在一个sigmoid函数之后的输出,也就是输出的数值是在0-1之间的,但是在这里直接就把这个0-1之间的数进行保存成图片了,这里有两个疑点:

1.为什么可以直接将在0-1的浮点数直接保存成图片?

是因为在skimage模块中,如果图片数据是float的话,那么值应该是0到1或者-1到1的浮点数,

2.为什么直接保存而不进行mask二值图像的产生?

这是因为输出数据值已经很两极分化了,也即是有的很接近于0,有的很接近于1了,中间的数值很少,所以就直接输出也没有关系,相当于输出的是灰度图,如果你感觉非要产生二值化图像,可以修改成下面代码:


   
   
  1. def saveResult(save_path,npyfile,flag_multi_class = False,num_class = 2):
  2. for i,item in enumerate(npyfile):
  3. if flag_multi_class:
  4. img = labelVisualize(num_class,COLOR_DICT,item)
  5. #多类的话就图成彩色,非多类(两类)的话就是黑白色
  6. else:
  7. img=item[:,:, 0]
  8. print(np.max(img),np.min(img))
  9. img[img> 0.5]= 1 #此时1是浮点数,下面的0也是
  10. img[img<= 0.5]= 0
  11. print(np.max(img),np.min(img))
  12. io.imsave(os.path.join(save_path, "%d_predict.png"%i),img)

下面是model.py:

 


   
   
  1. import numpy as np
  2. import os
  3. import skimage.io as io
  4. import skimage.transform as trans
  5. import numpy as np
  6. from keras.models import *
  7. from keras.layers import *
  8. from keras.optimizers import *
  9. from keras.callbacks import ModelCheckpoint, LearningRateScheduler
  10. from keras import backend as keras
  11. def unet(pretrained_weights = None,input_size = (256,256,1)):
  12. inputs = Input(input_size)
  13. conv1 = Conv2D( 64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(inputs)
  14. conv1 = Conv2D( 64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv1)
  15. pool1 = MaxPooling2D(pool_size=( 2, 2))(conv1)
  16. conv2 = Conv2D( 128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool1)
  17. conv2 = Conv2D( 128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv2)
  18. pool2 = MaxPooling2D(pool_size=( 2, 2))(conv2)
  19. conv3 = Conv2D( 256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool2)
  20. conv3 = Conv2D( 256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv3)
  21. pool3 = MaxPooling2D(pool_size=( 2, 2))(conv3)
  22. conv4 = Conv2D( 512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool3)
  23. conv4 = Conv2D( 512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv4)
  24. drop4 = Dropout( 0.5)(conv4)
  25. pool4 = MaxPooling2D(pool_size=( 2, 2))(drop4)
  26. conv5 = Conv2D( 1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool4)
  27. conv5 = Conv2D( 1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv5)
  28. drop5 = Dropout( 0.5)(conv5)
  29. up6 = Conv2D( 512, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = ( 2, 2))(drop5)) #上采样之后再进行卷积,相当于转置卷积操作!
  30. merge6 = concatenate([drop4,up6],axis= 3)
  31. conv6 = Conv2D( 512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge6)
  32. conv6 = Conv2D( 512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv6)
  33. up7 = Conv2D( 256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = ( 2, 2))(conv6))
  34. merge7 = concatenate([conv3,up7],axis = 3)
  35. conv7 = Conv2D( 256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge7)
  36. conv7 = Conv2D( 256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv7)
  37. up8 = Conv2D( 128, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = ( 2, 2))(conv7))
  38. merge8 = concatenate([conv2,up8],axis = 3)
  39. conv8 = Conv2D( 128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge8)
  40. conv8 = Conv2D( 128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv8)
  41. up9 = Conv2D( 64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = ( 2, 2))(conv8))
  42. merge9 = concatenate([conv1,up9],axis = 3)
  43. conv9 = Conv2D( 64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge9)
  44. conv9 = Conv2D( 64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
  45. conv9 = Conv2D( 2, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
  46. conv10 = Conv2D( 1, 1, activation = 'sigmoid')(conv9) #我怀疑这个sigmoid激活函数是多余的,因为在后面的loss中用到的就是二进制交叉熵,包含了sigmoid
  47. model = Model(input = inputs, output = conv10)
  48. model.compile(optimizer = Adam(lr = 1e-4), loss = 'binary_crossentropy', metrics = [ 'accuracy']) #模型执行之前必须要编译https://keras-cn.readthedocs.io/en/latest/getting_started/sequential_model/
  49. #利用二进制交叉熵,也就是sigmoid交叉熵,metrics一般选用准确率,它会使准确率往高处发展
  50. #model.summary()
  51. if(pretrained_weights):
  52. model.load_weights(pretrained_weights)
  53. return model

到此结束:

看看测试的结果:

1.你会发现测试的输出是256*256,但是输入是512*512,这是因为在输入的时候被resize了,统一resize成256*256.

2.还有一个就是这个模型没有按照论文中的模型来创建,具体区别就是每次卷积的时候这里采用的是padding=same,而论文中是没有进行pad的,也就是这里的输入尺寸和输出尺寸是一样大的,而论文中是输入大于输出。具体对比可参考原论文中的网络结构图(原文此处的链接已失效)。

如果大家有爱好深度学习,爱好人工智能,还有YOLO,可以加下我创建的群825524664(深度学习交流),仅供学习交流,没有广告,谢谢大家捧场!

            </div>
  • 7
    点赞
  • 46
    收藏
    觉得还不错? 一键收藏
  • 16
    评论
评论 16
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值