unet图像分割
导入包,数据集
% reload_ext autoreload
% autoreload 2
% matplotlib inline
from fastai. vision import *
from fastai. callbacks. hooks import *
from fastai. utils. mem import *
# Fetch the CamVid dataset through fastai's `untar_data` and list what the
# returned directory contains.
path = untar_data(URLs.CAMVID)
path.ls()
[WindowsPath('C:/Users/Wither8848/.fastai/data/camvid/codes.txt'),
WindowsPath('C:/Users/Wither8848/.fastai/data/camvid/images'),
WindowsPath('C:/Users/Wither8848/.fastai/data/camvid/labels'),
WindowsPath('C:/Users/Wither8848/.fastai/data/camvid/valid.txt')]
# Sub-directories of the dataset: input images and their label masks.
path_img = path / 'images'
path_lbl = path / 'labels'
查看数据集
查看图像
# Collect the image files under the images directory and peek at the first three.
fnames = get_image_files(path_img)
fnames[:3]
[WindowsPath('C:/Users/Wither8848/.fastai/data/camvid/images/0001TP_006690.png'),
WindowsPath('C:/Users/Wither8848/.fastai/data/camvid/images/0001TP_006720.png'),
WindowsPath('C:/Users/Wither8848/.fastai/data/camvid/images/0001TP_006750.png')]
# Open the first image file and display it.
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5, 5))
查看标签
# Collect the label (mask) files and peek at the first three.
lbl_names = get_image_files(path_lbl)
lbl_names[:3]
[WindowsPath('C:/Users/Wither8848/.fastai/data/camvid/labels/0001TP_006690_P.png'),
WindowsPath('C:/Users/Wither8848/.fastai/data/camvid/labels/0001TP_006720_P.png'),
WindowsPath('C:/Users/Wither8848/.fastai/data/camvid/labels/0001TP_006750_P.png')]
# Inspect how a Path splits into extension (.suffix) and base name (.stem);
# the label-path helper relies on both.
print(fnames[0].suffix)
fnames[0].stem
.png
'0001TP_006690'
标签文件名为图像文件名+'_P'(后缀不变),因此定义一个函数,根据图像路径自动找到对应的标签
f'...' 是格式化字符串(f-string)的写法,作用类似 % 格式化;Path 对象的 .stem 得到不含后缀的文件名,.suffix 得到后缀
def get_y_fn(x):
    """Return the label-mask path that belongs to image path `x`.

    The mask lives in `path_lbl` and is named `<stem>_P<suffix>`,
    e.g. `0001TP_006690.png` -> `0001TP_006690_P.png`.

    Args:
        x: A path-like object exposing `.stem` and `.suffix`.

    Returns:
        `path_lbl / f'{x.stem}_P{x.suffix}'`.
    """
    # A named function (rather than an assigned lambda) has a real __name__
    # and can be pickled by reference.
    return path_lbl / f'{x.stem}_P{x.suffix}'
这里使用 open_mask 而不是 open_image,因为标签是掩码(像素值是类别编号),当作普通图像打开会看不清楚
# Open the mask that matches the first image and display it fully opaque.
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5, 5), alpha=1)

# Drop the leading dimension of the mask shape to get the source image size,
# then show it next to the raw mask values.
src_size = np.array(mask.shape[1:])
src_size, mask.data
(array([720, 960]),
tensor([[[ 4, 4, 4, ..., 4, 4, 4],
[ 4, 4, 4, ..., 4, 4, 4],
[ 4, 4, 4, ..., 4, 4, 4],
...,
[19, 19, 19, ..., 30, 30, 30],
[19, 19, 19, ..., 30, 30, 30],
[19, 19, 19, ..., 30, 30, 30]]]))
映射储存在codes.txt里面,如4-对应建筑物
# Load the class names from codes.txt as a string array and display them.
codes = np.loadtxt(path / 'codes.txt', dtype=str)
codes
array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole',
'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock',
'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone',
'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')
生成数据集
# Train at half the source resolution first.
size = src_size // 2

# Choose the batch size from the free GPU memory reported by fastai.
free = gpu_mem_get_free_no_cache()
bs = 8 if free > 8200 else 4
print(f"using bs={bs}, have {free}MB of GPU RAM free")
using bs=4, have 6813MB of GPU RAM free
这里使用图像分割数据集;验证集来自给定的 valid.txt,因为这个数据集来自一段视频,按文件划分可以保证验证集的帧与训练集不连续
# Item list: images from `path_img`, validation split taken from valid.txt,
# labels resolved per image through `get_y_fn`.
src = (
    SegmentationItemList.from_folder(path_img)
    .split_by_fname_file('../valid.txt')
    .label_from_func(get_y_fn, classes=codes)
)

# DataBunch: transforms are applied to the masks as well (tfm_y=True), items
# are resized to `size`, and inputs are normalized with `imagenet_stats`.
data = (
    src.transform(get_transforms(), size=size, tfm_y=True)
    .databunch(bs=bs)
    .normalize(imagenet_stats)
)
data.show_batch(2, figsize=(10, 7))
生成模型
# Map each class name to its index in `codes`, then look up the index of the
# 'Void' class.
name2id = {name: idx for idx, name in enumerate(codes)}
void_code = name2id['Void']
def acc_camvid(input, target):
    """Pixel accuracy that ignores pixels labelled with `void_code`.

    Args:
        input: Model output; `argmax(dim=1)` is taken over it to obtain the
            predicted class per pixel (assumes dim 1 is the class axis --
            confirm against the model's output layout).
        target: Ground-truth mask; its dim 1 is squeezed out before the
            comparison.

    Returns:
        The mean of the per-pixel matches (cast to float) over all pixels
        whose ground truth is not `void_code`.
    """
    # The parameter names (including `input`) are kept as-is so the metric's
    # signature stays unchanged for callers.
    target = target.squeeze(1)
    # Exclude pixels whose ground truth is the 'Void' class from the metric.
    mask = target != void_code
    return (input.argmax(dim=1)[mask] == target[mask]).float().mean()
# Metric and weight decay passed to the learners in this notebook.
metrics = acc_camvid
wd = 1e-2

# U-Net learner built on the resnet34 architecture.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)

# Run the learning-rate finder and plot what it recorded.
lr_find(learn)
learn.recorder.plot()

# Stage 1: 10 one-cycle epochs with slice(lr) as the learning rate.
lr = 3e-3
learn.fit_one_cycle(10, slice(lr), pct_start=0.9)
epoch train_loss valid_loss acc_camvid time 0 1.069481 0.792951 0.815857 01:54 1 0.744278 0.600508 0.847082 01:51 2 0.649365 0.559110 0.848990 01:49 3 0.598212 0.453965 0.875559 01:51 4 0.617887 0.492955 0.866971 01:49 5 0.573497 0.563069 0.854965 01:51 6 0.552920 0.494836 0.874791 01:48 7 0.519904 0.403810 0.891915 01:53 8 0.554230 0.464974 0.883040 01:52 9 0.427734 0.338918 0.903289 01:53
# Checkpoint the stage-1 weights, reload them, and inspect some predictions.
learn.save('stage-1')
learn.load('stage-1');
learn.show_results(rows=3, figsize=(8, 9))

# Stage 2: unfreeze the model and train 12 more epochs with learning rates
# ranging from lr/400 to lr/4.
lr = 3e-3
learn.unfreeze()
lrs = slice(lr / 400, lr / 4)
learn.fit_one_cycle(12, lrs, pct_start=0.8)
epoch train_loss valid_loss acc_camvid time 0 0.378398 0.326795 0.906832 02:02 1 0.378235 0.313857 0.910155 01:54 2 0.375283 0.307506 0.911930 01:56 3 0.364699 0.321208 0.909612 01:56 4 0.353212 0.285177 0.920163 01:56 5 0.347013 0.309324 0.912606 01:53 6 0.333858 0.323652 0.905763 01:55 7 0.332646 0.302886 0.914157 01:57 8 0.328257 0.286069 0.922094 01:56 9 0.320706 0.287675 0.922618 01:55 10 0.294463 0.279678 0.922595 01:53 11 0.266878 0.272097 0.925978 01:54
损失曲线应该是先略微升高再降低;如果损失一直在降低,可以尝试把学习率加大一点点
# Plot the recorded losses and the learning-rate schedule, then checkpoint
# the stage-2 weights.
learn.recorder.plot_losses()
learn.recorder.plot_lr()
learn.save('stage-2');
使用原始分辨率进一步训练
# Tear down the half-resolution learner before rebuilding at full size
# (presumably to release GPU memory -- the free-memory probe follows directly).
learn.destroy()

# Switch to the original resolution and re-pick the batch size from the
# currently free GPU memory.
size = src_size
free = gpu_mem_get_free_no_cache()
bs = 3 if free > 8200 else 1
print(f"using bs={bs}, have {free}MB of GPU RAM free")
this Learner object self-destroyed - it still exists, but no longer usable
using bs=1, have 6027MB of GPU RAM free
# Rebuild the DataBunch from the same item list at the new `size` and `bs`.
data = (
    src.transform(get_transforms(), size=size, tfm_y=True)
    .databunch(bs=bs)
    .normalize(imagenet_stats)
)

# Fresh learner switched to fp16, initialised from the stage-2 checkpoint.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)
learn = learn.to_fp16()
learn.load('stage-2');

# Run the learning-rate finder again for the full-resolution data.
lr_find(learn)
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
![学习率查找曲线](output_45_2.png)
# Full-resolution stage 1: 10 one-cycle epochs with slice(lr) as the learning rate.
lr = 1e-3
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)
epoch train_loss valid_loss acc_camvid time 0 0.423464 0.330359 0.911409 04:13 1 0.373553 0.334004 0.907827 04:08 2 0.354978 0.316964 0.912270 04:08 3 0.393460 0.316810 0.917438 04:08 4 0.342301 0.332319 0.917796 04:10 5 0.343624 0.313800 0.919534 04:07 6 0.328565 0.312378 0.909933 04:14 7 0.336849 0.317563 0.913902 04:20 8 0.269040 0.300068 0.918740 04:22 9 0.238536 0.281892 0.923772 04:10
# Checkpoint the full-resolution stage-1 weights and reload them.
learn.save('stage-1-big')
learn.load('stage-1-big');

# Full-resolution stage 2: unfreeze and train 10 more epochs with learning
# rates ranging from 1e-6 to lr/10, then checkpoint the result.
learn.unfreeze()
lrs = slice(1e-6, lr / 10)
learn.fit_one_cycle(10, lrs)
learn.save('stage-2-big')
epoch train_loss valid_loss acc_camvid time 0 0.256014 0.284205 0.923452 04:41 1 0.232119 0.283108 0.923677 04:29 2 0.235852 0.288952 0.920897 04:26 3 0.219901 0.261550 0.928312 04:24 4 0.227803 0.264939 0.927502 04:23 5 0.207069 0.275547 0.927656 04:25 6 0.199358 0.262479 0.928366 04:28 7 0.200243 0.259015 0.930449 04:31 8 0.197228 0.250665 0.932849 04:32 9 0.194318 0.265213 0.928627 04:31
# Reload the final checkpoint and display three rows of results.
learn.load('stage-2-big');
learn.show_results(rows=3, figsize=(10, 10))