Dataset Creation
Crowd density estimation based on MSCNN:
Generating the dataset
- Crowd density data annotation
- In labelme, use the "points" tool to mark every head in the image; the label text can be anything (I used "crow");
- Parse the JSON files produced in the previous step to build the dataset;
The JSON file format is as follows:
{
  "version": "3.21.1",
  "flags": {},
  "shapes": [
    {
      "label": "crow",
      "line_color": null,
      "fill_color": null,
      "points": [
        [
          59.74025974025974,
          65.97402597402598
        ]
      ],
      "shape_type": "point",
      "flags": {}
    },
    {
      "label": "crow",
      "line_color": null,
      "fill_color": null,
      "points": [
        [
          42.42424242424242,
          77.22943722943722
        ]
      ],
      "shape_type": "point",
      "flags": {}
    },
    ...
  ],
  "imagePath": "people7.jpg",
  "imageData": "/9j/4AAQSkZJRgA",
  "imageHeight": 189,
  "imageWidth": 343
}
Simply parse out the points data it contains. The generated dataset has four fields: image name, head count, density level, and the list of coordinates, saved in *.mat format.
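The create_crowLabel method below calls a json_parese helper that is not reproduced in this post. As a minimal standalone sketch of what it likely does, assuming the labelme JSON layout shown above (the name json_parse and the use of imagePath as the image name are illustrative assumptions):

import json

def json_parse(json_file):
    # Illustrative sketch of the repo's json_parese helper (not shown in the post).
    # Returns the image name and a list of [x, y] head coordinates.
    with open(json_file, 'r') as rf:
        data = json.load(rf)
    img_name = data['imagePath']
    # every "point" shape carries exactly one [x, y] pair
    points = [shape['points'][0] for shape in data['shapes']
              if shape['shape_type'] == 'point']
    return img_name, points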
import glob
import os
import random

import numpy as np
import tqdm
from scipy.io import savemat

# json_dir is a module-level variable pointing at the directory of labelme JSON files
def create_crowLabel(self):
    res_dic = {}
    img_list = []
    count_list = []
    dense_list = []
    points_list = []
    # only labels for images containing 1-99 heads (density level 1)
    json_files = glob.glob(os.path.join(json_dir, '*.json'))
    for json_file in tqdm.tqdm(json_files):
        # json_parese returns (image name, list of head coordinates); see the sketch above
        img_name, pnts = self.json_parese(json_file)
        crow_cnt = len(pnts)
        img_list.append(img_name)
        count_list.append(crow_cnt)
        dense_list.append(1)
        points_list.append(pnts)
    # shuffle the samples
    index_list = list(range(len(img_list)))
    random.shuffle(index_list)
    img_temp = []
    count_temp = []
    dense_temp = []
    point_temp = []
    for index in index_list:
        img_temp.append(img_list[index])
        count_temp.append(count_list[index])
        dense_temp.append(dense_list[index])
        point_temp.append(points_list[index])
    # use the shuffled lists for all four fields so they stay aligned
    res_dic['img'] = np.array(img_temp)
    res_dic['count'] = np.array(count_temp)
    res_dic['dense'] = np.array(dense_temp)
    # dtype=object: the per-image coordinate lists have different lengths
    res_dic['points'] = np.array(point_temp, dtype=object)
    savemat(os.path.join('../dataset/denselevel', 'crow_gt.mat'), res_dic)
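As a quick sanity check (illustrative only, assuming the file was written as above), the .mat file can be read back with scipy:

from scipy.io import loadmat

gt = loadmat('../dataset/denselevel/crow_gt.mat')
print(gt['img'][:3])    # first three image names
print(gt['count'][:3])  # their head counts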
- Image density-level annotation
Each image only needs to be labeled 0, 1, or 2. I spent half a day stripping a complex annotation tool I had developed earlier down to this simple one; for the source code see: https://github.com/zzubqh/CrowdCount/tree/master/CrawDenseTool
The saved file has one line per image in the format "image name,density level", e.g. people7.jpg,1.
Parse this file to produce the same kind of format as the crowd density data.
# create the density levels, coarsely binned into three labels: 0, 1-99, and 100+ heads
# (np, os and savemat are imported as above; dense_label is the module-level
#  path of the label file written by the annotation tool)
def create_denselevelLabel(self):
    res_dic = {}
    img_list = []
    dense_list = []
    with open(dense_label, 'r') as rf:
        for item in rf:
            # each line: "image name,density level"
            val = item.strip().split(',')
            name = val[0]
            dense_level = int(val[1])
            img_list.append(name)
            dense_list.append(dense_level)
    res_dic['img'] = np.array(img_list)
    res_dic['dense'] = np.array(dense_list)
    savemat(os.path.join('../dataset/denselevel', 'dense_gt.mat'), res_dic)
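For reference, the coarse binning behind those three labels can be expressed as a small helper (head_count_to_dense_level is an illustrative name, not part of the repo):

def head_count_to_dense_level(count):
    # illustrative helper mirroring the three-level scheme above
    if count == 0:
        return 0  # empty scene
    elif count < 100:
        return 1  # 1-99 heads
    else:
        return 2  # 100 heads or more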
Data generator for training
Training and validation data are generated on the fly during training; just write a data generator the way Keras expects. For details see the https://github.com/zzubqh/CrowdCount/src/data.py file.
def gen_train(self, batch_size, size):
    """
    Build the training data generator
    :param batch_size: number of samples per batch
    :param size: target image size passed to get_img_data
    :return: yields (batch_x, batch_y) tuples indefinitely
    """
    index_all = list(range(int(len(self.filenames) * 0.8)))  # indices of the training samples; by default the first 80% of the data is the training set
    i, n = 0, len(index_all)
    if batch_size > n:
        raise Exception('Batch size {} is larger than the dataset size {}!'.format(batch_size, n))
    while True:
        if i + batch_size >= n:
            # epoch exhausted: reshuffle and start over (the final partial batch is dropped)
            np.random.shuffle(index_all)
            i = 0
            continue
        batch_x, batch_y = [], []
        for j in range(i, i + batch_size):
            x, y = self.get_img_data(index_all[j], size)
            batch_x.append(x)
            batch_y.append(y)
        i += batch_size
        yield np.array(batch_x), np.array(batch_y)
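The validation generator below is identical except that it draws its indices from the remaining 20% of the files.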
def gen_valid(self, batch_size, size):
    """
    Build the validation data generator
    :param batch_size: number of samples per batch
    :param size: target image size passed to get_img_data
    :return: yields (batch_x, batch_y) tuples indefinitely
    """
    index_all = list(range(int(len(self.filenames) * 0.8), len(self.filenames)))
    i, n = 0, len(index_all)
    if batch_size > n:
        raise Exception('Batch size {} is larger than the dataset size {}!'.format(batch_size, n))
    while True:
        if i + batch_size >= n:
            np.random.shuffle(index_all)
            i = 0
            continue
        batch_x, batch_y = [], []
        for j in range(i, i + batch_size):
            x, y = self.get_img_data(index_all[j], size)
            batch_x.append(x)
            batch_y.append(y)
        i += batch_size
        yield np.array(batch_x), np.array(batch_y)
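A minimal sketch of wiring these generators into Keras training; model, dataset, and the batch/epoch numbers are assumptions for illustration, not taken from the repo:

# illustrative wiring only; `model` is an already-compiled Keras model and
# `dataset` is an object exposing gen_train/gen_valid as defined above
batch_size, img_size = 16, 224
train_gen = dataset.gen_train(batch_size, img_size)
valid_gen = dataset.gen_valid(batch_size, img_size)

model.fit(
    train_gen,
    steps_per_epoch=int(len(dataset.filenames) * 0.8) // batch_size,
    validation_data=valid_gen,
    validation_steps=max(1, int(len(dataset.filenames) * 0.2) // batch_size),
    epochs=50)

Because the generators yield batches forever, steps_per_epoch and validation_steps are what define an epoch; older Keras versions use model.fit_generator with the same arguments.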