4 篇文章 0 订阅

## 数据集

aistudio：https://aistudio.baidu.com/aistudio/datasetdetail/38429
kaggle：https://www.kaggle.com/c/state-farm-distracted-driver-detection

## 1.数据分析

# Notebook shell escape: extract the dataset archive into the working directory
# (-o overwrites existing files without prompting, -q suppresses unzip output).
!unzip -o -q data/data38429/distracted_driver_detection.zip
print("done")


• subject:拍摄的司机编号
• classname:分心行为对应的标签
• img:对应的图片数据名
先来看看司机数据的数据分布：
# Imports needed by the analysis cells (os is used by the file-listing cells
# of this notebook; pandas/matplotlib by the statistics and plots).
import os

import matplotlib.pyplot as plt
import pandas as pd

# Path of the CSV index shipped with the dataset. It is deliberately not
# called `dir`, which would shadow the builtin dir().
csv_path = "driver_imgs_list.csv"
img_csv = csv_path

# Load the index; the columns used below are 'subject', 'classname' and 'img'.
csv_data = pd.read_csv(img_csv)

# 'subject' holds the ID of the driver photographed in each image.
driver_name = csv_data['subject']

# Unique driver IDs, kept in order of first appearance (dict keys preserve
# insertion order, and this avoids a list membership scan per row).
names = list(dict.fromkeys(driver_name))

# Number of images per driver; the result is indexed by driver ID.
driver_num = csv_data.groupby('subject')['img'].count()

# Show the per-driver image counts.
print(driver_num)

# Show how many distinct drivers there are.
print("drivers count = {}".format(len(names)))

# Plot. The x labels are taken from the grouped result itself so that every
# bar is paired with its own driver: `names` is in first-appearance order,
# while groupby() sorts by key, so mixing the two could mislabel bars.
plt.bar(x=driver_num.index.tolist(), height=driver_num.values, width=0.8)
plt.show()


# 'classname' holds the distraction-behaviour label of each image.
class_name = csv_data['classname']

# Unique class labels, kept in order of first appearance.
class_names = list(dict.fromkeys(class_name))

# Number of images per class; the result is indexed by class label.
class_num = csv_data.groupby('classname')['img'].count()

# Show the per-class image counts.
print(class_num)

# Show how many distinct classes there are.
print("class count = {}".format(len(class_names)))

# Plot. Labels come from the grouped result so each bar is paired with its
# own class regardless of the row order in the CSV.
plt.bar(x=class_num.index.tolist(), height=class_num.values, width=0.8)
plt.show()


## 2.数据处理

# Build train_list.txt / val_list.txt from the class folders under train/.
# Each output line is "<image path> <integer label>".
path = "train/"

# Keep only the class sub-directories, in a stable order: os.listdir() returns
# entries in arbitrary order and may include stray files, which would make the
# split non-reproducible or crash the inner listdir().
folders_name = sorted(
    name for name in os.listdir(path) if os.path.isdir(path + name)
)

count = 0
val_count = 0
train_count = 0

# Context managers guarantee both files are flushed and closed even if
# listing or writing fails part-way through.
with open("train_list.txt", "w") as a, open("val_list.txt", "w") as b:
    for name in folders_name:
        # Folder "c3" -> label "3": the list files need the bare integer,
        # not the folder name.
        label = name.replace("c", "")
        image_names = sorted(os.listdir(path + name))
        for img_name in image_names:
            line = path + name + "/" + img_name + " " + label + '\n'
            # Every 20th image (about 5%) goes to the validation list.
            if count % 20 == 0:
                b.write(line)
                val_count = val_count + 1
            else:
                a.write(line)
                train_count = train_count + 1
            count = count + 1

print("train_list生成完毕，train数据集共{}个数据".format(train_count))
print("val_list生成完毕，val数据集共{}个数据".format(val_count))
print("合计{}个数据".format(count))


# Build predict_list.txt (one image path per line, no label) and keep the same
# paths in memory as predict_data, in the same order as the file.
predict_data = []

path = "test/"
# Sorted so the file and predict_data have a reproducible order;
# os.listdir() makes no ordering guarantee.
folders_name = sorted(os.listdir(path))
with open("predict_list.txt", "w") as f:
    for name in folders_name:
        f.write(path + name + '\n')
        predict_data.append(path + name)
# Report success only after the file has actually been closed.
print("predict_list.txt文件成功生成")


# Build label_list.txt with one class-folder name per line.
path = "train/"
# The line position in this file presumably becomes the integer label the
# dataset maps back to a name (verify against BaseCVDataset), so the order
# must be c0, c1, ..., not the arbitrary os.listdir() order. Sorting by
# (length, name) keeps c2 before c10 should more classes ever be added.
folders_name = sorted(
    (name for name in os.listdir(path) if os.path.isdir(path + name)),
    key=lambda name: (len(name), name),
)
with open("label_list.txt", "w") as w:
    for name in folders_name:
        w.write(name + '\n')
# Report success only after the file has actually been closed.
print("label_list.txt文件成功生成")


## 3.训练配置

# Load the "mobilenet_v2_imagenet" module from PaddleHub to use as the backbone.
import paddlehub as hub
module = hub.Module(name="mobilenet_v2_imagenet")
# context() hands back the module's input variables, output variables and
# program; trainable=True presumably lets the pre-trained parameters be updated
# during fine-tuning (confirm against the PaddleHub docs).
input_dict, output_dict, program = module.context(trainable=True)


• train_list_file：训练集

• validate_list_file：val集

• test_list_file：测试集

• predict_file：实际中做的要预测的数据

from paddlehub.dataset.base_cv_dataset import BaseCVDataset


class DemoDataset(BaseCVDataset):
    """Custom image dataset described by the generated *_list.txt files."""

    def __init__(self):
        # Base directory of the dataset; empty because the list files and the
        # image paths inside them are relative to the working directory.
        self.dataset_dir = ""
        # NOTE(review): predict_list.txt contains image paths without labels;
        # confirm that BaseCVDataset accepts such a file as test_list_file.
        super(DemoDataset, self).__init__(
            base_path=self.dataset_dir,
            train_list_file="train_list.txt",
            validate_list_file="val_list.txt",
            test_list_file="predict_list.txt",
            label_list_file="label_list.txt",
        )


dataset = DemoDataset()


# Build the image reader for the dataset, using the input width/height and the
# pre-training mean/std reported by the module itself.
data_reader = hub.reader.ImageClassificationReader(
image_width=module.get_expected_image_width(),
image_height=module.get_expected_image_height(),
images_mean=module.get_pretrained_images_mean(),
images_std=module.get_pretrained_images_std(),
dataset=dataset)


• learning_rate: 全局学习率。默认为1e-4。

• regularization_coeff: 正则化的λ参数。默认为1e-3。

# Run-time configuration for fine-tuning.
config = hub.RunConfig(
use_cuda=True,                               # whether to train on GPU
num_epoch=10,                                # number of fine-tune epochs
checkpoint_dir="source",                     # directory where model checkpoints are saved
batch_size=16,                               # training batch size
eval_interval=100,                           # interval between model evaluations
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())  # fine-tune optimisation strategy


# Feature output of the pre-trained module, used as input to the classifier.
feature_map = output_dict["feature_map"]
# Names of the variables that must be fed; here only the image input.
feed_list = [input_dict["image"].name]

# Create the image-classification fine-tune task. The opening of this call was
# missing, which left dangling keyword arguments and no `task` object for the
# training step.
task = hub.ImageClassifierTask(
    data_reader=data_reader,           # reader that supplies the image batches
    feed_list=feed_list,               # names of the variables to feed
    feature=feature_map,               # input feature matrix
    num_classes=dataset.num_labels,    # number of classes of the task
    config=config)                     # run configuration



1. AttributeError: ‘Parameter’ object has no attribute
2. Error: Blocking queue is killed because the data reader raises an exception
如果报错信息中还带有 invalid literal for int() with base 10: 'c2' 的语句，那铁定是因为标签设置的问题
我这里是十分类问题，对应标签是c0-c9十个，但是最后分类对应的标签应该是0-9这种整数，在前面生成txt那里进行修改就可以解决了

## 4.开始训练

# Start fine-tuning with periodic evaluation; the returned run states are kept
# in run_states.
run_states = task.finetune_and_eval()


## 5.结果预测

import numpy as np

# Mapping from predicted class index to label name.
label_map = dataset.label_dict()
index = 0

# Run inference on the unlabeled test images. The run states returned by
# finetune_and_eval() describe the training run, not predictions for
# predict_data, so they must not be indexed against predict_data below.
# NOTE(review): confirm the predict() signature for the installed PaddleHub.
predict_run_states = task.predict(data=predict_data)
results = [run_state.run_results for run_state in predict_run_states]

images_data = []
res_data = []
for batch_result in results:
    # Pick the highest-scoring class index for every image of the batch.
    batch_result = np.argmax(batch_result, axis=2)[0]
    for result in batch_result:
        index += 1
        result = label_map[result]
        print("input %i is %s, and the predict result is %s" %
              (index, predict_data[index - 1], result))
        images_data.append(predict_data[index - 1])
        res_data.append(result)
print(res_data)


• 0
点赞
• 7
收藏
觉得还不错? 一键收藏
• 打赏
• 2
评论
10-26 3310
07-08 1585

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

Fitzie

¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、C币套餐、付费专栏及课程。