faster rcnn inception_resnet_v2物品辨识比赛demo记录

20 篇文章 5 订阅
10 篇文章 0 订阅

本项目使用 TensorFlow models 仓库中的 Object Detection API 进行训练。由于没有时间限制,选用了 Faster R-CNN Inception-ResNet-v2 模型来识别 10 个类别;模型在 GTX 1080 Ti 上训练了约 5 个小时,在 GTX 1050 上测试,每张图片耗时约 2 秒。
首先定义标签映射文件 pascal_label_map.pbtxt(共 10 个类别):

item {
  id: 1
  name: 'cola'
}

item {
  id: 2
  name: 'milk tea'
}

item {
  id: 3
  name: 'ice tea'
}
item {
  id: 4
  name: 'beer'
}
item {
  id: 5
  name: 'shampoo'
}
item {
  id: 6
  name: 'toothpaste'
}
item {
  id: 7
  name: 'soap'
}
item {
  id: 8
  name: 'pear'
}
item {
  id: 9
  name: 'apple'
}
item {
  id: 10
  name: 'orange'
}

测试代码(摄像头实时检测,可选择通过语音识别或串口信号触发):

#-*-coding:utf-8-*-
import sys
import argparse
from PIL import Image
import os
import cv2
import numpy as np
import speech_recognition as sr

import wave
import requests
import time
import base64
from pyaudio import PyAudio, paInt16
import webbrowser
import serial
import speech
import numpy as np
import os
import sys
import tensorflow as tf
from PIL import Image
sys.path.append("..")
from utils import label_map_util
from utils import visualization_utils as vis_util
import cv2
from timeit import default_timer as timer

# Audio recording parameters shared by the recording and WAV-writing helpers.
framerate = 16000    # sample rate
num_samples = 2000   # number of frames requested per stream read
channels = 1         # number of audio channels
sampwidth = 2        # sample width: 2 bytes
FILEPATH = 'speech.wav'

# Baidu OAuth credentials (placeholders here) and the token URL template
# they are substituted into.
APIKey = "***"
SecretKey = "***"
base_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"

# Fully expanded token endpoint passed to getToken().
HOST = base_url % (APIKey, SecretKey)

# Locations of the frozen inference graph and of the label map that pairs
# numeric class ids with names.
PATH_TO_CKPT = 'F:/python_project/比赛' + '/frozen_inference_graph.pb'
PATH_TO_LABELS = 'F:/python_project/比赛/pascal_label_map.pbtxt'

# Passed as max_num_classes when converting the label map to categories.
# The label map published with this script defines ids 1-10, so the bound
# is kept in step with it (it was previously 80, which matched nothing here).
NUM_CLASSES = 10

# Read the serialized GraphDef from disk and import it into a dedicated
# graph; name='' keeps the tensor names unprefixed (e.g. 'image_tensor:0').
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    # The file handle is no longer needed once the bytes are in memory.
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

# Build the category index that is handed to the visualisation helper.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)
print(category_index)

def detect():
    """Run live object detection on camera 1 until enough objects are seen.

    Each frame read from ``cv2.VideoCapture(1)`` is converted to RGB, pushed
    through the frozen detection graph, annotated by the visualisation
    helper and shown in a window named "test".  The object count returned by
    the helper and the time spent on grabbing plus inference are printed for
    every frame.  Once the helper reports at least five objects, the
    annotated frame is saved as ``wxy-TRY-<HHMM>.jpg``, "识别完成" is spoken
    and the loop stops after that frame has been displayed.

    The camera is released and the preview window closed on every exit path.

    Raises:
        RuntimeError: if the camera cannot be opened or a frame cannot be
            read.
    """
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # The tensor handles are the same for every frame, so resolve
            # them once instead of on each loop iteration.
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            fetches = [
                detection_graph.get_tensor_by_name('detection_boxes:0'),
                detection_graph.get_tensor_by_name('detection_scores:0'),
                detection_graph.get_tensor_by_name('detection_classes:0'),
                detection_graph.get_tensor_by_name('num_detections:0'),
            ]

            cap = cv2.VideoCapture(1)
            try:
                if not cap.isOpened():
                    raise RuntimeError("could not open camera 1")

                detecting = True
                while detecting:
                    start = timer()
                    grabbed, frame = cap.read()
                    if not grabbed or frame is None:
                        # Without this check cvtColor would fail on None with
                        # an unhelpful OpenCV assertion.
                        raise RuntimeError("could not read a frame from camera 1")

                    # OpenCV delivers BGR; the frame is converted to RGB
                    # before being fed to the graph.
                    image_np = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    image_np_expanded = np.expand_dims(image_np, axis=0)

                    boxes, scores, classes, num_detections = sess.run(
                        fetches,
                        feed_dict={image_tensor: image_np_expanded})
                    end = timer()

                    # NOTE(review): the trailing-underscore helper is not
                    # shown in this file; it is used here as returning the
                    # annotated image together with an object count.
                    image_np, num = vis_util.visualize_boxes_and_labels_on_image_array_(
                        image_np,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8)

                    # Back to BGR for OpenCV's display and encoding calls.
                    image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
                    print(num)

                    if num >= 5:  # stop once five objects are recognised
                        detecting = False
                        tmp = "wxy-TRY-" + time.strftime("%H%M", time.localtime()) + ".jpg"
                        # imencode + tofile instead of imwrite: per the
                        # original author's note, needed for paths that
                        # contain Chinese characters.
                        cv2.imencode('.jpg', image_np)[1].tofile(tmp)
                        speech.say("识别完成")
                        print("写入成功,停止检测")

                    cv2.imshow("test", image_np)
                    cv2.waitKey(1)

                    print(end - start)
            finally:
                cap.release()
                cv2.destroyAllWindows()

def getToken(host):
    """Request an access token from the given OAuth token URL.

    Args:
        host: Complete token URL, query parameters included.

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
        KeyError: if the JSON response contains no ``access_token``.
    """
    # Without a timeout a stalled connection would block the caller forever.
    res = requests.post(host, timeout=10)
    # Surface HTTP-level failures directly instead of as a confusing KeyError.
    res.raise_for_status()
    return res.json()['access_token']


def save_wave_file(filepath, data):
    """Write recorded audio chunks to a WAV file.

    The channel count, sample width and frame rate are taken from the
    module-level ``channels``, ``sampwidth`` and ``framerate`` settings.

    Args:
        filepath: Destination path of the WAV file.
        data: Iterable of ``bytes`` chunks; they are concatenated in order.
    """
    # The context manager closes the writer even if writing a frame fails.
    with wave.open(filepath, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sampwidth)
        wf.setframerate(framerate)
        wf.writeframes(b''.join(data))


def my_record():
    """Record roughly four seconds of audio and save it to ``FILEPATH``.

    A PyAudio input stream is opened with the module-level audio settings,
    read in chunks of ``num_samples`` frames until four seconds have passed,
    and the collected chunks are written out with ``save_wave_file``.  The
    stream and the PyAudio instance are released even if recording fails.
    """
    pa = PyAudio()
    try:
        stream = pa.open(format=paInt16, channels=channels,
                         rate=framerate, input=True, frames_per_buffer=num_samples)
        try:
            my_buf = []
            deadline = time.time() + 4  # seconds
            print('正在录音...')
            while time.time() < deadline:
                my_buf.append(stream.read(num_samples))
            print('录音结束.')
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Frees the PortAudio resources held by this PyAudio instance.
        pa.terminate()

    save_wave_file(FILEPATH, my_buf)


def get_audio(file):
    """Return the complete contents of ``file`` as bytes.

    Args:
        file: Path of the file to read; it is opened in binary mode.
    """
    with open(file, 'rb') as handle:
        return handle.read()


def speech2text(speech_data, token, dev_pid=1537):
    """Send audio bytes to the Baidu speech endpoint and return the result.

    Args:
        speech_data: Raw bytes of the audio file; sent base64-encoded.
        token: Access token placed in the request body.
        dev_pid: Value of the ``dev_pid`` request field (default 1537).

    Returns:
        The first entry of the response's ``result`` list when that key is
        present; otherwise the whole decoded JSON response.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    payload = {
        'format': 'wav',
        'rate': '16000',
        'channel': 1,
        'cuid': '*******',
        'len': len(speech_data),  # length of the raw (un-encoded) audio
        'speech': base64.b64encode(speech_data).decode('utf-8'),
        'token': token,
        'dev_pid': dev_pid,
    }
    url = 'https://vop.baidu.com/server_api'
    headers = {'Content-Type': 'application/json'}
    print('正在识别...')
    # A timeout keeps a stalled upload from hanging the caller forever.
    r = requests.post(url, json=payload, headers=headers, timeout=30)
    Result = r.json()
    if 'result' in Result:
        return Result['result'][0]
    # No 'result' key: hand the full response back for inspection.
    return Result

def detect_img(yolo):
    """Run the given detector on camera 0 until exactly two objects are seen.

    Each frame is converted to an RGB PIL image and passed to
    ``yolo.detect_image_``; the image it returns is converted back to BGR
    and shown in a window named "test".  When the returned count equals 2
    the frame is saved as ``WXY-TRY-<HHMM>.jpg``, "识别完成" is spoken and
    the loop stops after that frame has been displayed.

    The camera is released and the preview window closed on every exit path.

    Args:
        yolo: Object providing ``detect_image_(image)``.
            NOTE(review): its definition is not shown in this file; it is
            used here as returning an (image, count) pair.

    Raises:
        RuntimeError: if the camera cannot be opened or a frame cannot be
            read.
    """
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            raise RuntimeError("could not open camera 0")

        detecting = True
        while detecting:
            grabbed, frame = cap.read()
            if not grabbed or frame is None:
                # Without this check cvtColor would fail on None with an
                # unhelpful OpenCV assertion.
                raise RuntimeError("could not read a frame from camera 0")

            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image_, num = yolo.detect_image_(Image.fromarray(rgb))
            image = cv2.cvtColor(np.asarray(image_), cv2.COLOR_RGB2BGR)

            if num == 2:
                detecting = False
                cv2.imwrite("WXY-TRY-" + time.strftime("%H%M", time.localtime()) + ".jpg", image)
                speech.say("识别完成")
                print("写入成功,停止检测")

            cv2.imshow("test", image)
            cv2.waitKey(1)
    finally:
        cap.release()
        cv2.destroyAllWindows()




def test():
    """Start detection once the spoken word "开始" has been recognised.

    Repeats record -> fetch token -> recognise until the recognition result
    equals "开始"; every result is printed.  Afterwards "开始识别" is spoken
    and ``detect()`` is run.
    """
    while True:
        my_record()
        access_token = getToken(HOST)
        recorded = get_audio(FILEPATH)
        result = speech2text(recorded, access_token, 1536)
        print(result)
        if result == "开始":
            break

    speech.say("开始识别")
    detect()

def test2():
    """Start detection as soon as any data arrives on serial port COM4.

    The port is opened at 115200 baud with a 0.5 s read timeout and polled
    line by line; the first non-empty read triggers ``detect()``.  The port
    is closed afterwards, including when ``detect()`` raises.
    """
    serialPort = "COM4"  # serial port
    baudRate = 115200  # baud rate
    ser = serial.Serial(serialPort, baudRate, timeout=0.5)
    try:
        print("参数设置:串口=%s ,波特率=%d" % (serialPort, baudRate))
        while True:
            # readline() returns empty bytes when the timeout expires, so
            # keep polling until something has actually been received.
            if ser.readline():
                detect()
                break
    finally:
        ser.close()

def test3():
    """Speak "开始识别" and print the current local time as HHMM."""
    speech.say("开始识别")
    hhmm = time.strftime("%H%M", time.localtime())
    print(hhmm)
if __name__ == '__main__':
    # Script entry point. The commented-out calls are the alternative
    # ways of starting detection that this file defines.
    #test()    # with Baidu speech recognition
    #test2()   # with iFlytek voice wake-up
    #test3()   # try out the `speech` module on Windows
    detect()   # run detection directly
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
在 PyTorch 中使用 Faster R-CNN ResNet-50 FPN 模型(torchvision 中的 `fasterrcnn_resnet50_fpn`),可以按照以下步骤进行:

1. 安装 PyTorch 和 TorchVision 库(如果未安装的话)。

2. 导入必要的库和模块:

```python
import torch
import torchvision
from PIL import Image
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
```

3. 加载预训练模型 `fasterrcnn_resnet50_fpn`:

```python
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
```

4. 修改模型的分类器,将其调整为适合你的任务。`fasterrcnn_resnet50_fpn` 是一个目标检测模型,它的分类器(box predictor)用来预测每个候选框的物体类别。如果你的任务的类别数与预训练模型不同,需要把它替换为输出 `num_classes` 个类别的新预测器。注意 torchvision 中的 `num_classes` 包含背景类,因此只检测一个类别时应设为 2:

```python
num_classes = 2  # 1 个目标类别 + 背景
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
```

5. 将模型转换为训练模式,并将其移动到所选设备(如 GPU)上:

```python
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
model.train()  # 转换为训练模式
```

6. 训练模型。可以使用自己的数据集来训练模型,或者使用 TorchVision 中的数据集,如 COCO 或 Pascal VOC 数据集。

7. 在测试阶段,可以使用以下代码来检测图像中的物体:

```python
# 定义图像
image = Image.open('test.jpg')

# 转换为 Tensor,并将其移动到设备上
image_tensor = torchvision.transforms.functional.to_tensor(image)
image_tensor = image_tensor.to(device)

# 执行推理
model.eval()
with torch.no_grad():
    outputs = model([image_tensor])

# 处理输出
boxes = outputs[0]['boxes'].cpu().numpy()    # 物体框
scores = outputs[0]['scores'].cpu().numpy()  # 物体分数
```

需要注意的是,`fasterrcnn_resnet50_fpn` 是一个较大的模型,需要较高的计算资源和训练时间。在训练和测试时,建议使用 GPU 来加速计算。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值