(具体后续整理之后完善)
Tensorflow speech_commands 训练自己的数据集
tensorflow 采用hash
# train
python train.py --data_url="" \
--data_dir=data --wanted_words="words1,words2"
# build pb
python freeze.py --start_checkpoint=/home/ysr/project/rknn/model/conv.ckpt-1200 \
--output_file=/home/ysr/project/rknn/model/my_frozen_graph.pb
RKNN-tools 导出RKNN
import numpy as np
import re
import math
import random
#import cv2
from rknn.api import RKNN
if __name__ == '__main__':
    # Create the RKNN object; conversion details are logged to a file for debugging.
    rknn = RKNN(verbose=False, verbose_file='./speech_command_build.log')

    # Configure for the target NPU platform.
    # Quantization configs kept for reference:
    #rknn.config(quantized_dtype='dynamic_fixed_point-8')
    #rknn.config(quantized_dtype='asymmetric_quantized-u8')
    rknn.config(target_platform=['rv1126'])

    # Load the frozen TensorFlow graph.
    # The input is the flattened MFCC fingerprint: 40 coefficients x 98 frames = 3920.
    print('--> Loading model')
    ret = rknn.load_tensorflow(tf_pb='./model/my_frozen_graph.pb',
                               inputs=['Reshape'],
                               outputs=['labels_softmax'],
                               input_size_list=[[1, 3920]])  # 40 x 98
    if ret != 0:
        rknn.release()
        raise SystemExit('load_tensorflow failed, ret = %d' % ret)
    print('done')

    # Build the model. Quantization is disabled, so dataset.txt is not actually
    # consumed here (it is only read when do_quantization=True).
    print('--> Building model')
    ret = rknn.build(do_quantization=False, dataset='./dataset.txt', pre_compile=False)
    if ret != 0:
        rknn.release()
        raise SystemExit('build failed, ret = %d' % ret)
    print('done')

    # Export the RKNN model.
    #rknn.export_rknn('./speech_command_quantized.rknn')
    ret = rknn.export_rknn('./model/speech_command.rknn')
    if ret != 0:
        rknn.release()
        raise SystemExit('export_rknn failed, ret = %d' % ret)

    # Release the RKNN context now that the model has been exported.
    rknn.release()
    #import time
    #time.sleep(100)
导出 rknn 之后的测试脚本
from rknn.api import RKNN
from tensorflow.python.ops import gen_audio_ops as contrib_audio
import tensorflow as tf
import numpy as np
# Load the recorded wav and compute the same MFCC fingerprint the model was
# trained on: 16 kHz mono, 480-sample window, 160-sample stride, 40 DCT
# coefficients -> tensor of shape (1, 98, 40).
with open("model/recoard.wav", "rb") as wav_file:  # close the file deterministically
    wav_data = wav_file.read()

decoded_sample_data = contrib_audio.decode_wav(wav_data,
                                               desired_channels=1,
                                               desired_samples=16000,
                                               name='decoded_sample_data')
spectrogram = contrib_audio.audio_spectrogram(decoded_sample_data.audio,
                                              window_size=480,
                                              stride=160,
                                              magnitude_squared=True)
# 40 MFCC coefficients per frame.
fingerprint_input = contrib_audio.mfcc(spectrogram, 16000, dct_coefficient_count=40)
print(fingerprint_input)
fingerprint_input_npy = fingerprint_input.numpy()
print(fingerprint_input_npy.size)
#np.save('fingerprint_input.npy', fingerprint_input_npy)
#np.savetxt('fingerprint_input.txt', fingerprint_input_npy)

# Create the RKNN object and load the previously exported model.
rknn = RKNN()
ret = rknn.load_rknn(path='model/speech_command.rknn')
if ret != 0:
    raise SystemExit('load_rknn failed, ret = %d' % ret)

print("rknn runtime start")
ret = rknn.init_runtime(perf_debug=True)
if ret != 0:
    rknn.release()
    raise SystemExit('init_runtime failed, ret = %d' % ret)
#sdk_version = rknn.get_sdk_version()
#print(sdk_version)

# inference() expects a *list* of input arrays; unpack the single output tensor.
outputs, = rknn.inference(inputs=[fingerprint_input_npy], data_type='float32')
print("rknn runtime stop")

# Release RKNN context.
rknn.release()
def load_labels(filename):
    """Read in labels, one label per line."""
    labels = []
    for raw_line in tf.io.gfile.GFile(filename):
        labels.append(raw_line.rstrip())
    return labels
## Post-processing: map the network's score vector to label names.
labels = load_labels("model/conv_labels.txt")
predictions = np.array(outputs)
print(outputs)
print(predictions)
# Indices of the three highest scores, best first
# (full descending sort, then keep the first three).
top_k = predictions[0].argsort()[::-1][:3]
print(top_k)
for idx in top_k:
    print('%s (score = %.5f)' % (labels[idx], predictions[0][idx]))
Tensorflow 提取MFCC 算法和 Spectrogram
不依赖tensorflow.so
NPU 调用
/*
* @Author: your name
* @Date: 2021-08-02 17:58:26
* @LastEditTime: 2021-08-05 13:50:22
* @LastEditors: Please set LastEditors
* @Description: In User Settings Edit
* @FilePath: \deploy\tfversion\demo.cc
*/
#include <cmath>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include <iomanip>
#include <chrono>
#include <dirent.h>
#include <cstring>
//#include "wav_header.h"
#include "mfcc.h"
#include "spectrogram.h"
#include "NanoDet.hpp"
#include "rknn_api.h"
extern "C"
{
//#include "rknn_inference.h"
}
// Forward declarations (definitions are outside this chunk).
// NOTE(review): presumably dumps one tensor attribute from rknn_query -- confirm.
static void printRKNNTensor(rknn_tensor_attr *attr);
// NOTE(review): presumably reads the whole .rknn file into a heap buffer and
// returns its size via model_size -- confirm against the definition.
static unsigned char *load_model(const char *filename, int *model_size);
// NOTE(review): presumably runs NPU inference on `size` floats at `data` -- confirm.
int rknn_start(float *data, uint32_t size);
/* Steps to calculate the MFCC features:
Step 1: load the wav file and prepare the audio data.
Step 2: create a Spectrogram sgram and call
   sgram.Initialize(int window_length, int step_length) (spectrogram.cc),
   with window_length = window_size = 480 and step_length = stride = 160
   (matching tf.audio_spectrogram); then call
   ComputeSquaredMagnitudeSpectrogram(input, output) to get the final
   spectrogram results.
Step 3: use Mfcc (mfcc.cc) to compute the MFCC features:
   mfcc.Initialize(int input_length, double input_sample_rate), where
   input_length = input.size().
*/
// Bounds of a signed 16-bit PCM sample: ~0x7FFF == -32768, 0x7FFF == 32767.
static const int16_t kint16min = static_cast<int16_t>(~0x7FFF);
static const int16_t kint16max = static_cast<int16_t>(0x7FFF);
// Convenience aliases for wall-clock timing of the pipeline stages.
typedef std::chrono::high_resolution_clock Clock;
typedef std::chrono::milliseconds Milliseconds;
// Convert a signed 16-bit PCM sample to a float in [-1.0, 1.0).
inline float Int16SampleToFloat(int16_t data) {
  constexpr float kScale = 1.0f / 32768.0f;  // 32768 == 1 << 15
  return static_cast<float>(data) * kScale;
}
//inline int16_t FloatToInt16Sample(float data) {
// constexpr float kMult