有一批训练数据需要整理,主要是从阿里云实时ASR获取的识别结果,结果中包含文本及其起止时间点信息,需要按这些时间点把对应的音频片段从wav文件中切出来。
比如
#test begin end
yes i have found a better 20400 23530
记得应该可以使用Python的pydub库处理该问题。
不过我没有使用Python,而是直接用C写了一个工具。数据量比较大,考虑到C会快一点。
代码如下:
//
// Created by chris on 8/10/20.
//
#include <stdio.h>
#include <stdlib.h>
#include <sndfile.h>
#include <string.h>
// Running index embedded in each output file name; create_new_wav() post-increments it on every call, so the first file gets 1.
static int name_id = 1;
/*
 * Print the command-line synopsis followed by one worked example to stdout.
 *
 * self: program name substituted into both lines of the message.
 */
static void usage(const char *self) {
    const char *prog = self;

    /* Synopsis line first, then the example, as two separate writes. */
    printf("usage: %s <input wav file> [begin time 0] [end time 0] ...\n", prog);
    printf("example:\n"
           "%s test.wav 20400 25300\n", prog);
}
/*
 * Open a new mono 16-bit PCM WAV file for writing, named after the input.
 *
 * The output path is the input path with "_<id>_" inserted in front of its
 * last ".wav", e.g. "test.wav" -> "test_1_.wav". <id> is the file-level
 * counter name_id, which is incremented on every call.
 *
 * name: path of the source file. If it contains no ".wav" at all, the whole
 *       string is used as the stem.
 * sr:   sample rate stored in the new file's header.
 *
 * Returns the handle produced by sf_open() (NULL when opening fails), or NULL
 * if the output path could not be formatted.
 */
static SNDFILE *create_new_wav(const char *name, int sr) {
    static const char suffix[] = ".wav";

    /* Use the LAST ".wav" so paths such as "set.wav/a.wav" keep their directory. */
    const char *ext = NULL;
    for (const char *hit = strstr(name, suffix); hit != NULL; hit = strstr(hit + 1, suffix)) {
        ext = hit;
    }
    size_t stem_len = ext ? (size_t)(ext - name) : strlen(name);

    /*
     * 32 spare bytes cover '_' + any decimal int + '_' + ".wav" + NUL, so the
     * id is never truncated (a 6-byte limit used to clip ids >= 1000 and made
     * ids >= 10000 collide).
     */
    char path[stem_len + 32];
    int written = snprintf(path, sizeof path, "%.*s_%d_%s",
                           (int)stem_len, name, name_id++, suffix);
    if (written < 0 || (size_t)written >= sizeof path) {
        return NULL;
    }

    /* Zero everything first so no field is handed to libsndfile uninitialised. */
    SF_INFO onfo = {0};
    onfo.channels = 1;
    onfo.samplerate = sr;
    onfo.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
    onfo.sections = 0;
    onfo.seekable = 1;

    return sf_open(path, SFM_WRITE, &onfo);
}
// One segment to cut out of the input file.
// extract_segment_wav() multiplies both fields by a bytes-per-millisecond factor, so they are times in milliseconds.
struct split_time {
int begin; // start of the segment
int end; // end of the segment; main() rejects pairs where begin >= end
};
/*
 * Copy one time range of `in` into a freshly created WAV file.
 *
 * The input is consumed strictly forward with raw byte reads, so segments
 * must be requested in increasing time order. Byte positions are computed at
 * 2 bytes per sample frame, i.e. the audio is treated as mono 16-bit PCM,
 * which is also the layout create_new_wav() declares for the output.
 *
 * name:   input path; used to derive the output name and in diagnostics.
 * sr:     sample rate of the input in Hz.
 * time:   segment boundaries in milliseconds.
 * in:     open input handle, already advanced by *offset audio bytes.
 * offset: in/out count of audio bytes consumed from `in` so far.
 *
 * Returns 0 once the output file has been created and closed (even if no
 * data could be read for it), or -1 if the output file could not be created.
 */
static int extract_segment_wav(const char *name, int sr, const struct split_time *time, SNDFILE *in, int *offset) {
    enum { BYTES_PER_FRAME = 2, CHUNK_BYTES = 64 * 1024 };

    SNDFILE *out = create_new_wav(name, sr);
    if (!out) return -1;

    /*
     * Convert ms -> frames -> bytes. Dividing the sample rate by 1000 first
     * (as in `sr / 1000 * 2`) truncates for rates such as 44100 Hz and makes
     * every cut point drift; multiplying first keeps the position exact and
     * frame-aligned. 64-bit math avoids overflow on long recordings.
     */
    long long begin_byte = (long long)sr * time->begin / 1000 * BYTES_PER_FRAME;
    long long end_byte = (long long)sr * time->end / 1000 * BYTES_PER_FRAME;
    long long skip = begin_byte - *offset;
    long long remaining = end_byte - begin_byte;

    /* Fixed-size scratch buffer: segment-sized stack arrays could overflow the stack. */
    char chunk[CHUNK_BYTES];

    /* Seek forward by reading and discarding the gap before the segment. */
    while (skip > 0) {
        sf_count_t want = skip < CHUNK_BYTES ? (sf_count_t)skip : (sf_count_t)CHUNK_BYTES;
        sf_count_t got = sf_read_raw(in, chunk, want);
        if (got <= 0) break; /* end of input; the copy loop below reports it */
        *offset += (int)got; /* count only bytes that were really consumed */
        skip -= got;
    }

    /* Stream the segment itself from input to output. */
    long long total_read = 0;
    while (remaining > 0) {
        sf_count_t want = remaining < CHUNK_BYTES ? (sf_count_t)remaining : (sf_count_t)CHUNK_BYTES;
        sf_count_t got = sf_read_raw(in, chunk, want);
        if (got <= 0) break;
        *offset += (int)got;
        total_read += got;
        remaining -= got;
        if (sf_write_raw(out, chunk, got) != got) {
            fprintf(stderr, "%s short write to output segment!\n", name);
            break;
        }
    }

    if (total_read == 0) {
        fprintf(stderr, "%s cannot read any more bytes!\n", name);
    }

    sf_close(out);
    return 0;
}
int main(int argv, const char *args[]) {
if (argv < 4 && argv%2!=0) {
usage(args[0]);
return 1;
}
const int count = (argv-2)/2;
struct split_time st[count];
for (int i=0;i<count;i++) {
st[i].begin = atoi(args[2 + i*2]);
st[i].end = atoi(args[2 + i*2 + 1]);
if (st[i].begin >= st[i].end) {
fprintf(stderr, "the begin time %d should be less than the end time %d!\n", st[i].begin, st[i].end);
return 2;
}
if (i > 0 && st[i].begin <= st[i - 1].end) {
fprintf(stderr, "the begin time %d should be greater than the last end time %d!\n", st[i].begin,
st[i - 1].end);
return 2;
}
}
const char *in_path = args[1];
char *ext = strstr(in_path, ".wav");
if (!ext) {
fprintf(stderr, "please input wav file!\n");
usage(args[0]);
return 1;
}
SF_INFO info;
SNDFILE *in = sf_open(in_path, SFM_READ, &info);
if (!in) {
fprintf(stderr, "cannot open file %s\n", in_path);
return 2;
}
int n =0;
int offset=0;
while (n<count) {
if(!extract_segment_wav(in_path, info.samplerate, &st[n], in, &offset)) {
n++;
}
}
sf_close(in);
return 0;
}