// Runs a TensorFlow graph to convert an audio file into a visualization.
tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
int32_t window_size, int32_t stride,
float brightness,
const tensorflow::string& output_image)
函数作用:构建并运行一个 TensorFlow 计算图(graph),将音频(WAV)文件转换为频谱图,并保存为 PNG 图像。
函数调用函数情况如下:
函数声明情况如下:
函数声明文件如下:
函数流程图如下:
函数逻辑顺序图如下:
函数原始代码如下:
// Runs a TensorFlow graph to convert an audio file into a visualization.
//
// Parameters:
//   input_wav    - path given to the ReadFile op; its contents feed DecodeWav.
//   window_size  - forwarded unchanged to the AudioSpectrogram op.
//   stride       - forwarded unchanged to the AudioSpectrogram op.
//   brightness   - scalar fed at run time and multiplied into the spectrogram.
//   output_image - path given to the WriteFile op that stores the PNG stream.
//
// Returns the first non-OK status produced while exporting the graph,
// creating the session, loading the graph into it, or running it; returns OK
// when every step succeeds.
tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
                                    int32_t window_size, int32_t stride,
                                    float brightness,
                                    const tensorflow::string& output_image) {
  auto root = tensorflow::Scope::NewRootScope();
  using namespace tensorflow::ops;  // NOLINT(build/namespaces)
  // The following block creates a TensorFlow graph that:
  //  - Reads and decodes the audio file into a tensor of float samples.
  //  - Creates a float spectrogram from those samples.
  //  - Scales the spectrogram by the brightness input, caps values at 255 and
  //    casts the result to uint8.
  //  - Reshapes the tensor so that it's [height, width, 1] for imaging.
  //  - Encodes it as a PNG stream and saves it out to a file.
  Output file_reader =
      tensorflow::ops::ReadFile(root.WithOpName("input_wav"), input_wav);
  DecodeWav wav_decoder =
      DecodeWav(root.WithOpName("wav_decoder"), file_reader);
  Output spectrogram = AudioSpectrogram(root.WithOpName("spectrogram"),
                                        wav_decoder.audio, window_size, stride);
  // Brightness is a scalar placeholder so it can be supplied per run.
  Output brightness_placeholder =
      Placeholder(root.WithOpName("brightness_placeholder"), DT_FLOAT,
                  Placeholder::Attrs().Shape(TensorShape({})));
  Output mul = Mul(root.WithOpName("mul"), spectrogram, brightness_placeholder);
  Output min_const = Const(root.WithOpName("min_const"), 255.0f);
  Output min = Minimum(root.WithOpName("min"), mul, min_const);
  Output cast = Cast(root.WithOpName("cast"), min, DT_UINT8);
  // Append a trailing dimension (axis -1), then drop axis 0.
  Output expand_dims_const = Const(root.WithOpName("expand_dims_const"), -1);
  Output expand_dims =
      ExpandDims(root.WithOpName("expand_dims"), cast, expand_dims_const);
  Output squeeze = Squeeze(root.WithOpName("squeeze"), expand_dims,
                           Squeeze::Attrs().Axis({0}));
  Output png_encoder = EncodePng(root.WithOpName("png_encoder"), squeeze);
  // The writer op is only referenced by name ("output_image") when running the
  // session, but constructing it here is what adds it to the graph.
  tensorflow::ops::WriteFile file_writer = tensorflow::ops::WriteFile(
      root.WithOpName("output_image"), output_image, png_encoder);
  tensorflow::GraphDef graph;
  TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));

  // Build a session object from this graph definition. The power of TensorFlow
  // is that you can reuse complex computations like this, so usually we'd run a
  // lot of different inputs through it. In this example, we're just doing a
  // one-off run, so we'll create it and then use it immediately.
  //
  // Use the Status-returning overload of NewSession: the single-argument form
  // hands back a raw pointer that may be null on failure, and calling Create()
  // through it would then be a null dereference instead of a reported error.
  tensorflow::Session* raw_session = nullptr;
  TF_RETURN_IF_ERROR(
      tensorflow::NewSession(tensorflow::SessionOptions(), &raw_session));
  std::unique_ptr<tensorflow::Session> session(raw_session);
  TF_RETURN_IF_ERROR(session->Create(graph));

  // We're passing in the brightness as an input, so create a tensor to hold the
  // value.
  tensorflow::Tensor brightness_tensor(DT_FLOAT, TensorShape({}));
  brightness_tensor.scalar<float>()() = brightness;

  // Run the session to analyze the audio and write out the file. No output
  // tensors are fetched; "output_image" is run as a target node only.
  TF_RETURN_IF_ERROR(
      session->Run({{"brightness_placeholder", brightness_tensor}}, {},
                   {"output_image"}, nullptr));
  return tensorflow::Status::OK();
}