Goal
We have already used the playbin2 element, which lets us achieve a great deal with very little work. This tutorial shows some of the deeper customizations that are possible when its default behavior does not suit our particular needs. We will see:
How to find out how many streams a file contains, and how to switch among them
How to gather information about each stream
As a side note: even though this element is named playbin2, it can safely be referred to as playbin without causing confusion, since the original playbin has been deprecated.
Introduction
A file frequently contains one video stream together with several audio and subtitle streams. The most common case is a single video stream and a single audio stream (a 5.1 surround track is still just one audio stream), but more and more files now ship with multiple dubbings, that is, multiple audio streams. In that situation, when the user selects one audio stream, only that stream is played and the others are ignored.
To select the right stream, the user needs certain information about them, for example, their language. This information is embedded in the streams in the form of "metadata", and this tutorial shows how to retrieve it.
Subtitles can be embedded in a file just like audio and video; we will look at them in detail in the next tutorial. Finally, note that a file can also contain multiple video streams. The typical example is a DVD with multiple camera angles, but this case is rather rare.
The following code prints the number of streams in a file together with their associated metadata, and allows switching the audio stream while the media is playing.
A multilingual player
#include <stdio.h>   /* for fileno() */
#include <stdlib.h>  /* for atoi() */
#include <string.h>  /* for memset() */
#include <gst/gst.h>

/* Structure to contain all our information, so we can pass it around */
typedef struct _CustomData {
  GstElement *playbin2;  /* Our one and only element */

  gint n_video;          /* Number of embedded video streams */
  gint n_audio;          /* Number of embedded audio streams */
  gint n_text;           /* Number of embedded subtitle streams */

  gint current_video;    /* Currently playing video stream */
  gint current_audio;    /* Currently playing audio stream */
  gint current_text;     /* Currently playing subtitle stream */

  GMainLoop *main_loop;  /* GLib's Main Loop */
} CustomData;

/* playbin2 flags */
typedef enum {
  GST_PLAY_FLAG_VIDEO = (1 << 0), /* We want video output */
  GST_PLAY_FLAG_AUDIO = (1 << 1), /* We want audio output */
  GST_PLAY_FLAG_TEXT  = (1 << 2)  /* We want subtitle output */
} GstPlayFlags;

/* Forward definition for the message and keyboard processing functions */
static gboolean handle_message (GstBus *bus, GstMessage *msg, CustomData *data);
static gboolean handle_keyboard (GIOChannel *source, GIOCondition cond, CustomData *data);

int main(int argc, char *argv[]) {
  CustomData data;
  GstBus *bus;
  GstStateChangeReturn ret;
  gint flags;
  GIOChannel *io_stdin;

  /* Initialize our data structure, so the stream counts start at zero */
  memset (&data, 0, sizeof (data));

  /* Initialize GStreamer */
  gst_init (&argc, &argv);

  /* Create the elements */
  data.playbin2 = gst_element_factory_make ("playbin2", "playbin2");
  if (!data.playbin2) {
    g_printerr ("Not all elements could be created.\n");
    return -1;
  }

  /* Set the URI to play */
  g_object_set (data.playbin2, "uri", "http://docs.gstreamer.com/media/sintel_cropped_multilingual.webm", NULL);

  /* Set flags to show Audio and Video but ignore Subtitles */
  g_object_get (data.playbin2, "flags", &flags, NULL);
  flags |= GST_PLAY_FLAG_VIDEO | GST_PLAY_FLAG_AUDIO;
  flags &= ~GST_PLAY_FLAG_TEXT;
  g_object_set (data.playbin2, "flags", flags, NULL);

  /* Set connection speed. This will affect some internal decisions of playbin2 */
  g_object_set (data.playbin2, "connection-speed", 56, NULL);

  /* Add a bus watch, so we get notified when a message arrives */
  bus = gst_element_get_bus (data.playbin2);
  gst_bus_add_watch (bus, (GstBusFunc)handle_message, &data);

  /* Add a keyboard watch so we get notified of keystrokes */
#ifdef _WIN32
  io_stdin = g_io_channel_win32_new_fd (fileno (stdin));
#else
  io_stdin = g_io_channel_unix_new (fileno (stdin));
#endif
  g_io_add_watch (io_stdin, G_IO_IN, (GIOFunc)handle_keyboard, &data);

  /* Start playing */
  ret = gst_element_set_state (data.playbin2, GST_STATE_PLAYING);
  if (ret == GST_STATE_CHANGE_FAILURE) {
    g_printerr ("Unable to set the pipeline to the playing state.\n");
    gst_object_unref (data.playbin2);
    return -1;
  }

  /* Create a GLib Main Loop and set it to run */
  data.main_loop = g_main_loop_new (NULL, FALSE);
  g_main_loop_run (data.main_loop);

  /* Free resources */
  g_main_loop_unref (data.main_loop);
  g_io_channel_unref (io_stdin);
  gst_object_unref (bus);
  gst_element_set_state (data.playbin2, GST_STATE_NULL);
  gst_object_unref (data.playbin2);
  return 0;
}
/* Extract some metadata from the streams and print it on the screen */
static void analyze_streams (CustomData *data) {
  gint i;
  GstTagList *tags;
  gchar *str;
  guint rate;

  /* Read some properties */
  g_object_get (data->playbin2, "n-video", &data->n_video, NULL);
  g_object_get (data->playbin2, "n-audio", &data->n_audio, NULL);
  g_object_get (data->playbin2, "n-text", &data->n_text, NULL);

  g_print ("%d video stream(s), %d audio stream(s), %d text stream(s)\n",
      data->n_video, data->n_audio, data->n_text);

  g_print ("\n");
  for (i = 0; i < data->n_video; i++) {
    tags = NULL;
    /* Retrieve the stream's video tags */
    g_signal_emit_by_name (data->playbin2, "get-video-tags", i, &tags);
    if (tags) {
      g_print ("video stream %d:\n", i);
      str = NULL;
      gst_tag_list_get_string (tags, GST_TAG_VIDEO_CODEC, &str);
      g_print ("  codec: %s\n", str ? str : "unknown");
      g_free (str);
      gst_tag_list_free (tags);
    }
  }

  g_print ("\n");
  for (i = 0; i < data->n_audio; i++) {
    tags = NULL;
    /* Retrieve the stream's audio tags */
    g_signal_emit_by_name (data->playbin2, "get-audio-tags", i, &tags);
    if (tags) {
      g_print ("audio stream %d:\n", i);
      if (gst_tag_list_get_string (tags, GST_TAG_AUDIO_CODEC, &str)) {
        g_print ("  codec: %s\n", str);
        g_free (str);
      }
      if (gst_tag_list_get_string (tags, GST_TAG_LANGUAGE_CODE, &str)) {
        g_print ("  language: %s\n", str);
        g_free (str);
      }
      if (gst_tag_list_get_uint (tags, GST_TAG_BITRATE, &rate)) {
        g_print ("  bitrate: %d\n", rate);
      }
      gst_tag_list_free (tags);
    }
  }

  g_print ("\n");
  for (i = 0; i < data->n_text; i++) {
    tags = NULL;
    /* Retrieve the stream's subtitle tags */
    g_signal_emit_by_name (data->playbin2, "get-text-tags", i, &tags);
    if (tags) {
      g_print ("subtitle stream %d:\n", i);
      if (gst_tag_list_get_string (tags, GST_TAG_LANGUAGE_CODE, &str)) {
        g_print ("  language: %s\n", str);
        g_free (str);
      }
      gst_tag_list_free (tags);
    }
  }

  g_object_get (data->playbin2, "current-video", &data->current_video, NULL);
  g_object_get (data->playbin2, "current-audio", &data->current_audio, NULL);
  g_object_get (data->playbin2, "current-text", &data->current_text, NULL);

  g_print ("\n");
  g_print ("Currently playing video stream %d, audio stream %d and text stream %d\n",
      data->current_video, data->current_audio, data->current_text);
  g_print ("Type any number and hit ENTER to select a different audio stream\n");
}
/* Process messages from GStreamer */
static gboolean handle_message (GstBus *bus, GstMessage *msg, CustomData *data) {
  GError *err;
  gchar *debug_info;

  switch (GST_MESSAGE_TYPE (msg)) {
    case GST_MESSAGE_ERROR:
      gst_message_parse_error (msg, &err, &debug_info);
      g_printerr ("Error received from element %s: %s\n", GST_OBJECT_NAME (msg->src), err->message);
      g_printerr ("Debugging information: %s\n", debug_info ? debug_info : "none");
      g_clear_error (&err);
      g_free (debug_info);
      g_main_loop_quit (data->main_loop);
      break;
    case GST_MESSAGE_EOS:
      g_print ("End-Of-Stream reached.\n");
      g_main_loop_quit (data->main_loop);
      break;
    case GST_MESSAGE_STATE_CHANGED: {
      GstState old_state, new_state, pending_state;
      gst_message_parse_state_changed (msg, &old_state, &new_state, &pending_state);
      if (GST_MESSAGE_SRC (msg) == GST_OBJECT (data->playbin2)) {
        if (new_state == GST_STATE_PLAYING) {
          /* Once we are in the playing state, analyze the streams */
          analyze_streams (data);
        }
      }
    } break;
    default:
      break;
  }

  /* We want to keep receiving messages */
  return TRUE;
}
/* Process keyboard input */
static gboolean handle_keyboard (GIOChannel *source, GIOCondition cond, CustomData *data) {
  gchar *str = NULL;

  if (g_io_channel_read_line (source, &str, NULL, NULL, NULL) == G_IO_STATUS_NORMAL) {
    int index = atoi (str);
    if (index < 0 || index >= data->n_audio) {
      g_printerr ("Index out of bounds\n");
    } else {
      /* If the input was a valid audio stream index, set the current audio stream */
      g_print ("Setting current audio stream to %d\n", index);
      g_object_set (data->playbin2, "current-audio", index, NULL);
    }
  }
  g_free (str);
  return TRUE;
}
Walkthrough
/* Structure to contain all our information, so we can pass it around */
typedef struct _CustomData {
  GstElement *playbin2;  /* Our one and only element */

  gint n_video;          /* Number of embedded video streams */
  gint n_audio;          /* Number of embedded audio streams */
  gint n_text;           /* Number of embedded subtitle streams */

  gint current_video;    /* Currently playing video stream */
  gint current_audio;    /* Currently playing audio stream */
  gint current_text;     /* Currently playing subtitle stream */

  GMainLoop *main_loop;  /* GLib's Main Loop */
} CustomData;
As usual, we keep all our data in a structure so we can pass it around easily. For this tutorial we need to know the number of streams of each kind and which one is currently playing. Also, we are going to use a different mechanism to wait for messages, one that allows interactivity, so we also need a GLib main loop.
/* playbin2 flags */
typedef enum {
  GST_PLAY_FLAG_VIDEO = (1 << 0), /* We want video output */
  GST_PLAY_FLAG_AUDIO = (1 << 1), /* We want audio output */
  GST_PLAY_FLAG_TEXT  = (1 << 2)  /* We want subtitle output */
} GstPlayFlags;
Later we are going to set some of playbin2's flags, and we would of course like a meaningful enum that makes them easy to understand and manipulate. However, playbin2 is a plugin rather than part of the GStreamer core, so this enum is not visible to us and has to be re-defined in the code. For a detailed explanation of these flags, see the GstPlayFlags section of the playbin2 documentation. GObject is introspective, so the flag values are also available at runtime, but retrieving them that way is rather cumbersome.
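As an illustration, here is a minimal sketch of that runtime lookup, not part of the tutorial code: it asks the flags property for its registered flag values through GObject introspection instead of hard-coding the enum. The nick string passed in (for example "video" for GST_PLAY_FLAG_VIDEO) is an assumption about how playbin2 registers its GstPlayFlags.

/* A minimal sketch (not from the tutorial): look up a playbin2 flag value at
 * runtime via GObject introspection. The nick strings ("video", "audio",
 * "text") are assumed to match how playbin2 registers its GstPlayFlags. */
static guint lookup_play_flag (GstElement *playbin2, const gchar *nick) {
  GParamSpec *pspec;
  GFlagsValue *value;

  /* Find the "flags" property on the playbin2 instance's class */
  pspec = g_object_class_find_property (G_OBJECT_GET_CLASS (playbin2), "flags");
  if (!pspec || !G_IS_PARAM_SPEC_FLAGS (pspec))
    return 0;

  /* Ask the registered flags class for the value stored under this nick */
  value = g_flags_get_value_by_nick (G_PARAM_SPEC_FLAGS (pspec)->flags_class, nick);
  return value ? value->value : 0;
}

A call such as lookup_play_flag (data.playbin2, "video") would then return the same value as GST_PLAY_FLAG_VIDEO, which is exactly why re-defining the enum is the more convenient route.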
/* Forward definition for the message and keyboard processing functions */
static gboolean handle_message (GstBus *bus, GstMessage *msg, CustomData *data);
static gboolean handle_keyboard (GIOChannel *source, GIOCondition cond, CustomData *data);
Here we declare the two callbacks we will be using: handle_message processes GStreamer messages, which we are already familiar with, and handle_keyboard processes keystrokes, since this tutorial is going to be interactive.
We also skip the pipeline creation: since playbin2 is itself a single-element pipeline, it only needs to be instantiated and given the URI to play.
Next we focus on some of the properties of playbin2:
/* Set flags to show Audio and Video but ignore Subtitles */
g_object_get (data.playbin2, "flags", &flags, NULL);
flags |= GST_PLAY_FLAG_VIDEO | GST_PLAY_FLAG_AUDIO;
flags &= ~GST_PLAY_FLAG_TEXT;
g_object_set (data.playbin2, "flags", flags, NULL);
playbin2's behavior can be adjusted through its flags property, which accepts any combination of GstPlayFlags. The most interesting values are:
GST_PLAY_FLAG_VIDEO: Enable video rendering. If this flag is not set, there is no video output.
GST_PLAY_FLAG_AUDIO: Enable audio rendering. If this flag is not set, there is no audio output.
GST_PLAY_FLAG_TEXT: Enable subtitle rendering. If this flag is not set, subtitles are not shown.
GST_PLAY_FLAG_VIS: Enable rendering of visualizations when there is no video stream; a later tutorial covers this.
GST_PLAY_FLAG_DOWNLOAD: See GStreamer Basic tutorial 12: Streaming and the tutorials that follow it.
GST_PLAY_FLAG_BUFFERING: See GStreamer Basic tutorial 12: Streaming and the tutorials that follow it.
GST_PLAY_FLAG_DEINTERLACE: If the video is interlaced, deinterlace it before displaying it.
/* Set connection speed. This will affect some internal decisions of playbin2 */
g_object_set (data.playbin2, "connection-speed", 56, NULL);
This property is not really useful in this example. connection-speed informs playbin2 of the maximum speed of our network connection, so that, in case multiple versions of the requested media are present on the server, playbin2 can choose the most appropriate one. It is mostly used in combination with streaming protocols such as mms or rtsp.
We have set these properties one by one, but we could have called g_object_set() just once:
g_object_set (data.playbin2, "uri", "http://docs.gstreamer.com/media/sintel_cropped_multilingual.webm", "flags", flags, "connection-speed", 56, NULL);
This is why g_object_set() requires a NULL as its last parameter.
/* Add a keyboard watch so we get notified of keystrokes */
#ifdef _WIN32
io_stdin = g_io_channel_win32_new_fd (fileno (stdin));
#else
io_stdin = g_io_channel_unix_new (fileno (stdin));
#endif
g_io_add_watch (io_stdin, G_IO_IN, (GIOFunc)handle_keyboard, &data);
These lines connect standard input (the keyboard) to a callback. The mechanism used here is GLib's rather than GStreamer's, so there is no point in expanding on it; applications usually have their own ways of handling user input, and GStreamer has little to do with that.
/* Create a GLib Main Loop and set it to run */
data.main_loop = g_main_loop_new (NULL, FALSE);
g_main_loop_run (data.main_loop);
For interactivity we no longer poll the GStreamer bus by hand. Instead, we create a GMainLoop and set it running with g_main_loop_run(). This function blocks the thread and does not return until g_main_loop_quit() is called. While it runs, it invokes the callbacks we have registered: handle_message when a message appears on the bus, and handle_keyboard when a key is pressed.
There is nothing new in handle_message, except that it calls the analyze_streams function once the pipeline moves to the PLAYING state:
/* Extract some metadata from the streams and print it on the screen */
static void analyze_streams (CustomData *data) {
  gint i;
  GstTagList *tags;
  gchar *str;
  guint rate;

  /* Read some properties */
  g_object_get (data->playbin2, "n-video", &data->n_video, NULL);
  g_object_get (data->playbin2, "n-audio", &data->n_audio, NULL);
  g_object_get (data->playbin2, "n-text", &data->n_text, NULL);
As the comment says, this function gathers information about the media and prints it on the screen. The number of video, audio and subtitle streams can be read directly from the n-video, n-audio and n-text properties.
for (i = 0; i < data->n_video; i++) {
  tags = NULL;
  /* Retrieve the stream's video tags */
  g_signal_emit_by_name (data->playbin2, "get-video-tags", i, &tags);
  if (tags) {
    g_print ("video stream %d:\n", i);
    str = NULL;
    gst_tag_list_get_string (tags, GST_TAG_VIDEO_CODEC, &str);
    g_print ("  codec: %s\n", str ? str : "unknown");
    g_free (str);
    gst_tag_list_free (tags);
  }
}
Now, for every stream, we want to retrieve its metadata. Metadata is stored as tags inside a GstTagList structure; the GstTagList for a given stream is recovered from playbin2 with g_signal_emit_by_name(), and the individual tags are then read with the gst_tag_list_get_* family of functions, such as the gst_tag_list_get_string() used in this example.
playbin2 defines three action signals for retrieving metadata: get-video-tags, get-audio-tags and get-text-tags. The tag names are standardized and the list can be found in the GstTagList documentation. In this example we are interested in GST_TAG_LANGUAGE_CODE and in GST_TAG_*_CODEC (audio, video and text).
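In case you do not know in advance which tags a stream carries, a GstTagList can also be walked with gst_tag_list_foreach(). Below is a small sketch of that idea, not part of the tutorial code; for simplicity it only prints string-valued tags and silently skips the rest.

/* A small sketch (not from the tutorial): print every string-valued tag in a
 * GstTagList, instead of asking for specific tags by name. */
static void print_one_tag (const GstTagList *list, const gchar *tag, gpointer user_data) {
  gchar *str = NULL;

  /* Only handle tags whose registered type is a string, skip everything else */
  if (gst_tag_get_type (tag) == G_TYPE_STRING &&
      gst_tag_list_get_string (list, tag, &str)) {
    g_print ("  %s: %s\n", tag, str);
    g_free (str);
  }
}

Wherever a GstTagList is at hand, it could then be dumped with gst_tag_list_foreach (tags, print_one_tag, NULL);.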
g_object_get (data->playbin2, "current-video", &data->current_video, NULL);
g_object_get (data->playbin2, "current-audio", &data->current_audio, NULL);
g_object_get (data->playbin2, "current-text", &data->current_text, NULL);
Once we have extracted all the metadata we want, we read which streams are currently selected through three more properties of playbin2: current-video, current-audio and current-text.
/* Process keyboard input */
static gboolean handle_keyboard (GIOChannel *source, GIOCondition cond, CustomData *data) {
  gchar *str = NULL;

  if (g_io_channel_read_line (source, &str, NULL, NULL, NULL) == G_IO_STATUS_NORMAL) {
    int index = atoi (str);
    if (index < 0 || index >= data->n_audio) {
      g_printerr ("Index out of bounds\n");
    } else {
      /* If the input was a valid audio stream index, set the current audio stream */
      g_print ("Setting current audio stream to %d\n", index);
      g_object_set (data->playbin2, "current-audio", index, NULL);
    }
  }
  g_free (str);
  return TRUE;
}
Finally, we allow the user to switch the audio stream while the media is playing. This bare-bones implementation reads a number from standard input (the keyboard) and sets it as the current-audio property.
Keep in mind that the switch is not immediate: the previously decoded audio keeps playing for a while, until the data decoded from the newly selected stream reaches the audio output. The delay depends on how the streams are multiplexed in the container and on how much data has already been buffered.
If you run this tutorial, you can select the language by typing 0, 1 or 2 and hitting ENTER.