WebM Codec SDK
vp9_spatial_svc_encoder
1 /*
2  * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  * Use of this source code is governed by a BSD-style license
5  * that can be found in the LICENSE file in the root of the source
6  * tree. An additional intellectual property rights grant can be found
7  * in the file PATENTS. All contributing project authors may
8  * be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * This is an example demonstrating how to implement a multi-layer
13  * VP9 encoding scheme based on spatial scalability for video applications
14  * that benefit from a scalable bitstream.
15  */
16 
17 #include <math.h>
18 #include <stdarg.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 
23 
24 #include "../args.h"
25 #include "../tools_common.h"
26 #include "../video_writer.h"
27 
28 #include "../vpx_ports/vpx_timer.h"
29 #include "vpx/svc_context.h"
30 #include "vpx/vp8cx.h"
31 #include "vpx/vpx_encoder.h"
32 #include "../vpxstats.h"
33 #define OUTPUT_RC_STATS 1
34 
35 static const arg_def_t skip_frames_arg =
36  ARG_DEF("s", "skip-frames", 1, "input frames to skip");
37 static const arg_def_t frames_arg =
38  ARG_DEF("f", "frames", 1, "number of frames to encode");
39 static const arg_def_t threads_arg =
40  ARG_DEF("th", "threads", 1, "number of threads to use");
41 #if OUTPUT_RC_STATS
42 static const arg_def_t output_rc_stats_arg =
43  ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
44 #endif
45 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
46 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
47 static const arg_def_t timebase_arg =
48  ARG_DEF("t", "timebase", 1, "timebase (num/den)");
49 static const arg_def_t bitrate_arg = ARG_DEF(
50  "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
51 static const arg_def_t spatial_layers_arg =
52  ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
53 static const arg_def_t temporal_layers_arg =
54  ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
55 static const arg_def_t temporal_layering_mode_arg =
56  ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
57  "VP9E_TEMPORAL_LAYERING_MODE");
58 static const arg_def_t kf_dist_arg =
59  ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
60 static const arg_def_t scale_factors_arg =
61  ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
62 static const arg_def_t passes_arg =
63  ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
64 static const arg_def_t pass_arg =
65  ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");
66 static const arg_def_t fpf_name_arg =
67  ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");
68 static const arg_def_t min_q_arg =
69  ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
70 static const arg_def_t max_q_arg =
71  ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
72 static const arg_def_t min_bitrate_arg =
73  ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
74 static const arg_def_t max_bitrate_arg =
75  ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
76 static const arg_def_t lag_in_frame_arg =
77  ARG_DEF(NULL, "lag-in-frames", 1, "Number of frame to input before "
78  "generating any outputs");
79 static const arg_def_t rc_end_usage_arg =
80  ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
81 static const arg_def_t speed_arg =
82  ARG_DEF("sp", "speed", 1, "speed configuration");
83 static const arg_def_t aqmode_arg =
84  ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
85 
86 #if CONFIG_VP9_HIGHBITDEPTH
87 static const struct arg_enum_list bitdepth_enum[] = {
88  {"8", VPX_BITS_8},
89  {"10", VPX_BITS_10},
90  {"12", VPX_BITS_12},
91  {NULL, 0}
92 };
93 
94 static const arg_def_t bitdepth_arg =
95  ARG_DEF_ENUM("d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ",
96  bitdepth_enum);
97 #endif // CONFIG_VP9_HIGHBITDEPTH
98 
99 
100 static const arg_def_t *svc_args[] = {
101  &frames_arg, &width_arg, &height_arg,
102  &timebase_arg, &bitrate_arg, &skip_frames_arg, &spatial_layers_arg,
103  &kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg,
104  &fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg,
105  &max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg,
106  &lag_in_frame_arg, &threads_arg, &aqmode_arg,
107 #if OUTPUT_RC_STATS
108  &output_rc_stats_arg,
109 #endif
110 
111 #if CONFIG_VP9_HIGHBITDEPTH
112  &bitdepth_arg,
113 #endif
114  &speed_arg,
115  &rc_end_usage_arg, NULL
116 };
117 
// Application defaults, applied by parse_command_line() before any option
// from the command line is processed.

// Input selection.
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;

// Source geometry and timing.
static const uint32_t default_width = 1920;
static const uint32_t default_height = 1080;
static const uint32_t default_timebase_num = 1;
static const uint32_t default_timebase_den = 60;

// Rate and keyframe settings.
static const uint32_t default_bitrate = 1000;
static const uint32_t default_kf_dist = 100;

// Layering.
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_temporal_layering_mode = 0;

// Miscellaneous.
static const uint32_t default_output_rc_stats = 0;
static const int32_t default_speed = -1;    // -1 means use library default.
static const uint32_t default_threads = 0;  // zero means use library default.
132 
133 typedef struct {
134  const char *input_filename;
135  const char *output_filename;
136  uint32_t frames_to_code;
137  uint32_t frames_to_skip;
138  struct VpxInputContext input_ctx;
139  stats_io_t rc_stats;
140  int passes;
141  int pass;
142 } AppInput;
143 
144 static const char *exec_name;
145 
146 void usage_exit(void) {
147  fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
148  exec_name);
149  fprintf(stderr, "Options:\n");
150  arg_show_usage(stderr, svc_args);
151  exit(EXIT_FAILURE);
152 }
153 
154 static void parse_command_line(int argc, const char **argv_,
155  AppInput *app_input, SvcContext *svc_ctx,
156  vpx_codec_enc_cfg_t *enc_cfg) {
157  struct arg arg = {0};
158  char **argv = NULL;
159  char **argi = NULL;
160  char **argj = NULL;
161  vpx_codec_err_t res;
162  int passes = 0;
163  int pass = 0;
164  const char *fpf_file_name = NULL;
165  unsigned int min_bitrate = 0;
166  unsigned int max_bitrate = 0;
167  char string_options[1024] = {0};
168 
169  // initialize SvcContext with parameters that will be passed to vpx_svc_init
170  svc_ctx->log_level = SVC_LOG_DEBUG;
171  svc_ctx->spatial_layers = default_spatial_layers;
172  svc_ctx->temporal_layers = default_temporal_layers;
173  svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
174 #if OUTPUT_RC_STATS
175  svc_ctx->output_rc_stat = default_output_rc_stats;
176 #endif
177  svc_ctx->speed = default_speed;
178  svc_ctx->threads = default_threads;
179 
180  // start with default encoder configuration
181  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
182  if (res) {
183  die("Failed to get config: %s\n", vpx_codec_err_to_string(res));
184  }
185  // update enc_cfg with app default values
186  enc_cfg->g_w = default_width;
187  enc_cfg->g_h = default_height;
188  enc_cfg->g_timebase.num = default_timebase_num;
189  enc_cfg->g_timebase.den = default_timebase_den;
190  enc_cfg->rc_target_bitrate = default_bitrate;
191  enc_cfg->kf_min_dist = default_kf_dist;
192  enc_cfg->kf_max_dist = default_kf_dist;
193  enc_cfg->rc_end_usage = VPX_CQ;
194 
195  // initialize AppInput with default values
196  app_input->frames_to_code = default_frames_to_code;
197  app_input->frames_to_skip = default_frames_to_skip;
198 
199  // process command line options
200  argv = argv_dup(argc - 1, argv_ + 1);
201  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
202  arg.argv_step = 1;
203 
204  if (arg_match(&arg, &frames_arg, argi)) {
205  app_input->frames_to_code = arg_parse_uint(&arg);
206  } else if (arg_match(&arg, &width_arg, argi)) {
207  enc_cfg->g_w = arg_parse_uint(&arg);
208  } else if (arg_match(&arg, &height_arg, argi)) {
209  enc_cfg->g_h = arg_parse_uint(&arg);
210  } else if (arg_match(&arg, &timebase_arg, argi)) {
211  enc_cfg->g_timebase = arg_parse_rational(&arg);
212  } else if (arg_match(&arg, &bitrate_arg, argi)) {
213  enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
214  } else if (arg_match(&arg, &skip_frames_arg, argi)) {
215  app_input->frames_to_skip = arg_parse_uint(&arg);
216  } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
217  svc_ctx->spatial_layers = arg_parse_uint(&arg);
218  } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
219  svc_ctx->temporal_layers = arg_parse_uint(&arg);
220 #if OUTPUT_RC_STATS
221  } else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
222  svc_ctx->output_rc_stat = arg_parse_uint(&arg);
223 #endif
224  } else if (arg_match(&arg, &speed_arg, argi)) {
225  svc_ctx->speed = arg_parse_uint(&arg);
226  } else if (arg_match(&arg, &aqmode_arg, argi)) {
227  svc_ctx->aqmode = arg_parse_uint(&arg);
228  } else if (arg_match(&arg, &threads_arg, argi)) {
229  svc_ctx->threads = arg_parse_uint(&arg);
230  } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
231  svc_ctx->temporal_layering_mode =
232  enc_cfg->temporal_layering_mode = arg_parse_int(&arg);
233  if (svc_ctx->temporal_layering_mode) {
234  enc_cfg->g_error_resilient = 1;
235  }
236  } else if (arg_match(&arg, &kf_dist_arg, argi)) {
237  enc_cfg->kf_min_dist = arg_parse_uint(&arg);
238  enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
239  } else if (arg_match(&arg, &scale_factors_arg, argi)) {
240  snprintf(string_options, sizeof(string_options), "%s scale-factors=%s",
241  string_options, arg.val);
242  } else if (arg_match(&arg, &passes_arg, argi)) {
243  passes = arg_parse_uint(&arg);
244  if (passes < 1 || passes > 2) {
245  die("Error: Invalid number of passes (%d)\n", passes);
246  }
247  } else if (arg_match(&arg, &pass_arg, argi)) {
248  pass = arg_parse_uint(&arg);
249  if (pass < 1 || pass > 2) {
250  die("Error: Invalid pass selected (%d)\n", pass);
251  }
252  } else if (arg_match(&arg, &fpf_name_arg, argi)) {
253  fpf_file_name = arg.val;
254  } else if (arg_match(&arg, &min_q_arg, argi)) {
255  snprintf(string_options, sizeof(string_options), "%s min-quantizers=%s",
256  string_options, arg.val);
257  } else if (arg_match(&arg, &max_q_arg, argi)) {
258  snprintf(string_options, sizeof(string_options), "%s max-quantizers=%s",
259  string_options, arg.val);
260  } else if (arg_match(&arg, &min_bitrate_arg, argi)) {
261  min_bitrate = arg_parse_uint(&arg);
262  } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
263  max_bitrate = arg_parse_uint(&arg);
264  } else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
265  enc_cfg->g_lag_in_frames = arg_parse_uint(&arg);
266  } else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
267  enc_cfg->rc_end_usage = arg_parse_uint(&arg);
268 #if CONFIG_VP9_HIGHBITDEPTH
269  } else if (arg_match(&arg, &bitdepth_arg, argi)) {
270  enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg);
271  switch (enc_cfg->g_bit_depth) {
272  case VPX_BITS_8:
273  enc_cfg->g_input_bit_depth = 8;
274  enc_cfg->g_profile = 0;
275  break;
276  case VPX_BITS_10:
277  enc_cfg->g_input_bit_depth = 10;
278  enc_cfg->g_profile = 2;
279  break;
280  case VPX_BITS_12:
281  enc_cfg->g_input_bit_depth = 12;
282  enc_cfg->g_profile = 2;
283  break;
284  default:
285  die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
286  break;
287  }
288 #endif // CONFIG_VP9_HIGHBITDEPTH
289  } else {
290  ++argj;
291  }
292  }
293 
294  // There will be a space in front of the string options
295  if (strlen(string_options) > 0)
296  vpx_svc_set_options(svc_ctx, string_options + 1);
297 
298  if (passes == 0 || passes == 1) {
299  if (pass) {
300  fprintf(stderr, "pass is ignored since there's only one pass\n");
301  }
302  enc_cfg->g_pass = VPX_RC_ONE_PASS;
303  } else {
304  if (pass == 0) {
305  die("pass must be specified when passes is 2\n");
306  }
307 
308  if (fpf_file_name == NULL) {
309  die("fpf must be specified when passes is 2\n");
310  }
311 
312  if (pass == 1) {
313  enc_cfg->g_pass = VPX_RC_FIRST_PASS;
314  if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) {
315  fatal("Failed to open statistics store");
316  }
317  } else {
318  enc_cfg->g_pass = VPX_RC_LAST_PASS;
319  if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) {
320  fatal("Failed to open statistics store");
321  }
322  enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats);
323  }
324  app_input->passes = passes;
325  app_input->pass = pass;
326  }
327 
328  if (enc_cfg->rc_target_bitrate > 0) {
329  if (min_bitrate > 0) {
330  enc_cfg->rc_2pass_vbr_minsection_pct =
331  min_bitrate * 100 / enc_cfg->rc_target_bitrate;
332  }
333  if (max_bitrate > 0) {
334  enc_cfg->rc_2pass_vbr_maxsection_pct =
335  max_bitrate * 100 / enc_cfg->rc_target_bitrate;
336  }
337  }
338 
339  // Check for unrecognized options
340  for (argi = argv; *argi; ++argi)
341  if (argi[0][0] == '-' && strlen(argi[0]) > 1)
342  die("Error: Unrecognized option %s\n", *argi);
343 
344  if (argv[0] == NULL || argv[1] == 0) {
345  usage_exit();
346  }
347  app_input->input_filename = argv[0];
348  app_input->output_filename = argv[1];
349  free(argv);
350 
351  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
352  enc_cfg->g_h % 2)
353  die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
354 
355  printf(
356  "Codec %s\nframes: %d, skip: %d\n"
357  "layers: %d\n"
358  "width %d, height: %d,\n"
359  "num: %d, den: %d, bitrate: %d,\n"
360  "gop size: %d\n",
361  vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
362  app_input->frames_to_skip,
363  svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
364  enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
365  enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
366 }
367 
368 #if OUTPUT_RC_STATS
369 // For rate control encoding stats.
370 struct RateControlStats {
371  // Number of input frames per layer.
372  int layer_input_frames[VPX_MAX_LAYERS];
373  // Total (cumulative) number of encoded frames per layer.
374  int layer_tot_enc_frames[VPX_MAX_LAYERS];
375  // Number of encoded non-key frames per layer.
376  int layer_enc_frames[VPX_MAX_LAYERS];
377  // Framerate per layer (cumulative).
378  double layer_framerate[VPX_MAX_LAYERS];
379  // Target average frame size per layer (per-frame-bandwidth per layer).
380  double layer_pfb[VPX_MAX_LAYERS];
381  // Actual average frame size per layer.
382  double layer_avg_frame_size[VPX_MAX_LAYERS];
383  // Average rate mismatch per layer (|target - actual| / target).
384  double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
385  // Actual encoding bitrate per layer (cumulative).
386  double layer_encoding_bitrate[VPX_MAX_LAYERS];
387  // Average of the short-time encoder actual bitrate.
388  // TODO(marpan): Should we add these short-time stats for each layer?
389  double avg_st_encoding_bitrate;
390  // Variance of the short-time encoder actual bitrate.
391  double variance_st_encoding_bitrate;
392  // Window (number of frames) for computing short-time encoding bitrate.
393  int window_size;
394  // Number of window measurements.
395  int window_count;
396 };
397 
398 // Note: these rate control stats assume only 1 key frame in the
399 // sequence (i.e., first frame only).
400 static void set_rate_control_stats(struct RateControlStats *rc,
401  vpx_codec_enc_cfg_t *cfg) {
402  unsigned int sl, tl;
403  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
404  // per-frame-bandwidth, for the rate control encoding stats below.
405  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
406 
407  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
408  for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
409  const int layer = sl * cfg->ts_number_layers + tl;
410  const int tlayer0 = sl * cfg->ts_number_layers;
411  rc->layer_framerate[layer] =
412  framerate / cfg->ts_rate_decimator[tl];
413  if (tl > 0) {
414  rc->layer_pfb[layer] = 1000.0 *
415  (cfg->layer_target_bitrate[layer] -
416  cfg->layer_target_bitrate[layer - 1]) /
417  (rc->layer_framerate[layer] -
418  rc->layer_framerate[layer - 1]);
419  } else {
420  rc->layer_pfb[tlayer0] = 1000.0 *
421  cfg->layer_target_bitrate[tlayer0] /
422  rc->layer_framerate[tlayer0];
423  }
424  rc->layer_input_frames[layer] = 0;
425  rc->layer_enc_frames[layer] = 0;
426  rc->layer_tot_enc_frames[layer] = 0;
427  rc->layer_encoding_bitrate[layer] = 0.0;
428  rc->layer_avg_frame_size[layer] = 0.0;
429  rc->layer_avg_rate_mismatch[layer] = 0.0;
430  }
431  }
432  rc->window_count = 0;
433  rc->window_size = 15;
434  rc->avg_st_encoding_bitrate = 0.0;
435  rc->variance_st_encoding_bitrate = 0.0;
436 }
437 
438 static void printout_rate_control_summary(struct RateControlStats *rc,
439  vpx_codec_enc_cfg_t *cfg,
440  int frame_cnt) {
441  unsigned int sl, tl;
442  int tot_num_frames = 0;
443  double perc_fluctuation = 0.0;
444  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
445  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
447  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
448  for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
449  const int layer = sl * cfg->ts_number_layers + tl;
450  const int num_dropped = (tl > 0) ?
451  (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) :
452  (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1);
453  if (!sl)
454  tot_num_frames += rc->layer_input_frames[layer];
455  rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
456  rc->layer_encoding_bitrate[layer] / tot_num_frames;
457  rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] /
458  rc->layer_enc_frames[layer];
459  rc->layer_avg_rate_mismatch[layer] =
460  100.0 * rc->layer_avg_rate_mismatch[layer] /
461  rc->layer_enc_frames[layer];
462  printf("For layer#: sl%d tl%d \n", sl, tl);
463  printf("Bitrate (target vs actual): %d %f.0 kbps\n",
464  cfg->layer_target_bitrate[layer],
465  rc->layer_encoding_bitrate[layer]);
466  printf("Average frame size (target vs actual): %f %f bits\n",
467  rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
468  printf("Average rate_mismatch: %f\n",
469  rc->layer_avg_rate_mismatch[layer]);
470  printf("Number of input frames, encoded (non-key) frames, "
471  "and percent dropped frames: %d %d %f.0 \n",
472  rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
473  100.0 * num_dropped / rc->layer_input_frames[layer]);
474  printf("\n");
475  }
476  }
477  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
478  rc->variance_st_encoding_bitrate =
479  rc->variance_st_encoding_bitrate / rc->window_count -
480  (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
481  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
482  rc->avg_st_encoding_bitrate;
483  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
484  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
485  rc->avg_st_encoding_bitrate,
486  sqrt(rc->variance_st_encoding_bitrate),
487  perc_fluctuation);
488  if (frame_cnt != tot_num_frames)
489  die("Error: Number of input frames not equal to output encoded frames != "
490  "%d tot_num_frames = %d\n", frame_cnt, tot_num_frames);
491 }
492 
493 vpx_codec_err_t parse_superframe_index(const uint8_t *data,
494  size_t data_sz,
495  uint32_t sizes[8], int *count) {
496  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
497  // it is a super frame index. If the last byte of real video compression
498  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
499  // not the associated matching marker byte at the front of the index we have
500  // an invalid bitstream and need to return an error.
501 
502  uint8_t marker;
503 
504  marker = *(data + data_sz - 1);
505  *count = 0;
506 
507 
508  if ((marker & 0xe0) == 0xc0) {
509  const uint32_t frames = (marker & 0x7) + 1;
510  const uint32_t mag = ((marker >> 3) & 0x3) + 1;
511  const size_t index_sz = 2 + mag * frames;
512 
513  // This chunk is marked as having a superframe index but doesn't have
514  // enough data for it, thus it's an invalid superframe index.
515  if (data_sz < index_sz)
517 
518  {
519  const uint8_t marker2 = *(data + data_sz - index_sz);
520 
521  // This chunk is marked as having a superframe index but doesn't have
522  // the matching marker byte at the front of the index therefore it's an
523  // invalid chunk.
524  if (marker != marker2)
526  }
527 
528  {
529  // Found a valid superframe index.
530  uint32_t i, j;
531  const uint8_t *x = &data[data_sz - index_sz + 1];
532 
533  for (i = 0; i < frames; ++i) {
534  uint32_t this_sz = 0;
535 
536  for (j = 0; j < mag; ++j)
537  this_sz |= (*x++) << (j * 8);
538  sizes[i] = this_sz;
539  }
540  *count = frames;
541  }
542  }
543  return VPX_CODEC_OK;
544 }
545 #endif
546 
547 // Example pattern for spatial layers and 2 temporal layers used in the
548 // bypass/flexible mode. The pattern corresponds to the pattern
549 // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
550 // non-flexible mode.
//
// For every spatial layer in [0, num_spatial_layers) this fills
// ref_frame_config->frame_flags[] and the lst/gld/alt buffer index arrays,
// choosing values from the temporal layer `tl` (only 0 and 1 are handled)
// and, for tl == 0 on non-base spatial layers, from `is_key_frame`.
// The incoming value of `sl` is never read: the loop below overwrites it.
//
// NOTE(review): each frame_flags assignment below ends in a trailing `|`;
// the continuation lines holding the remaining flags are not present in
// this listing and must be restored from the upstream source before this
// function can compile.
551 void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
552  int is_key_frame,
553  vpx_svc_ref_frame_config_t *ref_frame_config) {
554  for (sl = 0; sl < num_spatial_layers; ++sl) {
 // Part 1: reference/update flags for this spatial layer.
555  if (!tl) {
556  if (!sl) {
557  ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF |
561  } else {
562  if (is_key_frame) {
563  ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_LAST |
567  } else {
568  ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF |
571  }
572  }
573  } else if (tl == 1) {
574  if (!sl) {
575  ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF |
579  } else {
580  ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF |
583  }
584  }
 // Part 2: buffer indices for this spatial layer.
 // tl == 0: last = sl, golden = sl - 1 (0 for the base layer), alt = 0.
585  if (tl == 0) {
586  ref_frame_config->lst_fb_idx[sl] = sl;
587  if (sl)
588  ref_frame_config->gld_fb_idx[sl] = sl - 1;
589  else
590  ref_frame_config->gld_fb_idx[sl] = 0;
591  ref_frame_config->alt_fb_idx[sl] = 0;
 // tl == 1: golden and alt use indices offset by num_spatial_layers.
592  } else if (tl == 1) {
593  ref_frame_config->lst_fb_idx[sl] = sl;
594  ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
595  ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
596  }
597  }
598 }
599 
// Program entry point: parses the command line, opens the raw input and the
// IVF output(s), initializes the SVC encoder, feeds it one frame per loop
// iteration (plus one final flush call), writes every compressed packet,
// optionally gathers rate control statistics, then prints timing figures and
// releases all resources. Returns EXIT_SUCCESS; failures terminate the
// process through die()/die_codec().
//
// NOTE(review): three statements in this listing have lost their
// continuation lines (marked below); restore them from the upstream source
// before building.
600 int main(int argc, const char **argv) {
601  AppInput app_input = {0};
602  VpxVideoWriter *writer = NULL;
603  VpxVideoInfo info = {0};
604  vpx_codec_ctx_t codec;
605  vpx_codec_enc_cfg_t enc_cfg;
606  SvcContext svc_ctx;
607  uint32_t i;
608  uint32_t frame_cnt = 0;
609  vpx_image_t raw;
610  vpx_codec_err_t res;
611  int pts = 0; /* PTS starts at 0 */
612  int frame_duration = 1; /* 1 timebase tick per frame */
613  FILE *infile = NULL;
614  int end_of_stream = 0;
615  int frames_received = 0;
 // NOTE(review): layer_id, ref_frame_config and sl are declared only when
 // OUTPUT_RC_STATS is set, yet the bypass-mode code in the encode loop uses
 // them outside any such guard.
616 #if OUTPUT_RC_STATS
617  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
618  struct RateControlStats rc;
619  vpx_svc_layer_id_t layer_id;
620  vpx_svc_ref_frame_config_t ref_frame_config;
621  int sl, tl;
622  double sum_bitrate = 0.0;
623  double sum_bitrate2 = 0.0;
624  double framerate = 30.0;
625 #endif
626  struct vpx_usec_timer timer;
627  int64_t cx_time = 0;
628  memset(&svc_ctx, 0, sizeof(svc_ctx));
629  svc_ctx.log_print = 1;
630  exec_name = argv[0];
631  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
632 
633  // Allocate image buffer
634 #if CONFIG_VP9_HIGHBITDEPTH
635  if (!vpx_img_alloc(&raw, enc_cfg.g_input_bit_depth == 8 ?
636  VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016,
637  enc_cfg.g_w, enc_cfg.g_h, 32)) {
638  die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
639  }
640 #else
641  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) {
642  die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
643  }
644 #endif // CONFIG_VP9_HIGHBITDEPTH
645 
646  if (!(infile = fopen(app_input.input_filename, "rb")))
647  die("Failed to open %s for reading\n", app_input.input_filename);
648 
649  // Initialize codec
650  if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
651  VPX_CODEC_OK)
652  die("Failed to initialize encoder\n");
653 
654 #if OUTPUT_RC_STATS
655  if (svc_ctx.output_rc_stat) {
656  set_rate_control_stats(&rc, &enc_cfg);
 // Integer division: a non-integral timebase ratio is truncated here.
657  framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
658  }
659 #endif
660 
661  info.codec_fourcc = VP9_FOURCC;
662  info.time_base.numerator = enc_cfg.g_timebase.num;
663  info.time_base.denominator = enc_cfg.g_timebase.den;
664 
665  if (!(app_input.passes == 2 && app_input.pass == 1)) {
666  // We don't save the bitstream for the 1st pass on two pass rate control
667  writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF,
668  &info);
669  if (!writer)
670  die("Failed to open %s for writing\n", app_input.output_filename);
671  }
672 #if OUTPUT_RC_STATS
673  // For now, just write temporal layer streams.
674  // TODO(wonkap): do spatial by re-writing superframe.
675  if (svc_ctx.output_rc_stat) {
 // One extra IVF file per temporal layer, named "<output>_t<tl>.ivf".
676  for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
677  char file_name[PATH_MAX];
678 
679  snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
680  app_input.output_filename, tl);
681  outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
682  if (!outfile[tl])
683  die("Failed to open %s for writing", file_name);
684  }
685  }
686 #endif
687 
688  // skip initial frames
689  for (i = 0; i < app_input.frames_to_skip; ++i)
690  vpx_img_read(&raw, infile);
691 
692  if (svc_ctx.speed != -1)
693  vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
694  if (svc_ctx.threads)
695  vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
696  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
 // NOTE(review): the statement controlled by the `if` above is missing from
 // this listing; as written, the `if` would govern the encode loop below.
698 
699 
700  // Encode frames
701  while (!end_of_stream) {
702  vpx_codec_iter_t iter = NULL;
703  const vpx_codec_cx_pkt_t *cx_pkt;
704  if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) {
705  // We need one extra vpx_svc_encode call at end of stream to flush
706  // encoder and get remaining data
707  end_of_stream = 1;
708  }
709 
710  // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
711  // and the buffer indices for each spatial layer of the current
712  // (super)frame to be encoded. The temporal layer_id for the current frame
713  // also needs to be set.
714  // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
715  // mode to "VP9E_LAYERING_MODE_BYPASS".
716  if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
717  // Example for 2 temporal layers.
718  if (frame_cnt % 2 == 0)
719  layer_id.temporal_layer_id = 0;
720  else
721  layer_id.temporal_layer_id = 1;
722  // Note that we only set the temporal layer_id, since we are calling
723  // the encode for the whole superframe. The encoder will internally loop
724  // over all the spatial layers for the current superframe.
725  vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
 // `sl` may still be uninitialized when passed here; the callee overwrites
 // its copy before reading it.
726  set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
727  svc_ctx.spatial_layers,
728  frame_cnt == 0,
729  &ref_frame_config);
 // NOTE(review): the line below is the tail of a call whose first line is
 // missing from this listing.
731  &ref_frame_config);
732  }
733 
 // Only the encode call itself is timed; cx_time accumulates microseconds.
734  vpx_usec_timer_start(&timer);
 // A NULL image on the final iteration flushes the encoder.
735  res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
736  pts, frame_duration, svc_ctx.speed >= 5 ?
 // NOTE(review): the two operands of the `?` above are missing from this
 // listing.
738  vpx_usec_timer_mark(&timer);
739  cx_time += vpx_usec_timer_elapsed(&timer);
740 
741  printf("%s", vpx_svc_get_message(&svc_ctx));
742  if (res != VPX_CODEC_OK) {
743  die_codec(&codec, "Failed to encode frame");
744  }
745 
 // Drain every packet produced by this encode call.
746  while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
747  switch (cx_pkt->kind) {
748  case VPX_CODEC_CX_FRAME_PKT: {
749  if (cx_pkt->data.frame.sz > 0) {
750 #if OUTPUT_RC_STATS
751  uint32_t sizes[8];
752  int count = 0;
753 #endif
754  vpx_video_writer_write_frame(writer,
755  cx_pkt->data.frame.buf,
756  cx_pkt->data.frame.sz,
757  cx_pkt->data.frame.pts);
758 #if OUTPUT_RC_STATS
759  // TODO(marpan/wonkap): Put this (to line728) in separate function.
760  if (svc_ctx.output_rc_stat) {
761  vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
 // The return value is not checked; `sizes` entries at or beyond `count`
 // stay uninitialized.
762  parse_superframe_index(cx_pkt->data.frame.buf,
763  cx_pkt->data.frame.sz, sizes, &count);
764  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
765  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
766  layer_id.temporal_layer_id];
767  }
 // A frame of temporal layer T is written to the files of layer T and of
 // every higher temporal layer.
768  for (tl = layer_id.temporal_layer_id;
769  tl < enc_cfg.ts_number_layers; ++tl) {
770  vpx_video_writer_write_frame(outfile[tl],
771  cx_pkt->data.frame.buf,
772  cx_pkt->data.frame.sz,
773  cx_pkt->data.frame.pts);
774  }
775 
776  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
777  for (tl = layer_id.temporal_layer_id;
778  tl < enc_cfg.ts_number_layers; ++tl) {
779  const int layer = sl * enc_cfg.ts_number_layers + tl;
780  ++rc.layer_tot_enc_frames[layer];
781  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
782  // Keep count of rate control stats per layer, for non-key
783  // frames.
784  if (tl == layer_id.temporal_layer_id &&
785  !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
786  rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
787  rc.layer_avg_rate_mismatch[layer] +=
788  fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
789  rc.layer_pfb[layer];
790  ++rc.layer_enc_frames[layer];
791  }
792  }
793  }
794 
795  // Update for short-time encoding bitrate states, for moving
796  // window of size rc->window, shifted by rc->window / 2.
797  // Ignore first window segment, due to key frame.
798  if (frame_cnt > rc.window_size) {
799  tl = layer_id.temporal_layer_id;
800  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
801  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
802  }
803  if (frame_cnt % rc.window_size == 0) {
804  rc.window_count += 1;
805  rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
806  rc.variance_st_encoding_bitrate +=
807  (sum_bitrate / rc.window_size) *
808  (sum_bitrate / rc.window_size);
809  sum_bitrate = 0.0;
810  }
811  }
812 
813  // Second shifted window.
814  if (frame_cnt > rc.window_size + rc.window_size / 2) {
815  tl = layer_id.temporal_layer_id;
816  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
817  sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
818  }
819 
820  if (frame_cnt > 2 * rc.window_size &&
821  frame_cnt % rc.window_size == 0) {
822  rc.window_count += 1;
823  rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
824  rc.variance_st_encoding_bitrate +=
825  (sum_bitrate2 / rc.window_size) *
826  (sum_bitrate2 / rc.window_size);
827  sum_bitrate2 = 0.0;
828  }
829  }
830  }
831 #endif
832  }
833 
834  printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
835  !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
836  (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
837  ++frames_received;
838  break;
839  }
 // First-pass statistics are appended to the store opened by
 // parse_command_line().
840  case VPX_CODEC_STATS_PKT: {
841  stats_write(&app_input.rc_stats,
842  cx_pkt->data.twopass_stats.buf,
843  cx_pkt->data.twopass_stats.sz);
844  break;
845  }
846  default: {
847  break;
848  }
849  }
850  }
851 
 // The flush iteration does not count as a frame.
852  if (!end_of_stream) {
853  ++frame_cnt;
854  pts += frame_duration;
855  }
856  }
857  printf("Processed %d frames\n", frame_cnt);
858  fclose(infile);
859 #if OUTPUT_RC_STATS
860  if (svc_ctx.output_rc_stat) {
861  printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
862  printf("\n");
863  }
864 #endif
865  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
866  if (app_input.passes == 2)
867  stats_close(&app_input.rc_stats, 1);
868  if (writer) {
869  vpx_video_writer_close(writer);
870  }
871 #if OUTPUT_RC_STATS
872  if (svc_ctx.output_rc_stat) {
873  for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
874  vpx_video_writer_close(outfile[tl]);
875  }
876  }
877 #endif
 // Prints frame count, average encode time per frame in milliseconds, and
 // frames per second. Both ratios divide by zero when no frame was encoded.
878  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
879  frame_cnt,
880  1000 * (float)cx_time / (double)(frame_cnt * 1000000),
881  1000000 * (double)frame_cnt / (double)cx_time);
882  vpx_img_free(&raw);
883  // display average size, psnr
884  printf("%s", vpx_svc_dump_statistics(&svc_ctx));
885  vpx_svc_release(&svc_ctx);
886  return EXIT_SUCCESS;
887 }
vpx_fixed_buf_t twopass_stats
Definition: vpx_encoder.h:214
unsigned int ts_number_layers
Number of temporal coding layers.
Definition: vpx_encoder.h:715
Codec control function to set encoder internal speed settings.
Definition: vp8cx.h:173
#define VPX_MAX_LAYERS
Definition: vpx_encoder.h:46
#define VP8_EFLAG_NO_REF_LAST
Don't reference the last frame.
Definition: vp8cx.h:67
#define VP8_EFLAG_NO_UPD_GF
Don't update the golden frame.
Definition: vp8cx.h:101
Image Descriptor.
Definition: vpx_image.h:88
Describes the encoder algorithm interface to applications.
const char * vpx_codec_iface_name(vpx_codec_iface_t *iface)
Return the name for a given interface.
const char * vpx_codec_err_to_string(vpx_codec_err_t err)
Convert error number to printable string.
int lst_fb_idx[5]
Definition: vp8cx.h:693
#define VPX_TS_MAX_LAYERS
Definition: vpx_encoder.h:40
struct vpx_rational g_timebase
Stream timebase units.
Definition: vpx_encoder.h:397
unsigned int layer_target_bitrate[12]
Target bitrate for each spatial/temporal layer.
Definition: vpx_encoder.h:755
#define VP8_EFLAG_NO_REF_GF
Don't reference the golden frame.
Definition: vp8cx.h:76
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: vpx_encoder.h:383
int den
Definition: vpx_encoder.h:261
Definition: vpx_encoder.h:177
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: vpx_encoder.h:685
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: vpx_encoder.h:429
Encoder configuration structure.
Definition: vpx_encoder.h:314
The coded data for this stream is corrupt or incomplete.
Definition: vpx_codec.h:129
Encoder output packet.
Definition: vpx_encoder.h:195
void * buf
Definition: vpx_encoder.h:109
unsigned int ts_rate_decimator[5]
Frame rate decimation factor for each temporal layer.
Definition: vpx_encoder.h:729
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: vpx_encoder.h:675
Definition: vpx_encoder.h:268
unsigned int g_profile
Bitstream profile to use.
Definition: vpx_encoder.h:346
Definition: vpx_encoder.h:269
Codec control function to set number of tile columns.
Definition: vp8cx.h:362
struct vpx_codec_cx_pkt::@1::@2 frame
int frame_flags[5]
Definition: vp8cx.h:692
vpx_image_t * vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
Definition: vpx_image.h:56
unsigned int g_w
Width of the frame.
Definition: vpx_encoder.h:357
Codec control function to set adaptive quantization mode.
Definition: vp8cx.h:409
Codec control function to get svc layer ID.
Definition: vp8cx.h:475
unsigned int g_h
Height of the frame.
Definition: vpx_encoder.h:367
enum vpx_codec_cx_pkt_kind kind
Definition: vpx_encoder.h:196
vp9 svc layer parameters
Definition: vp8cx.h:678
Operation completed without error.
Definition: vpx_codec.h:91
#define VP8_EFLAG_NO_UPD_LAST
Don't update the last frame.
Definition: vp8cx.h:93
void vpx_img_free(vpx_image_t *img)
Close an image descriptor.
unsigned int rc_target_bitrate
Target data rate.
Definition: vpx_encoder.h:525
#define VPX_DL_REALTIME
Definition: vpx_encoder.h:911
int num
Definition: vpx_encoder.h:260
Definition: vpx_codec.h:222
Codec control function to set the frame flags and buffer indices for spatial layers. The frame flags and buffer indices are set using the struct vpx_svc_ref_frame_config defined below.
Definition: vp8cx.h:548
enum vpx_enc_pass g_pass
Multi-pass Encoding Mode.
Definition: vpx_encoder.h:414
#define VPX_DL_GOOD_QUALITY
Definition: vpx_encoder.h:914
unsigned int ss_number_layers
Number of spatial coding layers.
Definition: vpx_encoder.h:695
vpx_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: vpx_encoder.h:375
Provides definitions for using VP8 or VP9 encoder algorithm within the vpx Codec Interface.
Bypass mode. Used when application needs to control temporal layering. This will only work when the n...
Definition: vp8cx.h:586
vpx_codec_err_t
Algorithm return codes.
Definition: vpx_codec.h:89
const vpx_codec_cx_pkt_t * vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter)
Encoded data iterator.
union vpx_codec_cx_pkt::@1 data
int temporal_layering_mode
Temporal layering mode indicating which temporal layering scheme to use.
Definition: vpx_encoder.h:763
vpx_fixed_buf_t rc_twopass_stats_in
Two-pass stats buffer.
Definition: vpx_encoder.h:512
vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, unsigned int reserved)
Get a default configuration.
Definition: vpx_encoder.h:277
#define vpx_codec_control(ctx, id, data)
vpx_codec_control wrapper macro
Definition: vpx_codec.h:407
#define VP8_EFLAG_NO_REF_ARF
Don't reference the alternate reference frame.
Definition: vp8cx.h:85
vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx)
Destroy a codec instance.
size_t sz
Definition: vpx_encoder.h:110
Definition: vpx_codec.h:220
vp9 svc frame flag parameters.
Definition: vp8cx.h:691
#define VPX_FRAME_IS_KEY
Definition: vpx_encoder.h:130
Definition: vpx_codec.h:221
int alt_fb_idx[5]
Definition: vp8cx.h:695
const void * vpx_codec_iter_t
Iterator.
Definition: vpx_codec.h:188
Definition: vpx_encoder.h:176
unsigned int rc_2pass_vbr_maxsection_pct
Two-pass mode per-GOP maximum bitrate.
Definition: vpx_encoder.h:652
vpx_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: vpx_encoder.h:406
#define VP8_EFLAG_NO_UPD_ARF
Don't update the alternate reference frame.
Definition: vp8cx.h:109
unsigned int rc_2pass_vbr_minsection_pct
Two-pass mode per-GOP minimum bitrate.
Definition: vpx_encoder.h:644
int gld_fb_idx[5]
Definition: vp8cx.h:694
Codec control function to set svc layer for spatial and temporal.
Definition: vp8cx.h:458
enum vpx_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: vpx_encoder.h:504
Definition: vpx_encoder.h:267
Codec context structure.
Definition: vpx_codec.h:199