AOMedia AV1 Codec
svc_encoder_rtc
1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
3  *
4  * Use of this source code is governed by a BSD-style license
5  * that can be found in the LICENSE file in the root of the source
6  * tree. An additional intellectual property rights grant can be found
7  * in the file PATENTS. All contributing project authors may
8  * be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 // This is an example demonstrating how to implement a multi-layer AOM
12 // encoding scheme for RTC video applications.
13 
14 #include <assert.h>
15 #include <limits.h>
16 #include <math.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 
21 #include <memory>
22 
23 #include "config/aom_config.h"
24 
25 #if CONFIG_AV1_DECODER
26 #include "aom/aom_decoder.h"
27 #endif
28 #include "aom/aom_encoder.h"
29 #include "aom/aomcx.h"
30 #include "common/args.h"
31 #include "common/tools_common.h"
32 #include "common/video_writer.h"
33 #include "examples/encoder_util.h"
34 #include "aom_ports/aom_timer.h"
35 #include "av1/ratectrl_rtc.h"
36 
37 #define OPTION_BUFFER_SIZE 1024
38 
39 typedef struct {
40  const char *output_filename;
41  char options[OPTION_BUFFER_SIZE];
42  struct AvxInputContext input_ctx;
43  int speed;
44  int aq_mode;
45  int layering_mode;
46  int output_obu;
47  int decode;
48  int tune_content;
49  int show_psnr;
50  bool use_external_rc;
51 } AppInput;
52 
// Kinds of per-layer option lists. The enumerators also serve as indices into
// option_min_values[] and option_max_values[].
typedef enum {
  QUANTIZER = 0,
  BITRATE = 1,
  SCALE_FACTOR = 2,
  AUTO_ALT_REF = 3,
  // Number of option kinds above; used to size the limit tables.
  ALL_OPTION_TYPES = 4
} LAYER_OPTION_TYPE;
60 
61 static const arg_def_t outputfile =
62  ARG_DEF("o", "output", 1, "Output filename");
63 static const arg_def_t frames_arg =
64  ARG_DEF("f", "frames", 1, "Number of frames to encode");
65 static const arg_def_t threads_arg =
66  ARG_DEF("th", "threads", 1, "Number of threads to use");
67 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
68 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
69 static const arg_def_t timebase_arg =
70  ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
71 static const arg_def_t bitrate_arg = ARG_DEF(
72  "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
73 static const arg_def_t spatial_layers_arg =
74  ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
75 static const arg_def_t temporal_layers_arg =
76  ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
77 static const arg_def_t layering_mode_arg =
78  ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
79 static const arg_def_t kf_dist_arg =
80  ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
81 static const arg_def_t scale_factors_arg =
82  ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
83 static const arg_def_t min_q_arg =
84  ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
85 static const arg_def_t max_q_arg =
86  ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
87 static const arg_def_t speed_arg =
88  ARG_DEF("sp", "speed", 1, "Speed configuration");
89 static const arg_def_t aqmode_arg =
90  ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
91 static const arg_def_t bitrates_arg =
92  ARG_DEF("bl", "bitrates", 1,
93  "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
94 static const arg_def_t dropframe_thresh_arg =
95  ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
96 static const arg_def_t error_resilient_arg =
97  ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
98 static const arg_def_t output_obu_arg =
99  ARG_DEF(NULL, "output-obu", 1,
100  "Write OBUs when set to 1. Otherwise write IVF files.");
101 static const arg_def_t test_decode_arg =
102  ARG_DEF(NULL, "test-decode", 1,
103  "Attempt to test decoding the output when set to 1. Default is 1.");
104 static const arg_def_t psnr_arg =
105  ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
106 static const arg_def_t ext_rc_arg =
107  ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
108 static const struct arg_enum_list tune_content_enum[] = {
109  { "default", AOM_CONTENT_DEFAULT },
110  { "screen", AOM_CONTENT_SCREEN },
111  { "film", AOM_CONTENT_FILM },
112  { NULL, 0 }
113 };
114 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
115  NULL, "tune-content", 1, "Tune content type", tune_content_enum);
116 
#if CONFIG_AV1_HIGHBITDEPTH
// Names accepted by -d / --bit-depth and the AOM_BITS_* value each maps to.
static const struct arg_enum_list bitdepth_enum[] = {
  { "8", AOM_BITS_8 },
  { "10", AOM_BITS_10 },
  { NULL, 0 }  // Sentinel entry ending the list.
};

// -d / --bit-depth: only available in high-bit-depth builds.
static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
    "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
#endif  // CONFIG_AV1_HIGHBITDEPTH
125 
126 static const arg_def_t *svc_args[] = {
127  &frames_arg, &outputfile, &width_arg,
128  &height_arg, &timebase_arg, &bitrate_arg,
129  &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg,
130  &min_q_arg, &max_q_arg, &temporal_layers_arg,
131  &layering_mode_arg, &threads_arg, &aqmode_arg,
132 #if CONFIG_AV1_HIGHBITDEPTH
133  &bitdepth_arg,
134 #endif
135  &speed_arg, &bitrates_arg, &dropframe_thresh_arg,
136  &error_resilient_arg, &output_obu_arg, &test_decode_arg,
137  &tune_content_arg, &psnr_arg, NULL,
138 };
139 
// Fills an object with zero bytes in place. Pass the object itself (not a
// pointer to it): the macro takes its address and size.
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed in the usage line by usage_exit().
static const char *exec_name;
143 
144 void usage_exit(void) {
145  fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
146  exec_name);
147  fprintf(stderr, "Options:\n");
148  arg_show_usage(stderr, svc_args);
149  exit(EXIT_FAILURE);
150 }
151 
// Returns 1 when the first four bytes at |detect| are "YUV4", 0 otherwise.
static int file_is_y4m(const char detect[4]) {
  static const char kY4mMagic[4] = { 'Y', 'U', 'V', '4' };
  return !memcmp(detect, kY4mMagic, sizeof(kY4mMagic));
}
155 
// Returns 1 when the first four bytes at |detect| are "DKIF", 0 otherwise.
static int fourcc_is_ivf(const char detect[4]) {
  return memcmp(detect, "DKIF", 4) == 0 ? 1 : 0;
}
162 
163 static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
164  1 };
165 
166 static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
167 
168 static void open_input_file(struct AvxInputContext *input,
170  /* Parse certain options from the input file, if possible */
171  input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
172  : set_binary_mode(stdin);
173 
174  if (!input->file) fatal("Failed to open input file");
175 
176  if (!fseeko(input->file, 0, SEEK_END)) {
177  /* Input file is seekable. Figure out how long it is, so we can get
178  * progress info.
179  */
180  input->length = ftello(input->file);
181  rewind(input->file);
182  }
183 
184  /* Default to 1:1 pixel aspect ratio. */
185  input->pixel_aspect_ratio.numerator = 1;
186  input->pixel_aspect_ratio.denominator = 1;
187 
188  /* For RAW input sources, these bytes will applied on the first frame
189  * in read_frame().
190  */
191  input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
192  input->detect.position = 0;
193 
194  if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
195  if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
196  input->only_i420) >= 0) {
197  input->file_type = FILE_TYPE_Y4M;
198  input->width = input->y4m.pic_w;
199  input->height = input->y4m.pic_h;
200  input->pixel_aspect_ratio.numerator = input->y4m.par_n;
201  input->pixel_aspect_ratio.denominator = input->y4m.par_d;
202  input->framerate.numerator = input->y4m.fps_n;
203  input->framerate.denominator = input->y4m.fps_d;
204  input->fmt = input->y4m.aom_fmt;
205  input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
206  } else {
207  fatal("Unsupported Y4M stream.");
208  }
209  } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
210  fatal("IVF is not supported as input.");
211  } else {
212  input->file_type = FILE_TYPE_RAW;
213  }
214 }
215 
216 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
217  int *value0, int *value1) {
218  if (type == SCALE_FACTOR) {
219  *value0 = (int)strtol(input, &input, 10);
220  if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
221  *value1 = (int)strtol(input, &input, 10);
222 
223  if (*value0 < option_min_values[SCALE_FACTOR] ||
224  *value1 < option_min_values[SCALE_FACTOR] ||
225  *value0 > option_max_values[SCALE_FACTOR] ||
226  *value1 > option_max_values[SCALE_FACTOR] ||
227  *value0 > *value1) // num shouldn't be greater than den
229  } else {
230  *value0 = atoi(input);
231  if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
233  }
234  return AOM_CODEC_OK;
235 }
236 
237 static aom_codec_err_t parse_layer_options_from_string(
238  aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
239  int *option0, int *option1) {
241  char *input_string;
242  char *token;
243  const char *delim = ",";
244  int num_layers = svc_params->number_spatial_layers;
245  int i = 0;
246 
247  if (type == BITRATE)
248  num_layers =
249  svc_params->number_spatial_layers * svc_params->number_temporal_layers;
250 
251  if (input == NULL || option0 == NULL ||
252  (option1 == NULL && type == SCALE_FACTOR))
254 
255  const size_t input_length = strlen(input);
256  input_string = reinterpret_cast<char *>(malloc(input_length + 1));
257  if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
258  memcpy(input_string, input, input_length + 1);
259  token = strtok(input_string, delim); // NOLINT
260  for (i = 0; i < num_layers; ++i) {
261  if (token != NULL) {
262  res = extract_option(type, token, option0 + i, option1 + i);
263  if (res != AOM_CODEC_OK) break;
264  token = strtok(NULL, delim); // NOLINT
265  } else {
267  break;
268  }
269  }
270  free(input_string);
271  return res;
272 }
273 
274 static void parse_command_line(int argc, const char **argv_,
275  AppInput *app_input,
276  aom_svc_params_t *svc_params,
277  aom_codec_enc_cfg_t *enc_cfg) {
278  struct arg arg;
279  char **argv = NULL;
280  char **argi = NULL;
281  char **argj = NULL;
282  char string_options[1024] = { 0 };
283 
284  // Default settings
285  svc_params->number_spatial_layers = 1;
286  svc_params->number_temporal_layers = 1;
287  app_input->layering_mode = 0;
288  app_input->output_obu = 0;
289  app_input->decode = 1;
290  enc_cfg->g_threads = 1;
291  enc_cfg->rc_end_usage = AOM_CBR;
292 
293  // process command line options
294  argv = argv_dup(argc - 1, argv_ + 1);
295  if (!argv) {
296  fprintf(stderr, "Error allocating argument list\n");
297  exit(EXIT_FAILURE);
298  }
299  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
300  arg.argv_step = 1;
301 
302  if (arg_match(&arg, &outputfile, argi)) {
303  app_input->output_filename = arg.val;
304  } else if (arg_match(&arg, &width_arg, argi)) {
305  enc_cfg->g_w = arg_parse_uint(&arg);
306  } else if (arg_match(&arg, &height_arg, argi)) {
307  enc_cfg->g_h = arg_parse_uint(&arg);
308  } else if (arg_match(&arg, &timebase_arg, argi)) {
309  enc_cfg->g_timebase = arg_parse_rational(&arg);
310  } else if (arg_match(&arg, &bitrate_arg, argi)) {
311  enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
312  } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
313  svc_params->number_spatial_layers = arg_parse_uint(&arg);
314  } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
315  svc_params->number_temporal_layers = arg_parse_uint(&arg);
316  } else if (arg_match(&arg, &speed_arg, argi)) {
317  app_input->speed = arg_parse_uint(&arg);
318  if (app_input->speed > 11) {
319  aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
320  }
321  } else if (arg_match(&arg, &aqmode_arg, argi)) {
322  app_input->aq_mode = arg_parse_uint(&arg);
323  } else if (arg_match(&arg, &threads_arg, argi)) {
324  enc_cfg->g_threads = arg_parse_uint(&arg);
325  } else if (arg_match(&arg, &layering_mode_arg, argi)) {
326  app_input->layering_mode = arg_parse_int(&arg);
327  } else if (arg_match(&arg, &kf_dist_arg, argi)) {
328  enc_cfg->kf_min_dist = arg_parse_uint(&arg);
329  enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
330  } else if (arg_match(&arg, &scale_factors_arg, argi)) {
331  aom_codec_err_t res = parse_layer_options_from_string(
332  svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
333  svc_params->scaling_factor_den);
334  if (res != AOM_CODEC_OK) {
335  die("Failed to parse scale factors: %s\n",
337  }
338  } else if (arg_match(&arg, &min_q_arg, argi)) {
339  enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
340  } else if (arg_match(&arg, &max_q_arg, argi)) {
341  enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
342 #if CONFIG_AV1_HIGHBITDEPTH
343  } else if (arg_match(&arg, &bitdepth_arg, argi)) {
344  enc_cfg->g_bit_depth =
345  static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
346  switch (enc_cfg->g_bit_depth) {
347  case AOM_BITS_8:
348  enc_cfg->g_input_bit_depth = 8;
349  enc_cfg->g_profile = 0;
350  break;
351  case AOM_BITS_10:
352  enc_cfg->g_input_bit_depth = 10;
353  enc_cfg->g_profile = 0;
354  break;
355  default:
356  die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
357  }
358 #endif // CONFIG_VP9_HIGHBITDEPTH
359  } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
360  enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
361  } else if (arg_match(&arg, &error_resilient_arg, argi)) {
362  enc_cfg->g_error_resilient = arg_parse_uint(&arg);
363  if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
364  die("Invalid value for error resilient (0, 1): %d.",
365  enc_cfg->g_error_resilient);
366  } else if (arg_match(&arg, &output_obu_arg, argi)) {
367  app_input->output_obu = arg_parse_uint(&arg);
368  if (app_input->output_obu != 0 && app_input->output_obu != 1)
369  die("Invalid value for obu output flag (0, 1): %d.",
370  app_input->output_obu);
371  } else if (arg_match(&arg, &test_decode_arg, argi)) {
372  app_input->decode = arg_parse_uint(&arg);
373  if (app_input->decode != 0 && app_input->decode != 1)
374  die("Invalid value for test decode flag (0, 1): %d.",
375  app_input->decode);
376  } else if (arg_match(&arg, &tune_content_arg, argi)) {
377  app_input->tune_content = arg_parse_enum_or_int(&arg);
378  printf("tune content %d\n", app_input->tune_content);
379  } else if (arg_match(&arg, &psnr_arg, argi)) {
380  app_input->show_psnr = 1;
381  } else if (arg_match(&arg, &ext_rc_arg, argi)) {
382  app_input->use_external_rc = true;
383  } else {
384  ++argj;
385  }
386  }
387 
388  // Total bitrate needs to be parsed after the number of layers.
389  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
390  arg.argv_step = 1;
391  if (arg_match(&arg, &bitrates_arg, argi)) {
392  aom_codec_err_t res = parse_layer_options_from_string(
393  svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
394  if (res != AOM_CODEC_OK) {
395  die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
396  }
397  } else {
398  ++argj;
399  }
400  }
401 
402  // There will be a space in front of the string options
403  if (strlen(string_options) > 0)
404  strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
405 
406  // Check for unrecognized options
407  for (argi = argv; *argi; ++argi)
408  if (argi[0][0] == '-' && strlen(argi[0]) > 1)
409  die("Error: Unrecognized option %s\n", *argi);
410 
411  if (argv[0] == NULL) {
412  usage_exit();
413  }
414 
415  app_input->input_ctx.filename = argv[0];
416  free(argv);
417 
418  open_input_file(&app_input->input_ctx, AOM_CSP_UNKNOWN);
419  if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
420  enc_cfg->g_w = app_input->input_ctx.width;
421  enc_cfg->g_h = app_input->input_ctx.height;
422  }
423 
424  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
425  enc_cfg->g_h % 2)
426  die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
427 
428  printf(
429  "Codec %s\n"
430  "layers: %d\n"
431  "width %u, height: %u\n"
432  "num: %d, den: %d, bitrate: %u\n"
433  "gop size: %u\n",
435  svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
436  enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
437  enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
438 }
439 
// Temporal layer count for each layering mode; the index is the mode (0-11).
static int mode_to_num_temporal_layers[12] = {
  1,  // mode 0
  2,  // mode 1
  3,  // mode 2
  3,  // mode 3
  2,  // mode 4
  1,  // mode 5
  1,  // mode 6
  3,  // mode 7
  3,  // mode 8
  3,  // mode 9
  3,  // mode 10
  3   // mode 11
};

// Spatial layer count for each layering mode; the index is the mode (0-11).
static int mode_to_num_spatial_layers[12] = {
  1,  // mode 0
  1,  // mode 1
  1,  // mode 2
  1,  // mode 3
  1,  // mode 4
  2,  // mode 5
  3,  // mode 6
  2,  // mode 7
  3,  // mode 8
  3,  // mode 9
  3,  // mode 10
  3   // mode 11
};
446 
447 // For rate control encoding stats.
448 struct RateControlMetrics {
449  // Number of input frames per layer.
450  int layer_input_frames[AOM_MAX_TS_LAYERS];
451  // Number of encoded non-key frames per layer.
452  int layer_enc_frames[AOM_MAX_TS_LAYERS];
453  // Framerate per layer layer (cumulative).
454  double layer_framerate[AOM_MAX_TS_LAYERS];
455  // Target average frame size per layer (per-frame-bandwidth per layer).
456  double layer_pfb[AOM_MAX_LAYERS];
457  // Actual average frame size per layer.
458  double layer_avg_frame_size[AOM_MAX_LAYERS];
459  // Average rate mismatch per layer (|target - actual| / target).
460  double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
461  // Actual encoding bitrate per layer (cumulative across temporal layers).
462  double layer_encoding_bitrate[AOM_MAX_LAYERS];
463  // Average of the short-time encoder actual bitrate.
464  // TODO(marpan): Should we add these short-time stats for each layer?
465  double avg_st_encoding_bitrate;
466  // Variance of the short-time encoder actual bitrate.
467  double variance_st_encoding_bitrate;
468  // Window (number of frames) for computing short-timee encoding bitrate.
469  int window_size;
470  // Number of window measurements.
471  int window_count;
472  int layer_target_bitrate[AOM_MAX_LAYERS];
473 };
474 
// Number of entries in the refresh[] array of the reference frame config.
static const int REF_FRAMES = 8;

// Number of inter references per frame (entries in ref_idx[] / reference[]).
static const int INTER_REFS_PER_FRAME = 7;
478 
// Indices of the seven inter references used by this example encoder; they
// index the ref_idx[] and reference[] arrays of the reference frame config.
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME = 1,
  SVC_LAST3_FRAME = 2,
  SVC_GOLDEN_FRAME = 3,
  SVC_BWDREF_FRAME = 4,
  SVC_ALTREF2_FRAME = 5,
  SVC_ALTREF_FRAME = 6
};
489 
490 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
491  FILE *f = input_ctx->file;
492  y4m_input *y4m = &input_ctx->y4m;
493  int shortread = 0;
494 
495  if (input_ctx->file_type == FILE_TYPE_Y4M) {
496  if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
497  } else {
498  shortread = read_yuv_frame(input_ctx, img);
499  }
500 
501  return !shortread;
502 }
503 
504 static void close_input_file(struct AvxInputContext *input) {
505  fclose(input->file);
506  if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
507 }
508 
509 // Note: these rate control metrics assume only 1 key frame in the
510 // sequence (i.e., first frame only). So for temporal pattern# 7
511 // (which has key frame for every frame on base layer), the metrics
512 // computation will be off/wrong.
513 // TODO(marpan): Update these metrics to account for multiple key frames
514 // in the stream.
515 static void set_rate_control_metrics(struct RateControlMetrics *rc,
516  double framerate, int ss_number_layers,
517  int ts_number_layers) {
518  int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
519  ts_rate_decimator[0] = 1;
520  if (ts_number_layers == 2) {
521  ts_rate_decimator[0] = 2;
522  ts_rate_decimator[1] = 1;
523  }
524  if (ts_number_layers == 3) {
525  ts_rate_decimator[0] = 4;
526  ts_rate_decimator[1] = 2;
527  ts_rate_decimator[2] = 1;
528  }
529  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
530  // per-frame-bandwidth, for the rate control encoding stats below.
531  for (int sl = 0; sl < ss_number_layers; ++sl) {
532  int i = sl * ts_number_layers;
533  rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
534  rc->layer_pfb[i] =
535  1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
536  for (int tl = 0; tl < ts_number_layers; ++tl) {
537  i = sl * ts_number_layers + tl;
538  if (tl > 0) {
539  rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
540  rc->layer_pfb[i] =
541  1000.0 *
542  (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
543  (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
544  }
545  rc->layer_input_frames[tl] = 0;
546  rc->layer_enc_frames[tl] = 0;
547  rc->layer_encoding_bitrate[i] = 0.0;
548  rc->layer_avg_frame_size[i] = 0.0;
549  rc->layer_avg_rate_mismatch[i] = 0.0;
550  }
551  }
552  rc->window_count = 0;
553  rc->window_size = 15;
554  rc->avg_st_encoding_bitrate = 0.0;
555  rc->variance_st_encoding_bitrate = 0.0;
556 }
557 
558 static void printout_rate_control_summary(struct RateControlMetrics *rc,
559  int frame_cnt, int ss_number_layers,
560  int ts_number_layers) {
561  int tot_num_frames = 0;
562  double perc_fluctuation = 0.0;
563  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
564  printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
565  for (int sl = 0; sl < ss_number_layers; ++sl) {
566  tot_num_frames = 0;
567  for (int tl = 0; tl < ts_number_layers; ++tl) {
568  int i = sl * ts_number_layers + tl;
569  const int num_dropped =
570  tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
571  : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
572  tot_num_frames += rc->layer_input_frames[tl];
573  rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
574  rc->layer_encoding_bitrate[i] /
575  tot_num_frames;
576  rc->layer_avg_frame_size[i] =
577  rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
578  rc->layer_avg_rate_mismatch[i] =
579  100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
580  printf("For layer#: %d %d \n", sl, tl);
581  printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
582  rc->layer_encoding_bitrate[i]);
583  printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
584  rc->layer_avg_frame_size[i]);
585  printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
586  printf(
587  "Number of input frames, encoded (non-key) frames, "
588  "and perc dropped frames: %d %d %f\n",
589  rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
590  100.0 * num_dropped / rc->layer_input_frames[tl]);
591  printf("\n");
592  }
593  }
594  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
595  rc->variance_st_encoding_bitrate =
596  rc->variance_st_encoding_bitrate / rc->window_count -
597  (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
598  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
599  rc->avg_st_encoding_bitrate;
600  printf("Short-time stats, for window of %d frames:\n", rc->window_size);
601  printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
602  rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
603  perc_fluctuation);
604  if (frame_cnt - 1 != tot_num_frames)
605  die("Error: Number of input frames not equal to output!\n");
606 }
607 
608 // Layer pattern configuration.
609 static void set_layer_pattern(
610  int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
611  aom_svc_ref_frame_config_t *ref_frame_config,
612  aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
613  int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
614  // Setting this flag to 1 enables simplex example of
615  // RPS (Reference Picture Selection) for 1 layer.
616  int use_rps_example = 0;
617  int i;
618  int enable_longterm_temporal_ref = 1;
619  int shift = (layering_mode == 8) ? 2 : 0;
620  int simulcast_mode = (layering_mode == 11);
621  *use_svc_control = 1;
622  layer_id->spatial_layer_id = spatial_layer_id;
623  int lag_index = 0;
624  int base_count = superframe_cnt >> 2;
625  ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
626  ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
627  ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
628  // Set the reference map buffer idx for the 7 references:
629  // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
630  // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
631  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
632  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
633  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
634 
635  if (ksvc_mode) {
636 // Same pattern as case 9, but the reference structure will be constrained
637  // below.
638  layering_mode = 9;
639  }
640  switch (layering_mode) {
641  case 0:
642  if (use_rps_example == 0) {
643  // 1-layer: update LAST on every frame, reference LAST.
644  layer_id->temporal_layer_id = 0;
645  layer_id->spatial_layer_id = 0;
646  ref_frame_config->refresh[0] = 1;
647  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
648  } else {
649  // Pattern of 2 references (ALTREF and GOLDEN) trailing
650  // LAST by 4 and 8 frames, with some switching logic to
651  // sometimes only predict from the longer-term reference
652  //(golden here). This is simple example to test RPS
653  // (reference picture selection).
654  int last_idx = 0;
655  int last_idx_refresh = 0;
656  int gld_idx = 0;
657  int alt_ref_idx = 0;
658  int lag_alt = 4;
659  int lag_gld = 8;
660  layer_id->temporal_layer_id = 0;
661  layer_id->spatial_layer_id = 0;
662  int sh = 8; // slots 0 - 7.
663  // Moving index slot for last: 0 - (sh - 1)
664  if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
665  // Moving index for refresh of last: one ahead for next frame.
666  last_idx_refresh = superframe_cnt % sh;
667  // Moving index for gld_ref, lag behind current by lag_gld
668  if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
669  // Moving index for alt_ref, lag behind LAST by lag_alt frames.
670  if (superframe_cnt > lag_alt)
671  alt_ref_idx = (superframe_cnt - lag_alt) % sh;
672  // Set the ref_idx.
673  // Default all references to slot for last.
674  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
675  ref_frame_config->ref_idx[i] = last_idx;
676  // Set the ref_idx for the relevant references.
677  ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
678  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
679  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
680  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
681  // Refresh this slot, which will become LAST on next frame.
682  ref_frame_config->refresh[last_idx_refresh] = 1;
683  // Reference LAST, ALTREF, and GOLDEN
684  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
685  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
686  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
687 // Switch to only GOLDEN every 200 frames.
688  if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
689  ref_frame_config->reference[SVC_LAST_FRAME] = 0;
690  ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
691  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
692  // Test if the long-term is LAST instead, this is just a renaming
693  // but its tests if encoder behaves the same, whether its
694  // LAST or GOLDEN.
695  if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
696  ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
697  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
698  ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
699  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
700  }
701  }
702  }
703  break;
704  case 1:
705  // 2-temporal layer.
706  // 1 3 5
707  // 0 2 4
708  // Keep golden fixed at slot 3.
709  base_count = superframe_cnt >> 1;
710  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
711  // Cyclically refresh slots 5, 6, 7, for lag alt ref.
712  lag_index = 5;
713  if (base_count > 0) {
714  lag_index = 5 + (base_count % 3);
715  if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
716  }
717  // Set the altref slot to lag_index.
718  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
719  if (superframe_cnt % 2 == 0) {
720  layer_id->temporal_layer_id = 0;
721  // Update LAST on layer 0, reference LAST.
722  ref_frame_config->refresh[0] = 1;
723  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
724  // Refresh lag_index slot, needed for lagging golen.
725  ref_frame_config->refresh[lag_index] = 1;
726  // Refresh GOLDEN every x base layer frames.
727  if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
728  } else {
729  layer_id->temporal_layer_id = 1;
730  // No updates on layer 1, reference LAST (TL0).
731  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
732  }
733  // Always reference golden and altref on TL0.
734  if (layer_id->temporal_layer_id == 0) {
735  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
736  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
737  }
738  break;
739  case 2:
740  // 3-temporal layer:
741  // 1 3 5 7
742  // 2 6
743  // 0 4 8
744  if (superframe_cnt % 4 == 0) {
745  // Base layer.
746  layer_id->temporal_layer_id = 0;
747  // Update LAST on layer 0, reference LAST.
748  ref_frame_config->refresh[0] = 1;
749  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
750  } else if ((superframe_cnt - 1) % 4 == 0) {
751  layer_id->temporal_layer_id = 2;
752  // First top layer: no updates, only reference LAST (TL0).
753  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
754  } else if ((superframe_cnt - 2) % 4 == 0) {
755  layer_id->temporal_layer_id = 1;
756  // Middle layer (TL1): update LAST2, only reference LAST (TL0).
757  ref_frame_config->refresh[1] = 1;
758  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
759  } else if ((superframe_cnt - 3) % 4 == 0) {
760  layer_id->temporal_layer_id = 2;
761  // Second top layer: no updates, only reference LAST.
762  // Set buffer idx for LAST to slot 1, since that was the slot
763  // updated in previous frame. So LAST is TL1 frame.
764  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
765  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
766  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
767  }
768  break;
769  case 3:
770  // 3 TL, same as above, except allow for predicting
771  // off 2 more references (GOLDEN and ALTREF), with
772  // GOLDEN updated periodically, and ALTREF lagging from
773  // LAST from ~4 frames. Both GOLDEN and ALTREF
774  // can only be updated on base temporal layer.
775 
776  // Keep golden fixed at slot 3.
777  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
778  // Cyclically refresh slots 5, 6, 7, for lag altref.
779  lag_index = 5;
780  if (base_count > 0) {
781  lag_index = 5 + (base_count % 3);
782  if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
783  }
784  // Set the altref slot to lag_index.
785  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
786  if (superframe_cnt % 4 == 0) {
787  // Base layer.
788  layer_id->temporal_layer_id = 0;
789  // Update LAST on layer 0, reference LAST.
790  ref_frame_config->refresh[0] = 1;
791  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
792  // Refresh GOLDEN every x ~10 base layer frames.
793  if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
794  // Refresh lag_index slot, needed for lagging altref.
795  ref_frame_config->refresh[lag_index] = 1;
796  } else if ((superframe_cnt - 1) % 4 == 0) {
797  layer_id->temporal_layer_id = 2;
798  // First top layer: no updates, only reference LAST (TL0).
799  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
800  } else if ((superframe_cnt - 2) % 4 == 0) {
801  layer_id->temporal_layer_id = 1;
802  // Middle layer (TL1): update LAST2, only reference LAST (TL0).
803  ref_frame_config->refresh[1] = 1;
804  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
805  } else if ((superframe_cnt - 3) % 4 == 0) {
806  layer_id->temporal_layer_id = 2;
807  // Second top layer: no updates, only reference LAST.
808  // Set buffer idx for LAST to slot 1, since that was the slot
809  // updated in previous frame. So LAST is TL1 frame.
810  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
811  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
812  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
813  }
814  // Every frame can reference GOLDEN AND ALTREF.
815  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
816  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
817  // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
818  if (speed >= 7) {
819  ref_frame_comp_pred->use_comp_pred[2] = 1;
820  ref_frame_comp_pred->use_comp_pred[0] = 1;
821  }
822  break;
823  case 4:
824  // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
825  // only reference GF (not LAST). Other frames only reference LAST.
826  // 1 3 5 7
827  // 2 6
828  // 0 4 8
829  if (superframe_cnt % 4 == 0) {
830  // Base layer.
831  layer_id->temporal_layer_id = 0;
832  // Update LAST on layer 0, only reference LAST.
833  ref_frame_config->refresh[0] = 1;
834  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
835  } else if ((superframe_cnt - 1) % 4 == 0) {
836  layer_id->temporal_layer_id = 2;
837  // First top layer: no updates, only reference LAST (TL0).
838  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
839  } else if ((superframe_cnt - 2) % 4 == 0) {
840  layer_id->temporal_layer_id = 1;
841  // Middle layer (TL1): update GF, only reference LAST (TL0).
842  ref_frame_config->refresh[3] = 1;
843  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
844  } else if ((superframe_cnt - 3) % 4 == 0) {
845  layer_id->temporal_layer_id = 2;
846  // Second top layer: no updates, only reference GF.
847  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
848  }
849  break;
850  case 5:
851  // 2 spatial layers, 1 temporal.
852  layer_id->temporal_layer_id = 0;
853  if (layer_id->spatial_layer_id == 0) {
854  // Reference LAST, update LAST.
855  ref_frame_config->refresh[0] = 1;
856  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
857  } else if (layer_id->spatial_layer_id == 1) {
858  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
859  // and GOLDEN to slot 0. Update slot 1 (LAST).
860  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
861  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
862  ref_frame_config->refresh[1] = 1;
863  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
864  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
865  }
866  break;
867  case 6:
868  // 3 spatial layers, 1 temporal.
869  // Note for this case, we set the buffer idx for all references to be
870  // either LAST or GOLDEN, which are always valid references, since decoder
871  // will check if any of the 7 references is valid scale in
872  // valid_ref_frame_size().
873  layer_id->temporal_layer_id = 0;
874  if (layer_id->spatial_layer_id == 0) {
875  // Reference LAST, update LAST. Set all buffer_idx to 0.
876  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
877  ref_frame_config->ref_idx[i] = 0;
878  ref_frame_config->refresh[0] = 1;
879  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
880  } else if (layer_id->spatial_layer_id == 1) {
881  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
882  // and GOLDEN (and all other refs) to slot 0.
883  // Update slot 1 (LAST).
884  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
885  ref_frame_config->ref_idx[i] = 0;
886  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
887  ref_frame_config->refresh[1] = 1;
888  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
889  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
890  } else if (layer_id->spatial_layer_id == 2) {
891  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
892  // and GOLDEN (and all other refs) to slot 1.
893  // Update slot 2 (LAST).
894  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
895  ref_frame_config->ref_idx[i] = 1;
896  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
897  ref_frame_config->refresh[2] = 1;
898  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
899  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
900  // For 3 spatial layer case: allow for top spatial layer to use
901  // additional temporal reference. Update every 10 frames.
902  if (enable_longterm_temporal_ref) {
903  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
904  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
905  if (base_count % 10 == 0)
906  ref_frame_config->refresh[REF_FRAMES - 1] = 1;
907  }
908  }
909  break;
910  case 7:
911  // 2 spatial and 3 temporal layer.
912  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
913  if (superframe_cnt % 4 == 0) {
914  // Base temporal layer
915  layer_id->temporal_layer_id = 0;
916  if (layer_id->spatial_layer_id == 0) {
917  // Reference LAST, update LAST
918  // Set all buffer_idx to 0
919  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
920  ref_frame_config->ref_idx[i] = 0;
921  ref_frame_config->refresh[0] = 1;
922  } else if (layer_id->spatial_layer_id == 1) {
923  // Reference LAST and GOLDEN.
924  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
925  ref_frame_config->ref_idx[i] = 0;
926  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
927  ref_frame_config->refresh[1] = 1;
928  }
929  } else if ((superframe_cnt - 1) % 4 == 0) {
930  // First top temporal enhancement layer.
931  layer_id->temporal_layer_id = 2;
932  if (layer_id->spatial_layer_id == 0) {
933  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
934  ref_frame_config->ref_idx[i] = 0;
935  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
936  ref_frame_config->refresh[3] = 1;
937  } else if (layer_id->spatial_layer_id == 1) {
938  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
939  // GOLDEN (and all other refs) to slot 3.
940  // No update.
941  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
942  ref_frame_config->ref_idx[i] = 3;
943  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
944  }
945  } else if ((superframe_cnt - 2) % 4 == 0) {
946  // Middle temporal enhancement layer.
947  layer_id->temporal_layer_id = 1;
948  if (layer_id->spatial_layer_id == 0) {
949  // Reference LAST.
950  // Set all buffer_idx to 0.
951  // Set GOLDEN to slot 5 and update slot 5.
952  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
953  ref_frame_config->ref_idx[i] = 0;
954  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
955  ref_frame_config->refresh[5 - shift] = 1;
956  } else if (layer_id->spatial_layer_id == 1) {
957  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
958  // GOLDEN (and all other refs) to slot 5.
959  // Set LAST3 to slot 6 and update slot 6.
960  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
961  ref_frame_config->ref_idx[i] = 5 - shift;
962  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
963  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
964  ref_frame_config->refresh[6 - shift] = 1;
965  }
966  } else if ((superframe_cnt - 3) % 4 == 0) {
967  // Second top temporal enhancement layer.
968  layer_id->temporal_layer_id = 2;
969  if (layer_id->spatial_layer_id == 0) {
970  // Set LAST to slot 5 and reference LAST.
971  // Set GOLDEN to slot 3 and update slot 3.
972  // Set all other buffer_idx to 0.
973  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
974  ref_frame_config->ref_idx[i] = 0;
975  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
976  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
977  ref_frame_config->refresh[3] = 1;
978  } else if (layer_id->spatial_layer_id == 1) {
979  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
980  // GOLDEN to slot 3. No update.
981  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
982  ref_frame_config->ref_idx[i] = 0;
983  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
984  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
985  }
986  }
987  break;
988  case 8:
989  // 3 spatial and 3 temporal layer.
      // Same as case 9 but with overlap in the buffer slot updates.
991  // (shift = 2). The slots 3 and 4 updated by first TL2 are
992  // reused for update in TL1 superframe.
993  // Note for this case, frame order hint must be disabled for
      // lower resolutions (operating points > 0) to be decodable.
995  case 9:
996  // 3 spatial and 3 temporal layer.
997  // No overlap in buffer updates between TL2 and TL1.
998  // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
999  // Set the references via the svc_ref_frame_config control.
1000  // Always reference LAST.
1001  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1002  if (superframe_cnt % 4 == 0) {
1003  // Base temporal layer.
1004  layer_id->temporal_layer_id = 0;
1005  if (layer_id->spatial_layer_id == 0) {
1006  // Reference LAST, update LAST.
1007  // Set all buffer_idx to 0.
1008  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1009  ref_frame_config->ref_idx[i] = 0;
1010  ref_frame_config->refresh[0] = 1;
1011  } else if (layer_id->spatial_layer_id == 1) {
1012  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1013  // GOLDEN (and all other refs) to slot 0.
1014  // Update slot 1 (LAST).
1015  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1016  ref_frame_config->ref_idx[i] = 0;
1017  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1018  ref_frame_config->refresh[1] = 1;
1019  } else if (layer_id->spatial_layer_id == 2) {
1020  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1021  // GOLDEN (and all other refs) to slot 1.
1022  // Update slot 2 (LAST).
1023  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1024  ref_frame_config->ref_idx[i] = 1;
1025  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1026  ref_frame_config->refresh[2] = 1;
1027  }
1028  } else if ((superframe_cnt - 1) % 4 == 0) {
1029  // First top temporal enhancement layer.
1030  layer_id->temporal_layer_id = 2;
1031  if (layer_id->spatial_layer_id == 0) {
1032  // Reference LAST (slot 0).
1033  // Set GOLDEN to slot 3 and update slot 3.
1034  // Set all other buffer_idx to slot 0.
1035  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1036  ref_frame_config->ref_idx[i] = 0;
1037  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1038  ref_frame_config->refresh[3] = 1;
1039  } else if (layer_id->spatial_layer_id == 1) {
1040  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1041  // GOLDEN (and all other refs) to slot 3.
1042  // Set LAST2 to slot 4 and Update slot 4.
1043  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1044  ref_frame_config->ref_idx[i] = 3;
1045  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1046  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1047  ref_frame_config->refresh[4] = 1;
1048  } else if (layer_id->spatial_layer_id == 2) {
1049  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1050  // GOLDEN (and all other refs) to slot 4.
1051  // No update.
1052  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1053  ref_frame_config->ref_idx[i] = 4;
1054  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1055  }
1056  } else if ((superframe_cnt - 2) % 4 == 0) {
1057  // Middle temporal enhancement layer.
1058  layer_id->temporal_layer_id = 1;
1059  if (layer_id->spatial_layer_id == 0) {
1060  // Reference LAST.
1061  // Set all buffer_idx to 0.
1062  // Set GOLDEN to slot 5 and update slot 5.
1063  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1064  ref_frame_config->ref_idx[i] = 0;
1065  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1066  ref_frame_config->refresh[5 - shift] = 1;
1067  } else if (layer_id->spatial_layer_id == 1) {
1068  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1069  // GOLDEN (and all other refs) to slot 5.
1070  // Set LAST3 to slot 6 and update slot 6.
1071  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1072  ref_frame_config->ref_idx[i] = 5 - shift;
1073  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1074  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1075  ref_frame_config->refresh[6 - shift] = 1;
1076  } else if (layer_id->spatial_layer_id == 2) {
1077  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1078  // GOLDEN (and all other refs) to slot 6.
1079  // Set LAST3 to slot 7 and update slot 7.
1080  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1081  ref_frame_config->ref_idx[i] = 6 - shift;
1082  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1083  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1084  ref_frame_config->refresh[7 - shift] = 1;
1085  }
1086  } else if ((superframe_cnt - 3) % 4 == 0) {
1087  // Second top temporal enhancement layer.
1088  layer_id->temporal_layer_id = 2;
1089  if (layer_id->spatial_layer_id == 0) {
1090  // Set LAST to slot 5 and reference LAST.
1091  // Set GOLDEN to slot 3 and update slot 3.
1092  // Set all other buffer_idx to 0.
1093  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1094  ref_frame_config->ref_idx[i] = 0;
1095  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1096  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1097  ref_frame_config->refresh[3] = 1;
1098  } else if (layer_id->spatial_layer_id == 1) {
1099  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1100  // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1101  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1102  ref_frame_config->ref_idx[i] = 0;
1103  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1104  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1105  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1106  ref_frame_config->refresh[4] = 1;
1107  } else if (layer_id->spatial_layer_id == 2) {
1108  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1109  // GOLDEN to slot 4. No update.
1110  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1111  ref_frame_config->ref_idx[i] = 0;
1112  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1113  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1114  }
1115  }
1116  break;
1117  case 11:
1118  // Simulcast mode for 3 spatial and 3 temporal layers.
1119  // No inter-layer predicton, only prediction is temporal and single
1120  // reference (LAST).
1121  // No overlap in buffer slots between spatial layers. So for example,
1122  // SL0 only uses slots 0 and 1.
1123  // SL1 only uses slots 2 and 3.
1124  // SL2 only uses slots 4 and 5.
1125  // All 7 references for each inter-frame must only access buffer slots
1126  // for that spatial layer.
1127  // On key (super)frames: SL1 and SL2 must have no references set
1128  // and must refresh all the slots for that layer only (so 2 and 3
1129  // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1130  // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1131  // internally as Intra-only frames that allow that stream to be decoded.
1132  // These conditions will allow for each spatial stream to be
1133  // independently decodeable.
1134 
1135  // Initialize all references to 0 (don't use reference).
1136  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1137  ref_frame_config->reference[i] = 0;
1138  // Initialize as no refresh/update for all slots.
1139  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1140  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1141  ref_frame_config->ref_idx[i] = 0;
1142 
1143  if (is_key_frame) {
1144  if (layer_id->spatial_layer_id == 0) {
1145  // Assign LAST/GOLDEN to slot 0/1.
1146  // Refesh slots 0 and 1 for SL0.
1147  // SL0: this will get set to KEY frame internally.
1148  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1149  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1150  ref_frame_config->refresh[0] = 1;
1151  ref_frame_config->refresh[1] = 1;
1152  } else if (layer_id->spatial_layer_id == 1) {
1153  // Assign LAST/GOLDEN to slot 2/3.
1154  // Refesh slots 2 and 3 for SL1.
1155  // This will get set to Intra-only frame internally.
1156  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1157  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1158  ref_frame_config->refresh[2] = 1;
1159  ref_frame_config->refresh[3] = 1;
1160  } else if (layer_id->spatial_layer_id == 2) {
1161  // Assign LAST/GOLDEN to slot 4/5.
1162  // Refresh slots 4 and 5 for SL2.
1163  // This will get set to Intra-only frame internally.
1164  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1165  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1166  ref_frame_config->refresh[4] = 1;
1167  ref_frame_config->refresh[5] = 1;
1168  }
1169  } else if (superframe_cnt % 4 == 0) {
1170  // Base temporal layer: TL0
1171  layer_id->temporal_layer_id = 0;
1172  if (layer_id->spatial_layer_id == 0) { // SL0
1173  // Reference LAST. Assign all references to either slot
1174  // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1175  // Update slot 0 (LAST).
1176  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1177  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1178  ref_frame_config->ref_idx[i] = 1;
1179  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1180  ref_frame_config->refresh[0] = 1;
1181  } else if (layer_id->spatial_layer_id == 1) { // SL1
1182  // Reference LAST. Assign all references to either slot
1183  // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1184  // Update slot 2 (LAST).
1185  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1186  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1187  ref_frame_config->ref_idx[i] = 3;
1188  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1189  ref_frame_config->refresh[2] = 1;
1190  } else if (layer_id->spatial_layer_id == 2) { // SL2
1191  // Reference LAST. Assign all references to either slot
1192  // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1193  // Update slot 4 (LAST).
1194  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1195  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1196  ref_frame_config->ref_idx[i] = 5;
1197  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1198  ref_frame_config->refresh[4] = 1;
1199  }
1200  } else if ((superframe_cnt - 1) % 4 == 0) {
1201  // First top temporal enhancement layer: TL2
1202  layer_id->temporal_layer_id = 2;
1203  if (layer_id->spatial_layer_id == 0) { // SL0
1204  // Reference LAST (slot 0). Assign other references to slot 1.
1205  // No update/refresh on any slots.
1206  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1207  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1208  ref_frame_config->ref_idx[i] = 1;
1209  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1210  } else if (layer_id->spatial_layer_id == 1) { // SL1
1211  // Reference LAST (slot 2). Assign other references to slot 3.
1212  // No update/refresh on any slots.
1213  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1214  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1215  ref_frame_config->ref_idx[i] = 3;
1216  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1217  } else if (layer_id->spatial_layer_id == 2) { // SL2
          // Reference LAST (slot 4). Assign other references to slot 5.
1219  // No update/refresh on any slots.
1220  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1221  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1222  ref_frame_config->ref_idx[i] = 5;
1223  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1224  }
1225  } else if ((superframe_cnt - 2) % 4 == 0) {
1226  // Middle temporal enhancement layer: TL1
1227  layer_id->temporal_layer_id = 1;
1228  if (layer_id->spatial_layer_id == 0) { // SL0
1229  // Reference LAST (slot 0).
1230  // Set GOLDEN to slot 1 and update slot 1.
1231  // This will be used as reference for next TL2.
1232  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1233  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1234  ref_frame_config->ref_idx[i] = 1;
1235  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1236  ref_frame_config->refresh[1] = 1;
1237  } else if (layer_id->spatial_layer_id == 1) { // SL1
1238  // Reference LAST (slot 2).
1239  // Set GOLDEN to slot 3 and update slot 3.
1240  // This will be used as reference for next TL2.
1241  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1242  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1243  ref_frame_config->ref_idx[i] = 3;
1244  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1245  ref_frame_config->refresh[3] = 1;
1246  } else if (layer_id->spatial_layer_id == 2) { // SL2
1247  // Reference LAST (slot 4).
1248  // Set GOLDEN to slot 5 and update slot 5.
1249  // This will be used as reference for next TL2.
1250  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1251  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1252  ref_frame_config->ref_idx[i] = 5;
1253  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1254  ref_frame_config->refresh[5] = 1;
1255  }
1256  } else if ((superframe_cnt - 3) % 4 == 0) {
1257  // Second top temporal enhancement layer: TL2
1258  layer_id->temporal_layer_id = 2;
1259  if (layer_id->spatial_layer_id == 0) { // SL0
1260  // Reference LAST (slot 1). Assign other references to slot 0.
1261  // No update/refresh on any slots.
1262  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1263  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1264  ref_frame_config->ref_idx[i] = 0;
1265  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1266  } else if (layer_id->spatial_layer_id == 1) { // SL1
1267  // Reference LAST (slot 3). Assign other references to slot 2.
1268  // No update/refresh on any slots.
1269  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1270  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1271  ref_frame_config->ref_idx[i] = 2;
1272  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1273  } else if (layer_id->spatial_layer_id == 2) { // SL2
1274  // Reference LAST (slot 5). Assign other references to slot 4.
1275  // No update/refresh on any slots.
1276  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1277  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1278  ref_frame_config->ref_idx[i] = 4;
1279  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1280  }
1281  }
1282  if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1283  // Always reference GOLDEN (inter-layer prediction).
1284  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1285  if (ksvc_mode) {
1286  // KSVC: only keep the inter-layer reference (GOLDEN) for
1287  // superframes whose base is key.
1288  if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1289  }
1290  if (is_key_frame && layer_id->spatial_layer_id > 1) {
1291  // On superframes whose base is key: remove LAST to avoid prediction
1292  // off layer two levels below.
1293  ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1294  }
1295  }
1296  // For 3 spatial layer case 8 (where there is free buffer slot):
1297  // allow for top spatial layer to use additional temporal reference.
1298  // Additional reference is only updated on base temporal layer, every
1299  // 10 TL0 frames here.
1300  if (!simulcast_mode && enable_longterm_temporal_ref &&
1301  layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1302  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1303  if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1304  if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1305  ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1306  }
1307  break;
1308  default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1309  }
1310 }
1311 
1312 #if CONFIG_AV1_DECODER
1313 // Returns whether there is a mismatch between the encoder's new frame and the
1314 // decoder's new frame.
1315 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1316  const int frames_out) {
1317  aom_image_t enc_img, dec_img;
1318  int mismatch = 0;
1319 
1320  /* Get the internal new frame */
1323 
1324 #if CONFIG_AV1_HIGHBITDEPTH
1325  if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1326  (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1327  if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1328  aom_image_t enc_hbd_img;
1329  aom_img_alloc(
1330  &enc_hbd_img,
1331  static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1332  enc_img.d_w, enc_img.d_h, 16);
1333  aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1334  enc_img = enc_hbd_img;
1335  }
1336  if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1337  aom_image_t dec_hbd_img;
1338  aom_img_alloc(
1339  &dec_hbd_img,
1340  static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1341  dec_img.d_w, dec_img.d_h, 16);
1342  aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1343  dec_img = dec_hbd_img;
1344  }
1345  }
1346 #endif
1347 
1348  if (!aom_compare_img(&enc_img, &dec_img)) {
1349  int y[4], u[4], v[4];
1350 #if CONFIG_AV1_HIGHBITDEPTH
1351  if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1352  aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1353  } else {
1354  aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1355  }
1356 #else
1357  aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1358 #endif
1359  fprintf(stderr,
1360  "Encode/decode mismatch on frame %d at"
1361  " Y[%d, %d] {%d/%d},"
1362  " U[%d, %d] {%d/%d},"
1363  " V[%d, %d] {%d/%d}\n",
1364  frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1365  v[1], v[2], v[3]);
1366  mismatch = 1;
1367  }
1368 
1369  aom_img_free(&enc_img);
1370  aom_img_free(&dec_img);
1371  return mismatch;
1372 }
1373 #endif // CONFIG_AV1_DECODER
1374 
// Running PSNR accumulators for a stream.
// Every array has two entries in its first dimension: entry 0 is what
// show_psnr() reports, entry 1 is reserved for high bitdepth.
struct psnr_stats {
  // Accumulated squared error, passed to sse_to_psnr() for the overall PSNR.
  uint64_t psnr_sse_total[2];
  // Accumulated sample count, passed to sse_to_psnr() for the overall PSNR.
  uint64_t psnr_samples_total[2];
  // Four summed PSNR values; show_psnr() divides each by psnr_count and
  // prints them after the overall figure (under the Avg/Y/U/V labels).
  double psnr_totals[2][4];
  // Number of entries accumulated; show_psnr() prints nothing when it is 0.
  int psnr_count[2];
};
1382 
1383 static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1384  double ovpsnr;
1385 
1386  if (!psnr_stream->psnr_count[0]) return;
1387 
1388  fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1389  ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1390  (double)psnr_stream->psnr_sse_total[0]);
1391  fprintf(stderr, " %.3f", ovpsnr);
1392 
1393  for (int i = 0; i < 4; i++) {
1394  fprintf(stderr, " %.3f",
1395  psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1396  }
1397  fprintf(stderr, "\n");
1398 }
1399 
1400 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1401  const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1402  aom::AV1RateControlRtcConfig rc_cfg;
1403  rc_cfg.width = cfg.g_w;
1404  rc_cfg.height = cfg.g_h;
1405  rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1406  rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1407  rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1408  rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1409  rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1410  rc_cfg.buf_sz = cfg.rc_buf_sz;
1411  rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1412  rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1413  // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1414  rc_cfg.max_intra_bitrate_pct = 300;
1415  rc_cfg.framerate = cfg.g_timebase.den;
1416  // TODO(jianj): Add suppor for SVC.
1417  rc_cfg.ss_number_layers = 1;
1418  rc_cfg.ts_number_layers = 1;
1419  rc_cfg.scaling_factor_num[0] = 1;
1420  rc_cfg.scaling_factor_den[0] = 1;
1421  rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1422  rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1423  rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1424  rc_cfg.aq_mode = app_input.aq_mode;
1425 
1426  return rc_cfg;
1427 }
1428 
// Maps an internal qindex (0-255 range) back to the external quantizer
// (0-63 range): returns the smallest quantizer whose table qindex is greater
// than or equal to |qindex|, or 63 if |qindex| exceeds every table entry.
static int qindex_to_quantizer(int qindex) {
  // Entry q is the internal qindex that external quantizer q maps to.
  static const int kQindexForQuantizer[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int last_level =
      (int)(sizeof(kQindexForQuantizer) / sizeof(kQindexForQuantizer[0])) - 1;

  // Walk upwards until the table value reaches |qindex|; stop at the last
  // level regardless, which is also the answer for out-of-range input.
  int level = 0;
  while (level < last_level && kQindexForQuantizer[level] < qindex) {
    ++level;
  }
  return level;
}
1444 
1445 static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1446  aom_codec_ctx_t *codec, int frame_cnt) {
1447  aom_active_map_t map = { 0, 0, 0 };
1448 
1449  map.rows = (cfg->g_h + 15) / 16;
1450  map.cols = (cfg->g_w + 15) / 16;
1451 
1452  map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1453  if (!map.active_map) die("Failed to allocate active map");
1454 
1455  // Example map for testing.
1456  for (unsigned int i = 0; i < map.rows; ++i) {
1457  for (unsigned int j = 0; j < map.cols; ++j) {
1458  int index = map.cols * i + j;
1459  map.active_map[index] = 1;
1460  if (frame_cnt < 300) {
1461  if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1462  } else if (frame_cnt >= 300) {
1463  if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1464  }
1465  }
1466  }
1467 
1468  if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1469  die_codec(codec, "Failed to set active map");
1470 
1471  free(map.active_map);
1472 }
1473 
1474 int main(int argc, const char **argv) {
1475  AppInput app_input;
1476  AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1477  FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1478  AvxVideoWriter *total_layer_file = NULL;
1479  FILE *total_layer_obu_file = NULL;
1480  aom_codec_enc_cfg_t cfg;
1481  int frame_cnt = 0;
1482  aom_image_t raw;
1483  int frame_avail;
1484  int got_data = 0;
1485  int flags = 0;
1486  int i;
1487  int pts = 0; // PTS starts at 0.
1488  int frame_duration = 1; // 1 timebase tick per frame.
1489  aom_svc_layer_id_t layer_id;
1490  aom_svc_params_t svc_params;
1491  aom_svc_ref_frame_config_t ref_frame_config;
1492  aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1493 
1494 #if CONFIG_INTERNAL_STATS
1495  FILE *stats_file = fopen("opsnr.stt", "a");
1496  if (stats_file == NULL) {
1497  die("Cannot open opsnr.stt\n");
1498  }
1499 #endif
1500 #if CONFIG_AV1_DECODER
1501  aom_codec_ctx_t decoder;
1502 #endif
1503 
1504  struct RateControlMetrics rc;
1505  int64_t cx_time = 0;
1506  int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1507  int frame_cnt_layer[AOM_MAX_LAYERS];
1508  double sum_bitrate = 0.0;
1509  double sum_bitrate2 = 0.0;
1510  double framerate = 30.0;
1511  int use_svc_control = 1;
1512  int set_err_resil_frame = 0;
1513  int test_changing_bitrate = 0;
1514  zero(rc.layer_target_bitrate);
1515  memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1516  memset(&app_input, 0, sizeof(AppInput));
1517  memset(&svc_params, 0, sizeof(svc_params));
1518 
1519  // Flag to test dynamic scaling of source frames for single
1520  // spatial stream, using the scaling_mode control.
1521  const int test_dynamic_scaling_single_layer = 0;
1522 
1523  // Flag to test setting speed per layer.
1524  const int test_speed_per_layer = 0;
1525 
1526  // Flag for testing active maps.
1527  const int test_active_maps = 0;
1528 
1529  /* Setup default input stream settings */
1530  app_input.input_ctx.framerate.numerator = 30;
1531  app_input.input_ctx.framerate.denominator = 1;
1532  app_input.input_ctx.only_i420 = 0;
1533  app_input.input_ctx.bit_depth = AOM_BITS_8;
1534  app_input.speed = 7;
1535  exec_name = argv[0];
1536 
1537  // start with default encoder configuration
1540  if (res != AOM_CODEC_OK) {
1541  die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1542  }
1543 
1544  // Real time parameters.
1546 
1547  cfg.rc_end_usage = AOM_CBR;
1548  cfg.rc_min_quantizer = 2;
1549  cfg.rc_max_quantizer = 52;
1550  cfg.rc_undershoot_pct = 50;
1551  cfg.rc_overshoot_pct = 50;
1552  cfg.rc_buf_initial_sz = 600;
1553  cfg.rc_buf_optimal_sz = 600;
1554  cfg.rc_buf_sz = 1000;
1555  cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1556  cfg.g_lag_in_frames = 0;
1557  cfg.kf_mode = AOM_KF_AUTO;
1558 
1559  parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1560 
1561  int ts_number_layers = svc_params.number_temporal_layers;
1562  int ss_number_layers = svc_params.number_spatial_layers;
1563 
1564  unsigned int width = cfg.g_w;
1565  unsigned int height = cfg.g_h;
1566 
1567  if (app_input.layering_mode >= 0) {
1568  if (ts_number_layers !=
1569  mode_to_num_temporal_layers[app_input.layering_mode] ||
1570  ss_number_layers !=
1571  mode_to_num_spatial_layers[app_input.layering_mode]) {
1572  die("Number of layers doesn't match layering mode.");
1573  }
1574  }
1575 
1576  // Y4M reader has its own allocation.
1577  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1578  if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1579  die("Failed to allocate image (%dx%d)", width, height);
1580  }
1581  }
1582 
1583  aom_codec_iface_t *encoder = aom_codec_av1_cx();
1584 
1585  memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1586  sizeof(svc_params.layer_target_bitrate));
1587 
1588  unsigned int total_rate = 0;
1589  for (i = 0; i < ss_number_layers; i++) {
1590  total_rate +=
1591  svc_params
1592  .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1593  }
1594  if (total_rate != cfg.rc_target_bitrate) {
1595  die("Incorrect total target bitrate");
1596  }
1597 
1598  svc_params.framerate_factor[0] = 1;
1599  if (ts_number_layers == 2) {
1600  svc_params.framerate_factor[0] = 2;
1601  svc_params.framerate_factor[1] = 1;
1602  } else if (ts_number_layers == 3) {
1603  svc_params.framerate_factor[0] = 4;
1604  svc_params.framerate_factor[1] = 2;
1605  svc_params.framerate_factor[2] = 1;
1606  }
1607 
1608  if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) {
1609  // Override these settings with the info from Y4M file.
1610  cfg.g_w = app_input.input_ctx.width;
1611  cfg.g_h = app_input.input_ctx.height;
1612  // g_timebase is the reciprocal of frame rate.
1613  cfg.g_timebase.num = app_input.input_ctx.framerate.denominator;
1614  cfg.g_timebase.den = app_input.input_ctx.framerate.numerator;
1615  }
1616  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1617  set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1618 
1619  AvxVideoInfo info;
1620  info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1621  info.frame_width = cfg.g_w;
1622  info.frame_height = cfg.g_h;
1623  info.time_base.numerator = cfg.g_timebase.num;
1624  info.time_base.denominator = cfg.g_timebase.den;
1625  // Open an output file for each stream.
1626  for (int sl = 0; sl < ss_number_layers; ++sl) {
1627  for (int tl = 0; tl < ts_number_layers; ++tl) {
1628  i = sl * ts_number_layers + tl;
1629  char file_name[PATH_MAX];
1630  snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1631  app_input.output_filename, i);
1632  if (app_input.output_obu) {
1633  obu_files[i] = fopen(file_name, "wb");
1634  if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1635  } else {
1636  outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1637  if (!outfile[i]) die("Failed to open %s for writing", file_name);
1638  }
1639  }
1640  }
1641  if (app_input.output_obu) {
1642  total_layer_obu_file = fopen(app_input.output_filename, "wb");
1643  if (!total_layer_obu_file)
1644  die("Failed to open %s for writing", app_input.output_filename);
1645  } else {
1646  total_layer_file =
1647  aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1648  if (!total_layer_file)
1649  die("Failed to open %s for writing", app_input.output_filename);
1650  }
1651 
1652  // Initialize codec.
1653  aom_codec_ctx_t codec;
1654  aom_codec_flags_t flag = 0;
1656  flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1657  if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1658  die_codec(&codec, "Failed to initialize encoder");
1659 
1660 #if CONFIG_AV1_DECODER
1661  if (app_input.decode) {
1662  if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1663  die_codec(&decoder, "Failed to initialize decoder");
1664  }
1665 #endif
1666 
1667  aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1668  aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1683 
1684  // Settings to reduce key frame encoding time.
1690 
1691  if (cfg.g_threads > 1) {
1693  (unsigned int)log2(cfg.g_threads));
1694  }
1695 
1696  aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1697  if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1700  // INTRABC is currently disabled for rt mode, as it's too slow.
1702  }
1703 
1704  if (app_input.use_external_rc) {
1706  }
1707 
1709 
1712 
1713  svc_params.number_spatial_layers = ss_number_layers;
1714  svc_params.number_temporal_layers = ts_number_layers;
1715  for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1716  svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1717  svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1718  }
1719  for (i = 0; i < ss_number_layers; ++i) {
1720  svc_params.scaling_factor_num[i] = 1;
1721  svc_params.scaling_factor_den[i] = 1;
1722  }
1723  if (ss_number_layers == 2) {
1724  svc_params.scaling_factor_num[0] = 1;
1725  svc_params.scaling_factor_den[0] = 2;
1726  } else if (ss_number_layers == 3) {
1727  svc_params.scaling_factor_num[0] = 1;
1728  svc_params.scaling_factor_den[0] = 4;
1729  svc_params.scaling_factor_num[1] = 1;
1730  svc_params.scaling_factor_den[1] = 2;
1731  }
1732  aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
1733  // TODO(aomedia:3032): Configure KSVC in fixed mode.
1734 
1735  // This controls the maximum target size of the key frame.
1736  // For generating smaller key frames, use a smaller max_intra_size_pct
1737  // value, like 100 or 200.
1738  {
1739  const int max_intra_size_pct = 300;
1741  max_intra_size_pct);
1742  }
1743 
1744  for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
1745  cx_time_layer[lx] = 0;
1746  frame_cnt_layer[lx] = 0;
1747  }
1748 
1749  std::unique_ptr<aom::AV1RateControlRTC> rc_api;
1750  if (app_input.use_external_rc) {
1751  const aom::AV1RateControlRtcConfig rc_cfg =
1752  create_rtc_rc_config(cfg, app_input);
1753  rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
1754  }
1755 
1756  frame_avail = 1;
1757  struct psnr_stats psnr_stream;
1758  memset(&psnr_stream, 0, sizeof(psnr_stream));
1759  while (frame_avail || got_data) {
1760  struct aom_usec_timer timer;
1761  frame_avail = read_frame(&(app_input.input_ctx), &raw);
1762  // Loop over spatial layers.
1763  for (int slx = 0; slx < ss_number_layers; slx++) {
1764  aom_codec_iter_t iter = NULL;
1765  const aom_codec_cx_pkt_t *pkt;
1766  int layer = 0;
1767  // Flag for superframe whose base is key.
1768  int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
1769  // For flexible mode:
1770  if (app_input.layering_mode >= 0) {
1771  // Set the reference/update flags, layer_id, and reference_map
1772  // buffer index.
1773  set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
1774  &ref_frame_config, &ref_frame_comp_pred,
1775  &use_svc_control, slx, is_key_frame,
1776  (app_input.layering_mode == 10), app_input.speed);
1777  aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1778  if (use_svc_control) {
1780  &ref_frame_config);
1782  &ref_frame_comp_pred);
1783  }
1784  // Set the speed per layer.
1785  if (test_speed_per_layer) {
1786  int speed_per_layer = 10;
1787  if (layer_id.spatial_layer_id == 0) {
1788  if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
1789  if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
1790  if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
1791  } else if (layer_id.spatial_layer_id == 1) {
1792  if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
1793  if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
1794  if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
1795  } else if (layer_id.spatial_layer_id == 2) {
1796  if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
1797  if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
1798  if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
1799  }
1800  aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
1801  }
1802  } else {
1803  // Only up to 3 temporal layers supported in fixed mode.
1804  // Only need to set spatial and temporal layer_id: reference
1805  // prediction, refresh, and buffer_idx are set internally.
1806  layer_id.spatial_layer_id = slx;
1807  layer_id.temporal_layer_id = 0;
1808  if (ts_number_layers == 2) {
1809  layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
1810  } else if (ts_number_layers == 3) {
1811  if (frame_cnt % 2 != 0)
1812  layer_id.temporal_layer_id = 2;
1813  else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
1814  layer_id.temporal_layer_id = 1;
1815  }
1816  aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1817  }
1818 
1819  if (set_err_resil_frame && cfg.g_error_resilient == 0) {
1820  // Set error_resilient per frame: off/0 for base layer and
1821  // on/1 for enhancement layer frames.
1822  // Note that this can only be done on the fly/per-frame/layer
1823  // if the config error_resilience is off/0. See the logic for updating
1824  // in set_encoder_config():
1825  // tool_cfg->error_resilient_mode =
1826  // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
1827  const int err_resil_mode =
1828  layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
1830  err_resil_mode);
1831  }
1832 
1833  layer = slx * ts_number_layers + layer_id.temporal_layer_id;
1834  if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
1835 
1836  if (test_dynamic_scaling_single_layer) {
1837  // Example to scale source down by 2x2, then 4x4, and then back up to
1838  // 2x2, and then back to original.
1839  int frame_2x2 = 200;
1840  int frame_4x4 = 400;
1841  int frame_2x2up = 600;
1842  int frame_orig = 800;
1843  if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
1844  // Scale source down by 2x2.
1845  struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1846  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1847  } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
1848  // Scale source down by 4x4.
1849  struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
1850  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1851  } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
1852  // Source back up to 2x2.
1853  struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1854  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1855  } else if (frame_cnt >= frame_orig) {
1856  // Source back up to original resolution (no scaling).
1857  struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
1858  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1859  }
1860  if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
1861  frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
1862  // For dynamic resize testing on single layer: refresh all references
1863  // on the resized frame: this is to avoid decode error:
1864  // if resize goes down by >= 4x4 then libaom decoder will throw an
1865  // error that some reference (even though not used) is beyond the
1866  // limit size (must be smaller than 4x4).
1867  for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
1868  if (use_svc_control) {
1870  &ref_frame_config);
1872  &ref_frame_comp_pred);
1873  }
1874  }
1875  }
1876 
1877  // Change target_bitrate every other frame.
1878  if (test_changing_bitrate && frame_cnt % 2 == 0) {
1879  if (frame_cnt < 500)
1880  cfg.rc_target_bitrate += 10;
1881  else
1882  cfg.rc_target_bitrate -= 10;
1883  // Do big increase and decrease.
1884  if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
1885  if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
1886  if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
1887  // Call change_config, or bypass with new control.
1888  // res = aom_codec_enc_config_set(&codec, &cfg);
1890  cfg.rc_target_bitrate))
1891  die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
1892  }
1893 
1894  if (rc_api) {
1895  aom::AV1FrameParamsRTC frame_params;
1896  // TODO(jianj): Add support for SVC.
1897  frame_params.spatial_layer_id = 0;
1898  frame_params.temporal_layer_id = 0;
1899  frame_params.frame_type =
1900  is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
1901  rc_api->ComputeQP(frame_params);
1902  const int current_qp = rc_api->GetQP();
1904  qindex_to_quantizer(current_qp))) {
1905  die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
1906  }
1907  }
1908 
1909  if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
1910 
1911  // Do the layer encode.
1912  aom_usec_timer_start(&timer);
1913  if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
1914  die_codec(&codec, "Failed to encode frame");
1915  aom_usec_timer_mark(&timer);
1916  cx_time += aom_usec_timer_elapsed(&timer);
1917  cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
1918  frame_cnt_layer[layer] += 1;
1919 
1920  got_data = 0;
1921  // For simulcast (mode 11): write out each spatial layer to the file.
1922  int ss_layers_write = (app_input.layering_mode == 11)
1923  ? layer_id.spatial_layer_id + 1
1924  : ss_number_layers;
1925  while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
1926  switch (pkt->kind) {
1928  for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
1929  ++sl) {
1930  for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
1931  ++tl) {
1932  int j = sl * ts_number_layers + tl;
1933  if (app_input.output_obu) {
1934  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1935  obu_files[j]);
1936  } else {
1937  aom_video_writer_write_frame(
1938  outfile[j],
1939  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1940  pkt->data.frame.sz, pts);
1941  }
1942  if (sl == layer_id.spatial_layer_id)
1943  rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
1944  }
1945  }
1946  got_data = 1;
1947  // Write everything into the top layer.
1948  if (app_input.output_obu) {
1949  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1950  total_layer_obu_file);
1951  } else {
1952  aom_video_writer_write_frame(
1953  total_layer_file,
1954  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1955  pkt->data.frame.sz, pts);
1956  }
1957  // Keep count of rate control stats per layer (for non-key).
1958  if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
1959  int j = layer_id.spatial_layer_id * ts_number_layers +
1960  layer_id.temporal_layer_id;
1961  assert(j >= 0);
1962  rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
1963  rc.layer_avg_rate_mismatch[j] +=
1964  fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
1965  rc.layer_pfb[j];
1966  if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
1967  }
1968 
1969  if (rc_api) {
1970  rc_api->PostEncodeUpdate(pkt->data.frame.sz);
1971  }
1972  // Update for short-time encoding bitrate states, for moving window
1973  // of size rc->window, shifted by rc->window / 2.
1974  // Ignore first window segment, due to key frame.
1975  // For spatial layers: only do this for top/highest SL.
1976  if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
1977  sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1978  rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
1979  if (frame_cnt % rc.window_size == 0) {
1980  rc.window_count += 1;
1981  rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
1982  rc.variance_st_encoding_bitrate +=
1983  (sum_bitrate / rc.window_size) *
1984  (sum_bitrate / rc.window_size);
1985  sum_bitrate = 0.0;
1986  }
1987  }
1988  // Second shifted window.
1989  if (frame_cnt > rc.window_size + rc.window_size / 2 &&
1990  slx == ss_number_layers - 1) {
1991  sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1992  if (frame_cnt > 2 * rc.window_size &&
1993  frame_cnt % rc.window_size == 0) {
1994  rc.window_count += 1;
1995  rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
1996  rc.variance_st_encoding_bitrate +=
1997  (sum_bitrate2 / rc.window_size) *
1998  (sum_bitrate2 / rc.window_size);
1999  sum_bitrate2 = 0.0;
2000  }
2001  }
2002 
2003 #if CONFIG_AV1_DECODER
2004  if (app_input.decode) {
2005  if (aom_codec_decode(
2006  &decoder,
2007  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2008  pkt->data.frame.sz, NULL))
2009  die_codec(&decoder, "Failed to decode frame");
2010  }
2011 #endif
2012 
2013  break;
2014  case AOM_CODEC_PSNR_PKT:
2015  if (app_input.show_psnr) {
2016  psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2017  psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2018  for (int plane = 0; plane < 4; plane++) {
2019  psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2020  }
2021  psnr_stream.psnr_count[0]++;
2022  }
2023  break;
2024  default: break;
2025  }
2026  }
2027 #if CONFIG_AV1_DECODER
2028  if (got_data && app_input.decode) {
2029  // Don't look for mismatch on top spatial and top temporal layers as
2030  // they are non reference frames.
2031  if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2032  !(layer_id.temporal_layer_id > 0 &&
2033  layer_id.temporal_layer_id == ts_number_layers - 1)) {
2034  if (test_decode(&codec, &decoder, frame_cnt)) {
2035 #if CONFIG_INTERNAL_STATS
2036  fprintf(stats_file, "First mismatch occurred in frame %d\n",
2037  frame_cnt);
2038  fclose(stats_file);
2039 #endif
2040  fatal("Mismatch seen");
2041  }
2042  }
2043  }
2044 #endif
2045  } // loop over spatial layers
2046  ++frame_cnt;
2047  pts += frame_duration;
2048  }
2049 
2050  close_input_file(&(app_input.input_ctx));
2051  printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2052  ts_number_layers);
2053 
2054  printf("\n");
2055  for (int slx = 0; slx < ss_number_layers; slx++)
2056  for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2057  int lx = slx * ts_number_layers + tlx;
2058  printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2059  slx, tlx, frame_cnt_layer[lx],
2060  (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2061  1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2062  }
2063 
2064  printf("\n");
2065  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2066  frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2067  1000000 * (double)frame_cnt / (double)cx_time);
2068 
2069  if (app_input.show_psnr) {
2070  show_psnr(&psnr_stream, 255.0);
2071  }
2072 
2073  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2074 
2075 #if CONFIG_AV1_DECODER
2076  if (app_input.decode) {
2077  if (aom_codec_destroy(&decoder))
2078  die_codec(&decoder, "Failed to destroy decoder");
2079  }
2080 #endif
2081 
2082 #if CONFIG_INTERNAL_STATS
2083  fprintf(stats_file, "No mismatch detected in recon buffers\n");
2084  fclose(stats_file);
2085 #endif
2086 
2087  // Try to rewrite the output file headers with the actual frame count.
2088  for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2089  aom_video_writer_close(outfile[i]);
2090  aom_video_writer_close(total_layer_file);
2091 
2092  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
2093  aom_img_free(&raw);
2094  }
2095  return EXIT_SUCCESS;
2096 }
Describes the decoder algorithm interface to applications.
Describes the encoder algorithm interface to applications.
@ AOM_CSP_UNKNOWN
Definition: aom_image.h:143
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
#define AOM_IMG_FMT_HIGHBITDEPTH
Definition: aom_image.h:38
@ AOM_IMG_FMT_I420
Definition: aom_image.h:45
enum aom_img_fmt aom_img_fmt_t
List of supported image formats.
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
#define AOM_MAX_LAYERS
Definition: aomcx.h:1662
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
#define AOM_MAX_TS_LAYERS
Definition: aomcx.h:1664
@ AOM_FULL_SUPERFRAME_DROP
Definition: aomcx.h:1711
@ AV1E_SET_BITRATE_ONE_PASS_CBR
Codec control to set the target bitrate in kilobits per second, unsigned int parameter....
Definition: aomcx.h:1528
@ AV1E_SET_ENABLE_SMOOTH_INTRA
Codec control function to turn on / off smooth intra modes usage, int parameter.
Definition: aomcx.h:1070
@ AV1E_SET_ENABLE_TPL_MODEL
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter.
Definition: aomcx.h:408
@ AV1E_SET_AQ_MODE
Codec control function to set adaptive quantization mode, unsigned int parameter.
Definition: aomcx.h:468
@ AV1E_SET_SVC_LAYER_ID
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition: aomcx.h:1276
@ AV1E_SET_SVC_REF_FRAME_CONFIG
Codec control function to set reference frame config: the ref_idx and the refresh flags for each buff...
Definition: aomcx.h:1287
@ AV1E_SET_TUNE_CONTENT
Codec control function to set content type, aom_tune_content parameter.
Definition: aomcx.h:497
@ AV1E_SET_CDF_UPDATE_MODE
Codec control function to set CDF update mode, unsigned int parameter.
Definition: aomcx.h:506
@ AV1E_SET_MAX_CONSEC_FRAME_DROP_CBR
Codec control to set the maximum number of consecutive frame drops allowed for the frame dropper in 1...
Definition: aomcx.h:1534
@ AV1E_SET_ENABLE_ANGLE_DELTA
Codec control function to turn on/off intra angle delta, int parameter.
Definition: aomcx.h:1117
@ AV1E_SET_MV_COST_UPD_FREQ
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition: aomcx.h:1254
@ AV1E_SET_INTRA_DEFAULT_TX_ONLY
Control to use default tx type only for intra modes, int parameter.
Definition: aomcx.h:1203
@ AV1E_SET_SVC_REF_FRAME_COMP_PRED
Codec control function to set reference frame compound prediction. aom_svc_ref_frame_comp_pred_t* par...
Definition: aomcx.h:1392
@ AV1E_SET_ENABLE_INTRABC
Codec control function to turn on/off intra block copy mode, int parameter.
Definition: aomcx.h:1113
@ AV1E_SET_ENABLE_WARPED_MOTION
Codec control function to turn on / off warped motion usage at sequence level, int parameter.
Definition: aomcx.h:1038
@ AV1E_SET_RTC_EXTERNAL_RC
Codec control function to set flag for rate control used by external encoders.
Definition: aomcx.h:1427
@ AV1E_SET_COEFF_COST_UPD_FREQ
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition: aomcx.h:1234
@ AV1E_SET_ENABLE_CDEF
Codec control function to encode with CDEF, unsigned int parameter.
Definition: aomcx.h:670
@ AOME_SET_ACTIVEMAP
Codec control function to pass an Active map to encoder, aom_active_map_t* parameter.
Definition: aomcx.h:190
@ AV1E_SET_DV_COST_UPD_FREQ
Control to set frequency of the cost updates for intrabc motion vectors, unsigned int parameter.
Definition: aomcx.h:1358
@ AV1E_SET_SVC_FRAME_DROP_MODE
Codec control to set the frame drop mode for SVC, unsigned int parameter. The valid values are consta...
Definition: aomcx.h:1540
@ AV1E_SET_SVC_PARAMS
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition: aomcx.h:1281
@ AV1E_SET_ENABLE_FILTER_INTRA
Codec control function to turn on / off filter intra usage at sequence level, int parameter.
Definition: aomcx.h:1059
@ AV1E_SET_ENABLE_PALETTE
Codec control function to turn on/off palette mode, int parameter.
Definition: aomcx.h:1109
@ AV1E_SET_ENABLE_CFL_INTRA
Codec control function to turn on / off CFL uv intra mode usage, int parameter.
Definition: aomcx.h:1088
@ AOME_SET_MAX_INTRA_BITRATE_PCT
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition: aomcx.h:306
@ AV1E_SET_ERROR_RESILIENT_MODE
Codec control function to enable error_resilient_mode, int parameter.
Definition: aomcx.h:442
@ AV1E_SET_ENABLE_OBMC
Codec control function to predict with OBMC mode, unsigned int parameter.
Definition: aomcx.h:697
@ AV1E_SET_LOOPFILTER_CONTROL
Codec control to control loop filter.
Definition: aomcx.h:1407
@ AOME_SET_SCALEMODE
Codec control function to set encoder scaling mode for the next frame to be coded,...
Definition: aomcx.h:197
@ AV1E_SET_TILE_COLUMNS
Codec control function to set number of tile columns. unsigned int parameter.
Definition: aomcx.h:380
@ AV1E_SET_ENABLE_ORDER_HINT
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mod...
Definition: aomcx.h:865
@ AV1E_SET_DELTAQ_MODE
Codec control function to set the delta q mode, unsigned int parameter.
Definition: aomcx.h:1131
@ AV1E_SET_ENABLE_GLOBAL_MOTION
Codec control function to turn on / off global motion usage for a sequence, int parameter.
Definition: aomcx.h:1028
@ AOME_SET_CPUUSED
Codec control function to set encoder internal speed settings, int parameter.
Definition: aomcx.h:220
@ AV1E_SET_GF_CBR_BOOST_PCT
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition: aomcx.h:339
@ AV1E_SET_QUANTIZER_ONE_PASS
Codec control to set quantizer for the next frame, int parameter.
Definition: aomcx.h:1490
@ AV1E_SET_MODE_COST_UPD_FREQ
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition: aomcx.h:1244
@ AV1_GET_NEW_FRAME_IMAGE
Codec control function to get a pointer to the new frame.
Definition: aom.h:70
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
enum aom_bit_depth aom_bit_depth_t
Bit depth for codec. This enumeration determines the bit depth of the codec.
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
long aom_codec_flags_t
Initialization-time Feature Enabling.
Definition: aom_codec.h:228
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition: aom_codec.h:254
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
aom_codec_err_t
Algorithm return codes.
Definition: aom_codec.h:155
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data)
aom_codec_control wrapper macro (adds type-checking, less flexible)
Definition: aom_codec.h:525
const void * aom_codec_iter_t
Iterator.
Definition: aom_codec.h:288
#define AOM_FRAME_IS_KEY
Definition: aom_codec.h:271
@ AOM_BITS_8
Definition: aom_codec.h:319
@ AOM_BITS_10
Definition: aom_codec.h:320
@ AOM_CODEC_INVALID_PARAM
An application-supplied parameter is not valid.
Definition: aom_codec.h:200
@ AOM_CODEC_MEM_ERROR
Memory operation failed.
Definition: aom_codec.h:163
@ AOM_CODEC_OK
Operation completed without error.
Definition: aom_codec.h:157
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data, size_t data_sz, void *user_priv)
Decode data.
#define aom_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_dec_init_ver()
Definition: aom_decoder.h:129
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition: aom_encoder.h:939
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition: aom_encoder.h:1012
#define AOM_CODEC_USE_HIGHBITDEPTH
Definition: aom_encoder.h:80
#define AOM_CODEC_USE_PSNR
Initialization-time Feature Enabling.
Definition: aom_encoder.h:79
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
@ AOM_CBR
Definition: aom_encoder.h:185
@ AOM_KF_AUTO
Definition: aom_encoder.h:200
@ AOM_CODEC_PSNR_PKT
Definition: aom_encoder.h:111
@ AOM_CODEC_CX_FRAME_PKT
Definition: aom_encoder.h:108
aom active region map
Definition: aomcx.h:1596
unsigned int rows
Definition: aomcx.h:1599
unsigned int cols
Definition: aomcx.h:1600
unsigned char * active_map
specify an on (1) or off (0) each 16x16 region within a frame
Definition: aomcx.h:1598
Codec context structure.
Definition: aom_codec.h:298
Encoder output packet.
Definition: aom_encoder.h:120
enum aom_codec_cx_pkt_kind kind
Definition: aom_encoder.h:121
double psnr[4]
Definition: aom_encoder.h:143
union aom_codec_cx_pkt::@1 data
struct aom_codec_cx_pkt::@1::@2 frame
Encoder configuration structure.
Definition: aom_encoder.h:385
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: aom_encoder.h:473
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition: aom_encoder.h:538
struct aom_rational g_timebase
Stream timebase units.
Definition: aom_encoder.h:487
unsigned int g_usage
Algorithm specific "usage" value.
Definition: aom_encoder.h:397
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition: aom_encoder.h:703
unsigned int g_h
Height of the frame.
Definition: aom_encoder.h:433
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition: aom_encoder.h:766
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: aom_encoder.h:621
unsigned int g_threads
Maximum number of threads to use.
Definition: aom_encoder.h:405
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: aom_encoder.h:775
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: aom_encoder.h:516
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition: aom_encoder.h:712
unsigned int g_profile
Bitstream profile to use.
Definition: aom_encoder.h:415
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: aom_encoder.h:465
unsigned int g_w
Width of the frame.
Definition: aom_encoder.h:424
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition: aom_encoder.h:679
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: aom_encoder.h:784
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: aom_encoder.h:495
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition: aom_encoder.h:666
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition: aom_encoder.h:721
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition: aom_encoder.h:656
unsigned int rc_target_bitrate
Target data rate.
Definition: aom_encoder.h:642
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition: aom_encoder.h:547
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition: aom_encoder.h:688
Image Descriptor.
Definition: aom_image.h:182
aom_img_fmt_t fmt
Definition: aom_image.h:183
unsigned int d_w
Definition: aom_image.h:197
unsigned int d_h
Definition: aom_image.h:198
int num
Definition: aom_encoder.h:163
int den
Definition: aom_encoder.h:164
aom image scaling mode
Definition: aomcx.h:1608
Definition: aomcx.h:1667
int temporal_layer_id
Definition: aomcx.h:1669
int spatial_layer_id
Definition: aomcx.h:1668
Definition: aomcx.h:1678
int max_quantizers[32]
Definition: aomcx.h:1681
int number_spatial_layers
Definition: aomcx.h:1679
int layer_target_bitrate[32]
Definition: aomcx.h:1686
int framerate_factor[8]
Definition: aomcx.h:1688
int min_quantizers[32]
Definition: aomcx.h:1682
int scaling_factor_den[4]
Definition: aomcx.h:1684
int number_temporal_layers
Definition: aomcx.h:1680
int scaling_factor_num[4]
Definition: aomcx.h:1683
Definition: aomcx.h:1702
int use_comp_pred[3]
Definition: aomcx.h:1705
Definition: aomcx.h:1692
int reference[7]
Definition: aomcx.h:1695
int refresh[8]
Definition: aomcx.h:1698
int ref_idx[7]
Definition: aomcx.h:1697