WebM Codec SDK
vp9_spatial_svc_encoder
1 /*
2  * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  * Use of this source code is governed by a BSD-style license
5  * that can be found in the LICENSE file in the root of the source
6  * tree. An additional intellectual property rights grant can be found
7  * in the file PATENTS. All contributing project authors may
8  * be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * This is an example demonstrating how to implement a multi-layer
13  * VP9 encoding scheme based on spatial scalability for video applications
14  * that benefit from a scalable bitstream.
15  */
16 
17 #include <math.h>
18 #include <stdarg.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 
23 #include "../args.h"
24 #include "../tools_common.h"
25 #include "../video_writer.h"
26 
27 #include "../vpx_ports/vpx_timer.h"
28 #include "./svc_context.h"
29 #include "vpx/vp8cx.h"
30 #include "vpx/vpx_encoder.h"
31 #include "../vpxstats.h"
32 #include "vp9/encoder/vp9_encoder.h"
33 #include "./y4minput.h"
34 
35 #define OUTPUT_RC_STATS 1
36 
37 #define SIMULCAST_MODE 0
38 
39 static const arg_def_t outputfile =
40  ARG_DEF("o", "output", 1, "Output filename");
41 static const arg_def_t skip_frames_arg =
42  ARG_DEF("s", "skip-frames", 1, "input frames to skip");
43 static const arg_def_t frames_arg =
44  ARG_DEF("f", "frames", 1, "number of frames to encode");
45 static const arg_def_t threads_arg =
46  ARG_DEF("th", "threads", 1, "number of threads to use");
47 #if OUTPUT_RC_STATS
48 static const arg_def_t output_rc_stats_arg =
49  ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
50 #endif
51 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
52 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
53 static const arg_def_t timebase_arg =
54  ARG_DEF("t", "timebase", 1, "timebase (num/den)");
55 static const arg_def_t bitrate_arg = ARG_DEF(
56  "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
57 static const arg_def_t spatial_layers_arg =
58  ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
59 static const arg_def_t temporal_layers_arg =
60  ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
61 static const arg_def_t temporal_layering_mode_arg =
62  ARG_DEF("tlm", "temporal-layering-mode", 1,
63  "temporal layering scheme."
64  "VP9E_TEMPORAL_LAYERING_MODE");
65 static const arg_def_t kf_dist_arg =
66  ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
67 static const arg_def_t scale_factors_arg =
68  ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
69 static const arg_def_t passes_arg =
70  ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
71 static const arg_def_t pass_arg =
72  ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");
73 static const arg_def_t fpf_name_arg =
74  ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");
75 static const arg_def_t min_q_arg =
76  ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
77 static const arg_def_t max_q_arg =
78  ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
79 static const arg_def_t min_bitrate_arg =
80  ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
81 static const arg_def_t max_bitrate_arg =
82  ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
83 static const arg_def_t lag_in_frame_arg =
84  ARG_DEF(NULL, "lag-in-frames", 1,
85  "Number of frame to input before "
86  "generating any outputs");
87 static const arg_def_t rc_end_usage_arg =
88  ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
89 static const arg_def_t speed_arg =
90  ARG_DEF("sp", "speed", 1, "speed configuration");
91 static const arg_def_t aqmode_arg =
92  ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
93 static const arg_def_t bitrates_arg =
94  ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]");
95 static const arg_def_t dropframe_thresh_arg =
96  ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
97 static const struct arg_enum_list tune_content_enum[] = {
98  { "default", VP9E_CONTENT_DEFAULT },
99  { "screen", VP9E_CONTENT_SCREEN },
100  { "film", VP9E_CONTENT_FILM },
101  { NULL, 0 }
102 };
103 
104 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
105  NULL, "tune-content", 1, "Tune content type", tune_content_enum);
106 static const arg_def_t inter_layer_pred_arg = ARG_DEF(
107  NULL, "inter-layer-pred", 1, "0 - 3: On, Off, Key-frames, Constrained");
108 
#if CONFIG_VP9_HIGHBITDEPTH
// Bit depths selectable with -d / --bit-depth; only compiled in for
// high-bit-depth builds. The { NULL, 0 } entry terminates the list.
static const struct arg_enum_list bitdepth_enum[] = {
  { "8", VPX_BITS_8 },
  { "10", VPX_BITS_10 },
  { "12", VPX_BITS_12 },
  { NULL, 0 }
};

static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
    "d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ", bitdepth_enum);
#endif  // CONFIG_VP9_HIGHBITDEPTH
117 
118 static const arg_def_t *svc_args[] = { &frames_arg,
119  &outputfile,
120  &width_arg,
121  &height_arg,
122  &timebase_arg,
123  &bitrate_arg,
124  &skip_frames_arg,
125  &spatial_layers_arg,
126  &kf_dist_arg,
127  &scale_factors_arg,
128  &passes_arg,
129  &pass_arg,
130  &fpf_name_arg,
131  &min_q_arg,
132  &max_q_arg,
133  &min_bitrate_arg,
134  &max_bitrate_arg,
135  &temporal_layers_arg,
136  &temporal_layering_mode_arg,
137  &lag_in_frame_arg,
138  &threads_arg,
139  &aqmode_arg,
140 #if OUTPUT_RC_STATS
141  &output_rc_stats_arg,
142 #endif
143 
144 #if CONFIG_VP9_HIGHBITDEPTH
145  &bitdepth_arg,
146 #endif
147  &speed_arg,
148  &rc_end_usage_arg,
149  &bitrates_arg,
150  &dropframe_thresh_arg,
151  &tune_content_arg,
152  &inter_layer_pred_arg,
153  NULL };
154 
// Values parse_command_line() applies before looking at the command line.

// Input handling.
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;
static const uint32_t default_width = 1920;
static const uint32_t default_height = 1080;

// Timing and rate.
static const uint32_t default_timebase_num = 1;
static const uint32_t default_timebase_den = 60;
static const uint32_t default_bitrate = 1000;

// Layering and key-frame spacing.
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_kf_dist = 100;
static const uint32_t default_temporal_layering_mode = 0;

// Diagnostics and encoder resources.
static const uint32_t default_output_rc_stats = 0;
static const int32_t default_speed = -1;    // -1 means use library default.
static const uint32_t default_threads = 0;  // zero means use library default.
169 
170 typedef struct {
171  const char *output_filename;
172  uint32_t frames_to_code;
173  uint32_t frames_to_skip;
174  struct VpxInputContext input_ctx;
175  stats_io_t rc_stats;
176  int passes;
177  int pass;
178  int tune_content;
179  int inter_layer_pred;
180 } AppInput;
181 
182 static const char *exec_name;
183 
184 void usage_exit(void) {
185  fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
186  exec_name);
187  fprintf(stderr, "Options:\n");
188  arg_show_usage(stderr, svc_args);
189  exit(EXIT_FAILURE);
190 }
191 
192 static void parse_command_line(int argc, const char **argv_,
193  AppInput *app_input, SvcContext *svc_ctx,
194  vpx_codec_enc_cfg_t *enc_cfg) {
195  struct arg arg;
196  char **argv = NULL;
197  char **argi = NULL;
198  char **argj = NULL;
199  vpx_codec_err_t res;
200  int passes = 0;
201  int pass = 0;
202  const char *fpf_file_name = NULL;
203  unsigned int min_bitrate = 0;
204  unsigned int max_bitrate = 0;
205  char string_options[1024] = { 0 };
206 
207  // initialize SvcContext with parameters that will be passed to vpx_svc_init
208  svc_ctx->log_level = SVC_LOG_DEBUG;
209  svc_ctx->spatial_layers = default_spatial_layers;
210  svc_ctx->temporal_layers = default_temporal_layers;
211  svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
212 #if OUTPUT_RC_STATS
213  svc_ctx->output_rc_stat = default_output_rc_stats;
214 #endif
215  svc_ctx->speed = default_speed;
216  svc_ctx->threads = default_threads;
217 
218  // start with default encoder configuration
219  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
220  if (res) {
221  die("Failed to get config: %s\n", vpx_codec_err_to_string(res));
222  }
223  // update enc_cfg with app default values
224  enc_cfg->g_w = default_width;
225  enc_cfg->g_h = default_height;
226  enc_cfg->g_timebase.num = default_timebase_num;
227  enc_cfg->g_timebase.den = default_timebase_den;
228  enc_cfg->rc_target_bitrate = default_bitrate;
229  enc_cfg->kf_min_dist = default_kf_dist;
230  enc_cfg->kf_max_dist = default_kf_dist;
231  enc_cfg->rc_end_usage = VPX_CQ;
232 
233  // initialize AppInput with default values
234  app_input->frames_to_code = default_frames_to_code;
235  app_input->frames_to_skip = default_frames_to_skip;
236 
237  // process command line options
238  argv = argv_dup(argc - 1, argv_ + 1);
239  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
240  arg.argv_step = 1;
241 
242  if (arg_match(&arg, &frames_arg, argi)) {
243  app_input->frames_to_code = arg_parse_uint(&arg);
244  } else if (arg_match(&arg, &outputfile, argi)) {
245  app_input->output_filename = arg.val;
246  } else if (arg_match(&arg, &width_arg, argi)) {
247  enc_cfg->g_w = arg_parse_uint(&arg);
248  } else if (arg_match(&arg, &height_arg, argi)) {
249  enc_cfg->g_h = arg_parse_uint(&arg);
250  } else if (arg_match(&arg, &timebase_arg, argi)) {
251  enc_cfg->g_timebase = arg_parse_rational(&arg);
252  } else if (arg_match(&arg, &bitrate_arg, argi)) {
253  enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
254  } else if (arg_match(&arg, &skip_frames_arg, argi)) {
255  app_input->frames_to_skip = arg_parse_uint(&arg);
256  } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
257  svc_ctx->spatial_layers = arg_parse_uint(&arg);
258  } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
259  svc_ctx->temporal_layers = arg_parse_uint(&arg);
260 #if OUTPUT_RC_STATS
261  } else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
262  svc_ctx->output_rc_stat = arg_parse_uint(&arg);
263 #endif
264  } else if (arg_match(&arg, &speed_arg, argi)) {
265  svc_ctx->speed = arg_parse_uint(&arg);
266  if (svc_ctx->speed > 9) {
267  warn("Mapping speed %d to speed 9.\n", svc_ctx->speed);
268  }
269  } else if (arg_match(&arg, &aqmode_arg, argi)) {
270  svc_ctx->aqmode = arg_parse_uint(&arg);
271  } else if (arg_match(&arg, &threads_arg, argi)) {
272  svc_ctx->threads = arg_parse_uint(&arg);
273  } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
274  svc_ctx->temporal_layering_mode = enc_cfg->temporal_layering_mode =
275  arg_parse_int(&arg);
276  if (svc_ctx->temporal_layering_mode) {
277  enc_cfg->g_error_resilient = 1;
278  }
279  } else if (arg_match(&arg, &kf_dist_arg, argi)) {
280  enc_cfg->kf_min_dist = arg_parse_uint(&arg);
281  enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
282  } else if (arg_match(&arg, &scale_factors_arg, argi)) {
283  strncat(string_options, " scale-factors=",
284  sizeof(string_options) - strlen(string_options) - 1);
285  strncat(string_options, arg.val,
286  sizeof(string_options) - strlen(string_options) - 1);
287  } else if (arg_match(&arg, &bitrates_arg, argi)) {
288  strncat(string_options, " bitrates=",
289  sizeof(string_options) - strlen(string_options) - 1);
290  strncat(string_options, arg.val,
291  sizeof(string_options) - strlen(string_options) - 1);
292  } else if (arg_match(&arg, &passes_arg, argi)) {
293  passes = arg_parse_uint(&arg);
294  if (passes < 1 || passes > 2) {
295  die("Error: Invalid number of passes (%d)\n", passes);
296  }
297  } else if (arg_match(&arg, &pass_arg, argi)) {
298  pass = arg_parse_uint(&arg);
299  if (pass < 1 || pass > 2) {
300  die("Error: Invalid pass selected (%d)\n", pass);
301  }
302  } else if (arg_match(&arg, &fpf_name_arg, argi)) {
303  fpf_file_name = arg.val;
304  } else if (arg_match(&arg, &min_q_arg, argi)) {
305  strncat(string_options, " min-quantizers=",
306  sizeof(string_options) - strlen(string_options) - 1);
307  strncat(string_options, arg.val,
308  sizeof(string_options) - strlen(string_options) - 1);
309  } else if (arg_match(&arg, &max_q_arg, argi)) {
310  strncat(string_options, " max-quantizers=",
311  sizeof(string_options) - strlen(string_options) - 1);
312  strncat(string_options, arg.val,
313  sizeof(string_options) - strlen(string_options) - 1);
314  } else if (arg_match(&arg, &min_bitrate_arg, argi)) {
315  min_bitrate = arg_parse_uint(&arg);
316  } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
317  max_bitrate = arg_parse_uint(&arg);
318  } else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
319  enc_cfg->g_lag_in_frames = arg_parse_uint(&arg);
320  } else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
321  enc_cfg->rc_end_usage = arg_parse_uint(&arg);
322 #if CONFIG_VP9_HIGHBITDEPTH
323  } else if (arg_match(&arg, &bitdepth_arg, argi)) {
324  enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg);
325  switch (enc_cfg->g_bit_depth) {
326  case VPX_BITS_8:
327  enc_cfg->g_input_bit_depth = 8;
328  enc_cfg->g_profile = 0;
329  break;
330  case VPX_BITS_10:
331  enc_cfg->g_input_bit_depth = 10;
332  enc_cfg->g_profile = 2;
333  break;
334  case VPX_BITS_12:
335  enc_cfg->g_input_bit_depth = 12;
336  enc_cfg->g_profile = 2;
337  break;
338  default:
339  die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
340  break;
341  }
342 #endif // CONFIG_VP9_HIGHBITDEPTH
343  } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
344  enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
345  } else if (arg_match(&arg, &tune_content_arg, argi)) {
346  app_input->tune_content = arg_parse_uint(&arg);
347  } else if (arg_match(&arg, &inter_layer_pred_arg, argi)) {
348  app_input->inter_layer_pred = arg_parse_uint(&arg);
349  } else {
350  ++argj;
351  }
352  }
353 
354  // There will be a space in front of the string options
355  if (strlen(string_options) > 0)
356  vpx_svc_set_options(svc_ctx, string_options + 1);
357 
358  if (passes == 0 || passes == 1) {
359  if (pass) {
360  fprintf(stderr, "pass is ignored since there's only one pass\n");
361  }
362  enc_cfg->g_pass = VPX_RC_ONE_PASS;
363  } else {
364  if (pass == 0) {
365  die("pass must be specified when passes is 2\n");
366  }
367 
368  if (fpf_file_name == NULL) {
369  die("fpf must be specified when passes is 2\n");
370  }
371 
372  if (pass == 1) {
373  enc_cfg->g_pass = VPX_RC_FIRST_PASS;
374  if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) {
375  fatal("Failed to open statistics store");
376  }
377  } else {
378  enc_cfg->g_pass = VPX_RC_LAST_PASS;
379  if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) {
380  fatal("Failed to open statistics store");
381  }
382  enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats);
383  }
384  app_input->passes = passes;
385  app_input->pass = pass;
386  }
387 
388  if (enc_cfg->rc_target_bitrate > 0) {
389  if (min_bitrate > 0) {
390  enc_cfg->rc_2pass_vbr_minsection_pct =
391  min_bitrate * 100 / enc_cfg->rc_target_bitrate;
392  }
393  if (max_bitrate > 0) {
394  enc_cfg->rc_2pass_vbr_maxsection_pct =
395  max_bitrate * 100 / enc_cfg->rc_target_bitrate;
396  }
397  }
398 
399  // Check for unrecognized options
400  for (argi = argv; *argi; ++argi)
401  if (argi[0][0] == '-' && strlen(argi[0]) > 1)
402  die("Error: Unrecognized option %s\n", *argi);
403 
404  if (argv[0] == NULL) {
405  usage_exit();
406  }
407  app_input->input_ctx.filename = argv[0];
408  free(argv);
409 
410  open_input_file(&app_input->input_ctx);
411  if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
412  enc_cfg->g_w = app_input->input_ctx.width;
413  enc_cfg->g_h = app_input->input_ctx.height;
414  }
415 
416  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
417  enc_cfg->g_h % 2)
418  die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
419 
420  printf(
421  "Codec %s\nframes: %d, skip: %d\n"
422  "layers: %d\n"
423  "width %d, height: %d,\n"
424  "num: %d, den: %d, bitrate: %d,\n"
425  "gop size: %d\n",
426  vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
427  app_input->frames_to_skip, svc_ctx->spatial_layers, enc_cfg->g_w,
428  enc_cfg->g_h, enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
429  enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
430 }
431 
432 #if OUTPUT_RC_STATS
433 // For rate control encoding stats.
434 struct RateControlStats {
435  // Number of input frames per layer.
436  int layer_input_frames[VPX_MAX_LAYERS];
437  // Total (cumulative) number of encoded frames per layer.
438  int layer_tot_enc_frames[VPX_MAX_LAYERS];
439  // Number of encoded non-key frames per layer.
440  int layer_enc_frames[VPX_MAX_LAYERS];
441  // Framerate per layer (cumulative).
442  double layer_framerate[VPX_MAX_LAYERS];
443  // Target average frame size per layer (per-frame-bandwidth per layer).
444  double layer_pfb[VPX_MAX_LAYERS];
445  // Actual average frame size per layer.
446  double layer_avg_frame_size[VPX_MAX_LAYERS];
447  // Average rate mismatch per layer (|target - actual| / target).
448  double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
449  // Actual encoding bitrate per layer (cumulative).
450  double layer_encoding_bitrate[VPX_MAX_LAYERS];
451  // Average of the short-time encoder actual bitrate.
452  // TODO(marpan): Should we add these short-time stats for each layer?
453  double avg_st_encoding_bitrate;
454  // Variance of the short-time encoder actual bitrate.
455  double variance_st_encoding_bitrate;
456  // Window (number of frames) for computing short-time encoding bitrate.
457  int window_size;
458  // Number of window measurements.
459  int window_count;
460 };
461 
462 // Note: these rate control stats assume only 1 key frame in the
463 // sequence (i.e., first frame only).
464 static void set_rate_control_stats(struct RateControlStats *rc,
465  vpx_codec_enc_cfg_t *cfg) {
466  unsigned int sl, tl;
467  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
468  // per-frame-bandwidth, for the rate control encoding stats below.
469  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
470 
471  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
472  for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
473  const int layer = sl * cfg->ts_number_layers + tl;
474  if (cfg->ts_number_layers == 1)
475  rc->layer_framerate[layer] = framerate;
476  else
477  rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl];
478  if (tl > 0) {
479  rc->layer_pfb[layer] =
480  1000.0 *
481  (cfg->layer_target_bitrate[layer] -
482  cfg->layer_target_bitrate[layer - 1]) /
483  (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
484  } else {
485  rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
486  rc->layer_framerate[layer];
487  }
488  rc->layer_input_frames[layer] = 0;
489  rc->layer_enc_frames[layer] = 0;
490  rc->layer_tot_enc_frames[layer] = 0;
491  rc->layer_encoding_bitrate[layer] = 0.0;
492  rc->layer_avg_frame_size[layer] = 0.0;
493  rc->layer_avg_rate_mismatch[layer] = 0.0;
494  }
495  }
496  rc->window_count = 0;
497  rc->window_size = 15;
498  rc->avg_st_encoding_bitrate = 0.0;
499  rc->variance_st_encoding_bitrate = 0.0;
500 }
501 
502 static void printout_rate_control_summary(struct RateControlStats *rc,
503  vpx_codec_enc_cfg_t *cfg,
504  int frame_cnt) {
505  unsigned int sl, tl;
506  double perc_fluctuation = 0.0;
507  int tot_num_frames = 0;
508  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
509  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
511  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
512  tot_num_frames = 0;
513  for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
514  const int layer = sl * cfg->ts_number_layers + tl;
515  const int num_dropped =
516  (tl > 0)
517  ? (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer])
518  : (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] -
519  1);
520  tot_num_frames += rc->layer_input_frames[layer];
521  rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
522  rc->layer_encoding_bitrate[layer] /
523  tot_num_frames;
524  rc->layer_avg_frame_size[layer] =
525  rc->layer_avg_frame_size[layer] / rc->layer_enc_frames[layer];
526  rc->layer_avg_rate_mismatch[layer] = 100.0 *
527  rc->layer_avg_rate_mismatch[layer] /
528  rc->layer_enc_frames[layer];
529  printf("For layer#: sl%d tl%d \n", sl, tl);
530  printf("Bitrate (target vs actual): %d %f.0 kbps\n",
531  cfg->layer_target_bitrate[layer],
532  rc->layer_encoding_bitrate[layer]);
533  printf("Average frame size (target vs actual): %f %f bits\n",
534  rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
535  printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[layer]);
536  printf(
537  "Number of input frames, encoded (non-key) frames, "
538  "and percent dropped frames: %d %d %f.0 \n",
539  rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
540  100.0 * num_dropped / rc->layer_input_frames[layer]);
541  printf("\n");
542  }
543  }
544  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
545  rc->variance_st_encoding_bitrate =
546  rc->variance_st_encoding_bitrate / rc->window_count -
547  (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
548  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
549  rc->avg_st_encoding_bitrate;
550  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
551  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
552  rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
553  perc_fluctuation);
554  printf("Num of input, num of encoded (super) frames: %d %d \n", frame_cnt,
555  tot_num_frames);
556 }
557 
558 static vpx_codec_err_t parse_superframe_index(const uint8_t *data,
559  size_t data_sz, uint64_t sizes[8],
560  int *count) {
561  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
562  // it is a super frame index. If the last byte of real video compression
563  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
564  // not the associated matching marker byte at the front of the index we have
565  // an invalid bitstream and need to return an error.
566 
567  uint8_t marker;
568 
569  marker = *(data + data_sz - 1);
570  *count = 0;
571 
572  if ((marker & 0xe0) == 0xc0) {
573  const uint32_t frames = (marker & 0x7) + 1;
574  const uint32_t mag = ((marker >> 3) & 0x3) + 1;
575  const size_t index_sz = 2 + mag * frames;
576 
577  // This chunk is marked as having a superframe index but doesn't have
578  // enough data for it, thus it's an invalid superframe index.
579  if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;
580 
581  {
582  const uint8_t marker2 = *(data + data_sz - index_sz);
583 
584  // This chunk is marked as having a superframe index but doesn't have
585  // the matching marker byte at the front of the index therefore it's an
586  // invalid chunk.
587  if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;
588  }
589 
590  {
591  // Found a valid superframe index.
592  uint32_t i, j;
593  const uint8_t *x = &data[data_sz - index_sz + 1];
594 
595  for (i = 0; i < frames; ++i) {
596  uint32_t this_sz = 0;
597 
598  for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);
599  sizes[i] = this_sz;
600  }
601  *count = frames;
602  }
603  }
604  return VPX_CODEC_OK;
605 }
606 #endif
607 
608 // Example pattern for spatial layers and 2 temporal layers used in the
609 // bypass/flexible mode. The pattern corresponds to the pattern
610 // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
611 // non-flexible mode.
612 static void set_frame_flags_bypass_mode_ex0(
613  int tl, int num_spatial_layers, int is_key_frame,
614  vpx_svc_ref_frame_config_t *ref_frame_config) {
615  int sl;
616  for (sl = 0; sl < num_spatial_layers; ++sl)
617  ref_frame_config->update_buffer_slot[sl] = 0;
618 
619  for (sl = 0; sl < num_spatial_layers; ++sl) {
620  // Set the buffer idx.
621  if (tl == 0) {
622  ref_frame_config->lst_fb_idx[sl] = sl;
623  if (sl) {
624  if (is_key_frame) {
625  ref_frame_config->lst_fb_idx[sl] = sl - 1;
626  ref_frame_config->gld_fb_idx[sl] = sl;
627  } else {
628  ref_frame_config->gld_fb_idx[sl] = sl - 1;
629  }
630  } else {
631  ref_frame_config->gld_fb_idx[sl] = 0;
632  }
633  ref_frame_config->alt_fb_idx[sl] = 0;
634  } else if (tl == 1) {
635  ref_frame_config->lst_fb_idx[sl] = sl;
636  ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
637  ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
638  }
639  // Set the reference and update flags.
640  if (!tl) {
641  if (!sl) {
642  // Base spatial and base temporal (sl = 0, tl = 0)
643  ref_frame_config->reference_last[sl] = 1;
644  ref_frame_config->reference_golden[sl] = 0;
645  ref_frame_config->reference_alt_ref[sl] = 0;
646  ref_frame_config->update_buffer_slot[sl] |=
647  1 << ref_frame_config->lst_fb_idx[sl];
648  } else {
649  if (is_key_frame) {
650  ref_frame_config->reference_last[sl] = 1;
651  ref_frame_config->reference_golden[sl] = 0;
652  ref_frame_config->reference_alt_ref[sl] = 0;
653  ref_frame_config->update_buffer_slot[sl] |=
654  1 << ref_frame_config->gld_fb_idx[sl];
655  } else {
656  // Non-zero spatiall layer.
657  ref_frame_config->reference_last[sl] = 1;
658  ref_frame_config->reference_golden[sl] = 1;
659  ref_frame_config->reference_alt_ref[sl] = 1;
660  ref_frame_config->update_buffer_slot[sl] |=
661  1 << ref_frame_config->lst_fb_idx[sl];
662  }
663  }
664  } else if (tl == 1) {
665  if (!sl) {
666  // Base spatial and top temporal (tl = 1)
667  ref_frame_config->reference_last[sl] = 1;
668  ref_frame_config->reference_golden[sl] = 0;
669  ref_frame_config->reference_alt_ref[sl] = 0;
670  ref_frame_config->update_buffer_slot[sl] |=
671  1 << ref_frame_config->alt_fb_idx[sl];
672  } else {
673  // Non-zero spatial.
674  if (sl < num_spatial_layers - 1) {
675  ref_frame_config->reference_last[sl] = 1;
676  ref_frame_config->reference_golden[sl] = 1;
677  ref_frame_config->reference_alt_ref[sl] = 0;
678  ref_frame_config->update_buffer_slot[sl] |=
679  1 << ref_frame_config->alt_fb_idx[sl];
680  } else if (sl == num_spatial_layers - 1) {
681  // Top spatial and top temporal (non-reference -- doesn't update any
682  // reference buffers)
683  ref_frame_config->reference_last[sl] = 1;
684  ref_frame_config->reference_golden[sl] = 1;
685  ref_frame_config->reference_alt_ref[sl] = 0;
686  }
687  }
688  }
689  }
690 }
691 
692 // Example pattern for 2 spatial layers and 2 temporal layers used in the
693 // bypass/flexible mode, except only 1 spatial layer when temporal_layer_id = 1.
694 static void set_frame_flags_bypass_mode_ex1(
695  int tl, int num_spatial_layers, int is_key_frame,
696  vpx_svc_ref_frame_config_t *ref_frame_config) {
697  int sl;
698  for (sl = 0; sl < num_spatial_layers; ++sl)
699  ref_frame_config->update_buffer_slot[sl] = 0;
700 
701  if (tl == 0) {
702  if (is_key_frame) {
703  ref_frame_config->lst_fb_idx[1] = 0;
704  ref_frame_config->gld_fb_idx[1] = 1;
705  } else {
706  ref_frame_config->lst_fb_idx[1] = 1;
707  ref_frame_config->gld_fb_idx[1] = 0;
708  }
709  ref_frame_config->alt_fb_idx[1] = 0;
710 
711  ref_frame_config->lst_fb_idx[0] = 0;
712  ref_frame_config->gld_fb_idx[0] = 0;
713  ref_frame_config->alt_fb_idx[0] = 0;
714  }
715  if (tl == 1) {
716  ref_frame_config->lst_fb_idx[0] = 0;
717  ref_frame_config->gld_fb_idx[0] = 1;
718  ref_frame_config->alt_fb_idx[0] = 2;
719 
720  ref_frame_config->lst_fb_idx[1] = 1;
721  ref_frame_config->gld_fb_idx[1] = 2;
722  ref_frame_config->alt_fb_idx[1] = 3;
723  }
724  // Set the reference and update flags.
725  if (tl == 0) {
726  // Base spatial and base temporal (sl = 0, tl = 0)
727  ref_frame_config->reference_last[0] = 1;
728  ref_frame_config->reference_golden[0] = 0;
729  ref_frame_config->reference_alt_ref[0] = 0;
730  ref_frame_config->update_buffer_slot[0] |=
731  1 << ref_frame_config->lst_fb_idx[0];
732 
733  if (is_key_frame) {
734  ref_frame_config->reference_last[1] = 1;
735  ref_frame_config->reference_golden[1] = 0;
736  ref_frame_config->reference_alt_ref[1] = 0;
737  ref_frame_config->update_buffer_slot[1] |=
738  1 << ref_frame_config->gld_fb_idx[1];
739  } else {
740  // Non-zero spatiall layer.
741  ref_frame_config->reference_last[1] = 1;
742  ref_frame_config->reference_golden[1] = 1;
743  ref_frame_config->reference_alt_ref[1] = 1;
744  ref_frame_config->update_buffer_slot[1] |=
745  1 << ref_frame_config->lst_fb_idx[1];
746  }
747  }
748  if (tl == 1) {
749  // Top spatial and top temporal (non-reference -- doesn't update any
750  // reference buffers)
751  ref_frame_config->reference_last[1] = 1;
752  ref_frame_config->reference_golden[1] = 0;
753  ref_frame_config->reference_alt_ref[1] = 0;
754  }
755 }
756 
757 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
758 static void test_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
759  const int frames_out, int *mismatch_seen) {
760  vpx_image_t enc_img, dec_img;
761  struct vp9_ref_frame ref_enc, ref_dec;
762  if (*mismatch_seen) return;
763  /* Get the internal reference frame */
764  ref_enc.idx = 0;
765  ref_dec.idx = 0;
766  vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc);
767  enc_img = ref_enc.img;
768  vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec);
769  dec_img = ref_dec.img;
770 #if CONFIG_VP9_HIGHBITDEPTH
771  if ((enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) !=
772  (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH)) {
773  if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
774  vpx_img_alloc(&enc_img, enc_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH,
775  enc_img.d_w, enc_img.d_h, 16);
776  vpx_img_truncate_16_to_8(&enc_img, &ref_enc.img);
777  }
778  if (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
779  vpx_img_alloc(&dec_img, dec_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH,
780  dec_img.d_w, dec_img.d_h, 16);
781  vpx_img_truncate_16_to_8(&dec_img, &ref_dec.img);
782  }
783  }
784 #endif
785 
786  if (!compare_img(&enc_img, &dec_img)) {
787  int y[4], u[4], v[4];
788 #if CONFIG_VP9_HIGHBITDEPTH
789  if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
790  find_mismatch_high(&enc_img, &dec_img, y, u, v);
791  } else {
792  find_mismatch(&enc_img, &dec_img, y, u, v);
793  }
794 #else
795  find_mismatch(&enc_img, &dec_img, y, u, v);
796 #endif
797  decoder->err = 1;
798  printf(
799  "Encode/decode mismatch on frame %d at"
800  " Y[%d, %d] {%d/%d},"
801  " U[%d, %d] {%d/%d},"
802  " V[%d, %d] {%d/%d}\n",
803  frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1],
804  v[2], v[3]);
805  *mismatch_seen = frames_out;
806  }
807 
808  vpx_img_free(&enc_img);
809  vpx_img_free(&dec_img);
810 }
811 #endif
812 
813 #if OUTPUT_RC_STATS
814 static void svc_output_rc_stats(
815  vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *enc_cfg,
816  vpx_svc_layer_id_t *layer_id, const vpx_codec_cx_pkt_t *cx_pkt,
817  struct RateControlStats *rc, VpxVideoWriter **outfile,
818  const uint32_t frame_cnt, const double framerate) {
819  int num_layers_encoded = 0;
820  unsigned int sl, tl;
821  uint64_t sizes[8];
822  uint64_t sizes_parsed[8];
823  int count = 0;
824  double sum_bitrate = 0.0;
825  double sum_bitrate2 = 0.0;
826  vp9_zero(sizes);
827  vp9_zero(sizes_parsed);
828  vpx_codec_control(codec, VP9E_GET_SVC_LAYER_ID, layer_id);
829  parse_superframe_index(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz,
830  sizes_parsed, &count);
831  if (enc_cfg->ss_number_layers == 1) sizes[0] = cx_pkt->data.frame.sz;
832  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
833  sizes[sl] = 0;
834  if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
835  sizes[sl] = sizes_parsed[num_layers_encoded];
836  num_layers_encoded++;
837  }
838  }
839  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
840  unsigned int sl2;
841  uint64_t tot_size = 0;
842 #if SIMULCAST_MODE
843  for (sl2 = 0; sl2 < sl; ++sl2) {
844  if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
845  }
846  vpx_video_writer_write_frame(outfile[sl],
847  (uint8_t *)(cx_pkt->data.frame.buf) + tot_size,
848  (size_t)(sizes[sl]), cx_pkt->data.frame.pts);
849 #else
850  for (sl2 = 0; sl2 <= sl; ++sl2) {
851  if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
852  }
853  if (tot_size > 0)
854  vpx_video_writer_write_frame(outfile[sl], cx_pkt->data.frame.buf,
855  (size_t)(tot_size), cx_pkt->data.frame.pts);
856 #endif // SIMULCAST_MODE
857  }
858  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
859  if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
860  for (tl = layer_id->temporal_layer_id; tl < enc_cfg->ts_number_layers;
861  ++tl) {
862  const int layer = sl * enc_cfg->ts_number_layers + tl;
863  ++rc->layer_tot_enc_frames[layer];
864  rc->layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
865  // Keep count of rate control stats per layer, for non-key
866  // frames.
867  if (tl == (unsigned int)layer_id->temporal_layer_id &&
868  !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
869  rc->layer_avg_frame_size[layer] += 8.0 * sizes[sl];
870  rc->layer_avg_rate_mismatch[layer] +=
871  fabs(8.0 * sizes[sl] - rc->layer_pfb[layer]) /
872  rc->layer_pfb[layer];
873  ++rc->layer_enc_frames[layer];
874  }
875  }
876  }
877  }
878 
879  // Update for short-time encoding bitrate states, for moving
880  // window of size rc->window, shifted by rc->window / 2.
881  // Ignore first window segment, due to key frame.
882  if (frame_cnt > (unsigned int)rc->window_size) {
883  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
884  if (cx_pkt->data.frame.spatial_layer_encoded[sl])
885  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
886  }
887  if (frame_cnt % rc->window_size == 0) {
888  rc->window_count += 1;
889  rc->avg_st_encoding_bitrate += sum_bitrate / rc->window_size;
890  rc->variance_st_encoding_bitrate +=
891  (sum_bitrate / rc->window_size) * (sum_bitrate / rc->window_size);
892  }
893  }
894 
895  // Second shifted window.
896  if (frame_cnt > (unsigned int)(rc->window_size + rc->window_size / 2)) {
897  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
898  sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
899  }
900 
901  if (frame_cnt > (unsigned int)(2 * rc->window_size) &&
902  frame_cnt % rc->window_size == 0) {
903  rc->window_count += 1;
904  rc->avg_st_encoding_bitrate += sum_bitrate2 / rc->window_size;
905  rc->variance_st_encoding_bitrate +=
906  (sum_bitrate2 / rc->window_size) * (sum_bitrate2 / rc->window_size);
907  }
908  }
909 }
910 #endif
911 
912 int main(int argc, const char **argv) {
913  AppInput app_input;
914  VpxVideoWriter *writer = NULL;
915  VpxVideoInfo info;
916  vpx_codec_ctx_t encoder;
917  vpx_codec_enc_cfg_t enc_cfg;
918  SvcContext svc_ctx;
919  vpx_svc_frame_drop_t svc_drop_frame;
920  uint32_t i;
921  uint32_t frame_cnt = 0;
922  vpx_image_t raw;
923  vpx_codec_err_t res;
924  int pts = 0; /* PTS starts at 0 */
925  int frame_duration = 1; /* 1 timebase tick per frame */
926  int end_of_stream = 0;
927  int frames_received = 0;
928 #if OUTPUT_RC_STATS
929  VpxVideoWriter *outfile[VPX_SS_MAX_LAYERS] = { NULL };
930  struct RateControlStats rc;
931  vpx_svc_layer_id_t layer_id;
932  vpx_svc_ref_frame_config_t ref_frame_config;
933  unsigned int sl;
934  double framerate = 30.0;
935 #endif
936  struct vpx_usec_timer timer;
937  int64_t cx_time = 0;
938 #if CONFIG_INTERNAL_STATS
939  FILE *f = fopen("opsnr.stt", "a");
940 #endif
941 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
942  int mismatch_seen = 0;
943  vpx_codec_ctx_t decoder;
944 #endif
945  memset(&svc_ctx, 0, sizeof(svc_ctx));
946  memset(&app_input, 0, sizeof(AppInput));
947  memset(&info, 0, sizeof(VpxVideoInfo));
948  memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t));
949  memset(&rc, 0, sizeof(struct RateControlStats));
950  exec_name = argv[0];
951 
952  /* Setup default input stream settings */
953  app_input.input_ctx.framerate.numerator = 30;
954  app_input.input_ctx.framerate.denominator = 1;
955  app_input.input_ctx.only_i420 = 1;
956  app_input.input_ctx.bit_depth = 0;
957 
958  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
959 
960  // Y4M reader handles its own allocation.
961  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
962 // Allocate image buffer
963 #if CONFIG_VP9_HIGHBITDEPTH
964  if (!vpx_img_alloc(&raw,
965  enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
966  : VPX_IMG_FMT_I42016,
967  enc_cfg.g_w, enc_cfg.g_h, 32)) {
968  die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
969  }
970 #else
971  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) {
972  die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
973  }
974 #endif // CONFIG_VP9_HIGHBITDEPTH
975  }
976 
977  // Initialize codec
978  if (vpx_svc_init(&svc_ctx, &encoder, vpx_codec_vp9_cx(), &enc_cfg) !=
979  VPX_CODEC_OK)
980  die("Failed to initialize encoder\n");
981 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
982  if (vpx_codec_dec_init(
983  &decoder, get_vpx_decoder_by_name("vp9")->codec_interface(), NULL, 0))
984  die("Failed to initialize decoder\n");
985 #endif
986 
987 #if OUTPUT_RC_STATS
988  rc.window_count = 1;
989  rc.window_size = 15; // Silence a static analysis warning.
990  rc.avg_st_encoding_bitrate = 0.0;
991  rc.variance_st_encoding_bitrate = 0.0;
992  if (svc_ctx.output_rc_stat) {
993  set_rate_control_stats(&rc, &enc_cfg);
994  framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
995  }
996 #endif
997 
998  info.codec_fourcc = VP9_FOURCC;
999  info.frame_width = enc_cfg.g_w;
1000  info.frame_height = enc_cfg.g_h;
1001  info.time_base.numerator = enc_cfg.g_timebase.num;
1002  info.time_base.denominator = enc_cfg.g_timebase.den;
1003 
1004  if (!(app_input.passes == 2 && app_input.pass == 1)) {
1005  // We don't save the bitstream for the 1st pass on two pass rate control
1006  writer =
1007  vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1008  if (!writer)
1009  die("Failed to open %s for writing\n", app_input.output_filename);
1010  }
1011 #if OUTPUT_RC_STATS
1012  // Write out spatial layer stream.
1013  // TODO(marpan/jianj): allow for writing each spatial and temporal stream.
1014  if (svc_ctx.output_rc_stat) {
1015  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
1016  char file_name[PATH_MAX];
1017 
1018  snprintf(file_name, sizeof(file_name), "%s_s%d.ivf",
1019  app_input.output_filename, sl);
1020  outfile[sl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
1021  if (!outfile[sl]) die("Failed to open %s for writing", file_name);
1022  }
1023  }
1024 #endif
1025 
1026  // skip initial frames
1027  for (i = 0; i < app_input.frames_to_skip; ++i)
1028  read_frame(&app_input.input_ctx, &raw);
1029 
1030  if (svc_ctx.speed != -1)
1031  vpx_codec_control(&encoder, VP8E_SET_CPUUSED, svc_ctx.speed);
1032  if (svc_ctx.threads) {
1034  get_msb(svc_ctx.threads));
1035  if (svc_ctx.threads > 1)
1036  vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 1);
1037  else
1038  vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 0);
1039  }
1040  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
1041  vpx_codec_control(&encoder, VP9E_SET_AQ_MODE, 3);
1042  if (svc_ctx.speed >= 5)
1045 
1047  app_input.inter_layer_pred);
1048 
1050 
1051  vpx_codec_control(&encoder, VP9E_SET_TUNE_CONTENT, app_input.tune_content);
1052 
1053  svc_drop_frame.framedrop_mode = FULL_SUPERFRAME_DROP;
1054  for (sl = 0; sl < (unsigned int)svc_ctx.spatial_layers; ++sl)
1055  svc_drop_frame.framedrop_thresh[sl] = enc_cfg.rc_dropframe_thresh;
1056  svc_drop_frame.max_consec_drop = INT_MAX;
1057  vpx_codec_control(&encoder, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame);
1058 
1059  // Encode frames
1060  while (!end_of_stream) {
1061  vpx_codec_iter_t iter = NULL;
1062  const vpx_codec_cx_pkt_t *cx_pkt;
1063  // Example patterns for bypass/flexible mode:
1064  // example_pattern = 0: 2 temporal layers, and spatial_layers = 1,2,3. Exact
1065  // to fixed SVC patterns. example_pattern = 1: 2 spatial and 2 temporal
1066  // layers, with SL0 only has TL0, and SL1 has both TL0 and TL1. This example
1067  // uses the extended API.
1068  int example_pattern = 0;
1069  if (frame_cnt >= app_input.frames_to_code ||
1070  !read_frame(&app_input.input_ctx, &raw)) {
1071  // We need one extra vpx_svc_encode call at end of stream to flush
1072  // encoder and get remaining data
1073  end_of_stream = 1;
1074  }
1075 
1076  // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
1077  // and the buffer indices for each spatial layer of the current
1078  // (super)frame to be encoded. The spatial and temporal layer_id for the
1079  // current frame also needs to be set.
1080  // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
1081  // mode to "VP9E_LAYERING_MODE_BYPASS".
1082  if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
1083  layer_id.spatial_layer_id = 0;
1084  // Example for 2 temporal layers.
1085  if (frame_cnt % 2 == 0) {
1086  layer_id.temporal_layer_id = 0;
1087  for (i = 0; i < VPX_SS_MAX_LAYERS; i++)
1088  layer_id.temporal_layer_id_per_spatial[i] = 0;
1089  } else {
1090  layer_id.temporal_layer_id = 1;
1091  for (i = 0; i < VPX_SS_MAX_LAYERS; i++)
1092  layer_id.temporal_layer_id_per_spatial[i] = 1;
1093  }
1094  if (example_pattern == 1) {
1095  // example_pattern 1 is hard-coded for 2 spatial and 2 temporal layers.
1096  assert(svc_ctx.spatial_layers == 2);
1097  assert(svc_ctx.temporal_layers == 2);
1098  if (frame_cnt % 2 == 0) {
1099  // Spatial layer 0 and 1 are encoded.
1100  layer_id.temporal_layer_id_per_spatial[0] = 0;
1101  layer_id.temporal_layer_id_per_spatial[1] = 0;
1102  layer_id.spatial_layer_id = 0;
1103  } else {
1104  // Only spatial layer 1 is encoded here.
1105  layer_id.temporal_layer_id_per_spatial[1] = 1;
1106  layer_id.spatial_layer_id = 1;
1107  }
1108  }
1109  vpx_codec_control(&encoder, VP9E_SET_SVC_LAYER_ID, &layer_id);
1110  // TODO(jianj): Fix the parameter passing for "is_key_frame" in
1111  // set_frame_flags_bypass_model() for case of periodic key frames.
1112  if (example_pattern == 0) {
1113  set_frame_flags_bypass_mode_ex0(layer_id.temporal_layer_id,
1114  svc_ctx.spatial_layers, frame_cnt == 0,
1115  &ref_frame_config);
1116  } else if (example_pattern == 1) {
1117  set_frame_flags_bypass_mode_ex1(layer_id.temporal_layer_id,
1118  svc_ctx.spatial_layers, frame_cnt == 0,
1119  &ref_frame_config);
1120  }
1121  ref_frame_config.duration[0] = frame_duration * 1;
1122  ref_frame_config.duration[1] = frame_duration * 1;
1123 
1125  &ref_frame_config);
1126  // Keep track of input frames, to account for frame drops in rate control
1127  // stats/metrics.
1128  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
1129  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
1130  layer_id.temporal_layer_id];
1131  }
1132  } else {
1133  // For the fixed pattern SVC, temporal layer is given by superframe count.
1134  unsigned int tl = 0;
1135  if (enc_cfg.ts_number_layers == 2)
1136  tl = (frame_cnt % 2 != 0);
1137  else if (enc_cfg.ts_number_layers == 3) {
1138  if (frame_cnt % 2 != 0) tl = 2;
1139  if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0)) tl = 1;
1140  }
1141  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl)
1142  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + tl];
1143  }
1144 
1145  vpx_usec_timer_start(&timer);
1146  res = vpx_svc_encode(
1147  &svc_ctx, &encoder, (end_of_stream ? NULL : &raw), pts, frame_duration,
1148  svc_ctx.speed >= 5 ? VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
1149  vpx_usec_timer_mark(&timer);
1150  cx_time += vpx_usec_timer_elapsed(&timer);
1151 
1152  fflush(stdout);
1153  if (res != VPX_CODEC_OK) {
1154  die_codec(&encoder, "Failed to encode frame");
1155  }
1156 
1157  while ((cx_pkt = vpx_codec_get_cx_data(&encoder, &iter)) != NULL) {
1158  switch (cx_pkt->kind) {
1159  case VPX_CODEC_CX_FRAME_PKT: {
1160  SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
1161  if (cx_pkt->data.frame.sz > 0) {
1162  vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf,
1163  cx_pkt->data.frame.sz,
1164  cx_pkt->data.frame.pts);
1165 #if OUTPUT_RC_STATS
1166  if (svc_ctx.output_rc_stat) {
1167  svc_output_rc_stats(&encoder, &enc_cfg, &layer_id, cx_pkt, &rc,
1168  outfile, frame_cnt, framerate);
1169  }
1170 #endif
1171  }
1172  /*
1173  printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
1174  !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
1175  (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
1176  */
1177  if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
1178  si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
1179  ++frames_received;
1180 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
1181  if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf,
1182  (unsigned int)cx_pkt->data.frame.sz, NULL, 0))
1183  die_codec(&decoder, "Failed to decode frame.");
1184 #endif
1185  break;
1186  }
1187  case VPX_CODEC_STATS_PKT: {
1188  stats_write(&app_input.rc_stats, cx_pkt->data.twopass_stats.buf,
1189  cx_pkt->data.twopass_stats.sz);
1190  break;
1191  }
1192  default: { break; }
1193  }
1194 
1195 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
1196  vpx_codec_control(&encoder, VP9E_GET_SVC_LAYER_ID, &layer_id);
1197  // Don't look for mismatch on top spatial and top temporal layers as they
1198  // are non reference frames.
1199  if ((enc_cfg.ss_number_layers > 1 || enc_cfg.ts_number_layers > 1) &&
1200  !(layer_id.temporal_layer_id > 0 &&
1201  layer_id.temporal_layer_id == (int)enc_cfg.ts_number_layers - 1 &&
1202  cx_pkt->data.frame
1203  .spatial_layer_encoded[enc_cfg.ss_number_layers - 1])) {
1204  test_decode(&encoder, &decoder, frame_cnt, &mismatch_seen);
1205  }
1206 #endif
1207  }
1208 
1209  if (!end_of_stream) {
1210  ++frame_cnt;
1211  pts += frame_duration;
1212  }
1213  }
1214 
1215  printf("Processed %d frames\n", frame_cnt);
1216 
1217  close_input_file(&app_input.input_ctx);
1218 
1219 #if OUTPUT_RC_STATS
1220  if (svc_ctx.output_rc_stat) {
1221  printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
1222  printf("\n");
1223  }
1224 #endif
1225  if (vpx_codec_destroy(&encoder))
1226  die_codec(&encoder, "Failed to destroy codec");
1227  if (app_input.passes == 2) stats_close(&app_input.rc_stats, 1);
1228  if (writer) {
1229  vpx_video_writer_close(writer);
1230  }
1231 #if OUTPUT_RC_STATS
1232  if (svc_ctx.output_rc_stat) {
1233  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
1234  vpx_video_writer_close(outfile[sl]);
1235  }
1236  }
1237 #endif
1238 #if CONFIG_INTERNAL_STATS
1239  if (mismatch_seen) {
1240  fprintf(f, "First mismatch occurred in frame %d\n", mismatch_seen);
1241  } else {
1242  fprintf(f, "No mismatch detected in recon buffers\n");
1243  }
1244  fclose(f);
1245 #endif
1246  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
1247  frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
1248  1000000 * (double)frame_cnt / (double)cx_time);
1249  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1250  vpx_img_free(&raw);
1251  }
1252  // display average size, psnr
1253  vpx_svc_dump_statistics(&svc_ctx);
1254  vpx_svc_release(&svc_ctx);
1255  return EXIT_SUCCESS;
1256 }
vpx_fixed_buf_t twopass_stats
Definition: vpx_encoder.h:182
unsigned int ts_number_layers
Number of temporal coding layers.
Definition: vpx_encoder.h:644
Codec control function to set encoder internal speed settings.
Definition: vp8cx.h:155
#define VPX_MAX_LAYERS
Definition: vpx_encoder.h:43
int reference_alt_ref[5]
Definition: vp8cx.h:836
Image Descriptor.
Definition: vpx_image.h:72
Describes the encoder algorithm interface to applications.
const char * vpx_codec_iface_name(vpx_codec_iface_t *iface)
Return the name for a given interface.
Codec control function to constrain the inter-layer prediction (prediction of lower spatial resolution) in VP9 SVC.
Definition: vp8cx.h:619
const char * vpx_codec_err_to_string(vpx_codec_err_t err)
Convert error number to printable string.
int lst_fb_idx[5]
Definition: vp8cx.h:826
Codec control function to set content type.
Definition: vp8cx.h:463
struct vpx_rational g_timebase
Stream timebase units.
Definition: vpx_encoder.h:343
Codec control function to set noise sensitivity.
Definition: vp8cx.h:421
unsigned int layer_target_bitrate[12]
Target bitrate for each spatial/temporal layer.
Definition: vpx_encoder.h:684
SVC_LAYER_DROP_MODE framedrop_mode
Definition: vp8cx.h:864
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: vpx_encoder.h:329
int den
Definition: vpx_encoder.h:220
Definition: vpx_encoder.h:148
int framedrop_thresh[5]
Definition: vp8cx.h:862
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: vpx_encoder.h:614
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: vpx_encoder.h:372
Encoder configuration structure.
Definition: vpx_encoder.h:268
int reference_golden[5]
Definition: vp8cx.h:835
The coded data for this stream is corrupt or incomplete.
Definition: vpx_codec.h:133
Codec control function to set row level multi-threading.
Definition: vp8cx.h:570
Codec control function to set Max data rate for Intra frames.
Definition: vp8cx.h:257
Encoder output packet.
Definition: vpx_encoder.h:159
void * buf
Definition: vpx_encoder.h:97
unsigned int ts_rate_decimator[5]
Frame rate decimation factor for each temporal layer.
Definition: vpx_encoder.h:658
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: vpx_encoder.h:605
Definition: vpx_encoder.h:226
vp9 svc frame dropping parameters.
Definition: vp8cx.h:861
unsigned int g_profile
Bitstream profile to use.
Definition: vpx_encoder.h:295
Definition: vpx_encoder.h:227
Codec control function to set number of tile columns.
Definition: vp8cx.h:351
#define VPX_IMG_FMT_HIGHBITDEPTH
Definition: vpx_image.h:35
struct vpx_codec_cx_pkt::@1::@2 frame
#define VPX_SS_MAX_LAYERS
Definition: vpx_encoder.h:46
vpx_image_t * vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
Definition: vpx_image.h:42
unsigned int d_w
Definition: vpx_image.h:83
#define vpx_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for vpx_codec_dec_init_ver()
Definition: vpx_decoder.h:143
unsigned int g_w
Width of the frame.
Definition: vpx_encoder.h:304
int reference_last[5]
Definition: vp8cx.h:834
int update_buffer_slot[5]
Definition: vp8cx.h:829
Codec control function to set adaptive quantization mode.
Definition: vp8cx.h:398
vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline)
Decode data.
Codec control function to get svc layer ID.
Definition: vp8cx.h:471
unsigned int g_h
Height of the frame.
Definition: vpx_encoder.h:313
enum vpx_codec_cx_pkt_kind kind
Definition: vpx_encoder.h:160
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition: vpx_encoder.h:391
vp9 svc layer parameters
Definition: vp8cx.h:810
Operation completed without error.
Definition: vpx_codec.h:95
void vpx_img_free(vpx_image_t *img)
Close an image descriptor.
vpx_img_fmt_t fmt
Definition: vpx_image.h:73
unsigned int rc_target_bitrate
Target data rate.
Definition: vpx_encoder.h:460
#define VPX_DL_REALTIME
deadline parameter analogous to VPx REALTIME mode.
Definition: vpx_encoder.h:830
int num
Definition: vpx_encoder.h:219
Definition: vpx_codec.h:223
vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, unsigned int usage)
Get a default configuration.
Codec control function to set the frame flags and buffer indices for spatial layers. The frame flags and buffer indices are set using the struct vpx_svc_ref_frame_config defined below.
Definition: vp8cx.h:545
enum vpx_enc_pass g_pass
Multi-pass Encoding Mode.
Definition: vpx_encoder.h:358
Codec control function to set mode and thresholds for frame dropping in SVC. Drop frame thresholds are set per-layer.
Definition: vp8cx.h:628
#define VPX_DL_GOOD_QUALITY
deadline parameter analogous to VPx GOOD QUALITY mode.
Definition: vpx_encoder.h:832
unsigned int ss_number_layers
Number of spatial coding layers.
Definition: vpx_encoder.h:624
vpx_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: vpx_encoder.h:321
Provides definitions for using VP8 or VP9 encoder algorithm within the vpx Codec Interface.
Bypass mode. Used when application needs to control temporal layering. This will only work when the number of spatial layers equals 1.
Definition: vp8cx.h:716
Definition: vp8cx.h:849
vpx_codec_err_t
Algorithm return codes.
Definition: vpx_codec.h:93
const vpx_codec_cx_pkt_t * vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter)
Encoded data iterator.
union vpx_codec_cx_pkt::@1 data
int temporal_layering_mode
Temporal layering mode indicating which temporal layering scheme to use.
Definition: vpx_encoder.h:693
vpx_fixed_buf_t rc_twopass_stats_in
Two-pass stats buffer.
Definition: vpx_encoder.h:447
VP9 specific reference frame data struct.
Definition: vp8.h:110
int temporal_layer_id
Definition: vp8cx.h:813
int max_consec_drop
Definition: vp8cx.h:865
Definition: vpx_encoder.h:234
int idx
Definition: vp8.h:111
#define vpx_codec_control(ctx, id, data)
vpx_codec_control wrapper macro
Definition: vpx_codec.h:407
vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx)
Destroy a codec instance.
unsigned int d_h
Definition: vpx_image.h:84
size_t sz
Definition: vpx_encoder.h:98
Definition: vpx_codec.h:221
vp9 svc frame flag parameters.
Definition: vp8cx.h:825
vpx_codec_err_t err
Definition: vpx_codec.h:203
Definition: vp8.h:55
Codec control function to set the threshold for MBs treated static.
Definition: vp8cx.h:188
int64_t duration[5]
Definition: vp8cx.h:837
#define VPX_FRAME_IS_KEY
Definition: vpx_encoder.h:116
Definition: vpx_codec.h:222
int alt_fb_idx[5]
Definition: vp8cx.h:828
const void * vpx_codec_iter_t
Iterator.
Definition: vpx_codec.h:190
Definition: vpx_encoder.h:147
unsigned int rc_2pass_vbr_maxsection_pct
Two-pass mode per-GOP maximum bitrate.
Definition: vpx_encoder.h:577
vpx_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: vpx_encoder.h:351
unsigned int rc_2pass_vbr_minsection_pct
Two-pass mode per-GOP minimum bitrate.
Definition: vpx_encoder.h:570
int gld_fb_idx[5]
Definition: vp8cx.h:827
Codec control function to set svc layer for spatial and temporal.
Definition: vp8cx.h:453
enum vpx_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: vpx_encoder.h:440
Definition: vpx_encoder.h:225
Codec context structure.
Definition: vpx_codec.h:200