24 #include "../tools_common.h"
25 #include "../video_writer.h"
27 #include "../vpx_ports/vpx_timer.h"
28 #include "vpx/svc_context.h"
31 #include "../vpxstats.h"
32 #include "vp9/encoder/vp9_encoder.h"
33 #define OUTPUT_RC_STATS 1
// Command-line option descriptors. Each ARG_DEF(...) supplies, in order, a
// first string (presumably the short option spelling), a second string
// (presumably the long spelling), the literal 1, and a help string.
// NOTE(review): the meaning of the third field is not visible in this file;
// confirm against the ARG_DEF definition.

// "s" / "skip-frames": input frames to skip.
35 static const arg_def_t skip_frames_arg =
36 ARG_DEF(
"s",
"skip-frames", 1,
"input frames to skip");
// "f" / "frames": number of frames to encode.
37 static const arg_def_t frames_arg =
38 ARG_DEF(
"f",
"frames", 1,
"number of frames to encode");
// "th" / "threads": number of threads to use.
39 static const arg_def_t threads_arg =
40 ARG_DEF(
"th",
"threads", 1,
"number of threads to use");
// "rcstat" / "output_rc_stats": parsed into svc_ctx->output_rc_stat, which
// main() tests before collecting and printing rate-control statistics.
42 static const arg_def_t output_rc_stats_arg =
43 ARG_DEF(
"rcstat",
"output_rc_stats", 1,
"output rc stats");
// Source geometry, timing and target bitrate options.

// "w" / "width": source width; parsed into enc_cfg->g_w.
45 static const arg_def_t width_arg = ARG_DEF(
"w",
"width", 1,
"source width");
// "h" / "height": source height; parsed into enc_cfg->g_h.
46 static const arg_def_t height_arg = ARG_DEF(
"h",
"height", 1,
"source height");
// "t" / "timebase": rational num/den; parsed with arg_parse_rational().
47 static const arg_def_t timebase_arg =
48 ARG_DEF(
"t",
"timebase", 1,
"timebase (num/den)");
// "b" / "target-bitrate": encoding bitrate in kilobits per second.
49 static const arg_def_t bitrate_arg = ARG_DEF(
50 "b",
"target-bitrate", 1,
"encoding bitrate, in kilobits per second");
// Layer structure, keyframe distance and scale-factor options.

// "sl" / "spatial-layers": parsed into svc_ctx->spatial_layers.
51 static const arg_def_t spatial_layers_arg =
52 ARG_DEF(
"sl",
"spatial-layers", 1,
"number of spatial SVC layers");
// "tl" / "temporal-layers": parsed into svc_ctx->temporal_layers.
53 static const arg_def_t temporal_layers_arg =
54 ARG_DEF(
"tl",
"temporal-layers", 1,
"number of temporal SVC layers");
// "tlm" / "temporal-layering-mode".
// NOTE(review): the two adjacent help literals are concatenated with no
// separator, so the help text reads "temporal layering scheme.VP9E_...".
55 static const arg_def_t temporal_layering_mode_arg =
56 ARG_DEF(
"tlm",
"temporal-layering-mode", 1,
57 "temporal layering scheme."
58 "VP9E_TEMPORAL_LAYERING_MODE");
// "k" / "kf-dist": number of frames between keyframes.
59 static const arg_def_t kf_dist_arg =
60 ARG_DEF(
"k",
"kf-dist", 1,
"number of frames between keyframes");
// "r" / "scale-factors": the raw value is appended to the option string that
// parse_command_line() hands to vpx_svc_set_options().
61 static const arg_def_t scale_factors_arg =
62 ARG_DEF(
"r",
"scale-factors", 1,
"scale factors (lowest to highest layer)");
// Two-pass encoding options. parse_command_line() rejects values of "passes"
// and "pass" outside 1..2.

// "p" / "passes": total number of passes.
63 static const arg_def_t passes_arg =
64 ARG_DEF(
"p",
"passes", 1,
"Number of passes (1/2)");
// "pass": which pass to execute. The first ARG_DEF argument is NULL here.
65 static const arg_def_t pass_arg =
66 ARG_DEF(NULL,
"pass", 1,
"Pass to execute (1/2)");
// "fpf": first-pass statistics file name; required when passes is 2.
67 static const arg_def_t fpf_name_arg =
68 ARG_DEF(NULL,
"fpf", 1,
"First pass statistics file name");
// Quantizer, bitrate-bound, lag and rate-control-mode options. All of these
// pass NULL as the first ARG_DEF argument.

// "min-q": forwarded as "min-quantizers=<value>" in the SVC option string.
69 static const arg_def_t min_q_arg =
70 ARG_DEF(NULL,
"min-q", 1,
"Minimum quantizer");
// "max-q": forwarded as "max-quantizers=<value>" in the SVC option string.
71 static const arg_def_t max_q_arg =
72 ARG_DEF(NULL,
"max-q", 1,
"Maximum quantizer");
// "min-bitrate": parsed into the local min_bitrate of parse_command_line().
73 static const arg_def_t min_bitrate_arg =
74 ARG_DEF(NULL,
"min-bitrate", 1,
"Minimum bitrate");
// "max-bitrate": parsed into the local max_bitrate of parse_command_line().
75 static const arg_def_t max_bitrate_arg =
76 ARG_DEF(NULL,
"max-bitrate", 1,
"Maximum bitrate");
// "lag-in-frames": the help string is built from two adjacent literals.
77 static const arg_def_t lag_in_frame_arg =
78 ARG_DEF(NULL,
"lag-in-frames", 1,
79 "Number of frame to input before "
80 "generating any outputs");
// "rc-end-usage": numeric selector; the help text lists 0-3 as VBR, CBR, CQ, Q.
81 static const arg_def_t rc_end_usage_arg =
82 ARG_DEF(NULL,
"rc-end-usage", 1,
"0 - 3: VBR, CBR, CQ, Q");
// Encoder speed, adaptive-quantization mode and per-layer bitrate options.

// "sp" / "speed": parsed into svc_ctx->speed.
83 static const arg_def_t speed_arg =
84 ARG_DEF(
"sp",
"speed", 1,
"speed configuration");
// "aq" / "aqmode": parsed into svc_ctx->aqmode.
85 static const arg_def_t aqmode_arg =
86 ARG_DEF(
"aq",
"aqmode", 1,
"aq-mode off/on");
// "bl" / "bitrates": the help text gives the index layout sl * num_tl + tl;
// the raw value is forwarded as "bitrates=<value>" in the SVC option string.
87 static const arg_def_t bitrates_arg =
88 ARG_DEF(
"bl",
"bitrates", 1,
"bitrates[sl * num_tl + tl]");
// Bit-depth option, guarded by CONFIG_VP9_HIGHBITDEPTH.
// NOTE(review): the entries and closing brace of bitdepth_enum are not
// present in this copy of the file; only its opening line is visible.
90 #if CONFIG_VP9_HIGHBITDEPTH
91 static const struct arg_enum_list bitdepth_enum[] = {
// "d" / "bit-depth": enum-backed option (ARG_DEF_ENUM) that takes
// bitdepth_enum as its value table; the help text names 8, 10 and 12.
95 static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
96 "d",
"bit-depth", 1,
"Bit depth for codec 8, 10 or 12. ", bitdepth_enum);
97 #endif // CONFIG_VP9_HIGHBITDEPTH
// Table of pointers to the option descriptors; usage_exit() passes it to
// arg_show_usage() to print the option list.
// NOTE(review): most entries, the terminator and the closing brace are not
// present in this copy of the file.
99 static const arg_def_t *svc_args[] = { &frames_arg,
115 &temporal_layers_arg,
116 &temporal_layering_mode_arg,
121 &output_rc_stats_arg,
124 #if CONFIG_VP9_HIGHBITDEPTH
// Default settings. parse_command_line() assigns these before it examines
// the command line, so any matching option overrides them.
132 static const uint32_t default_frames_to_skip = 0;
// 60 * 60 = 3600 frames.
133 static const uint32_t default_frames_to_code = 60 * 60;
134 static const uint32_t default_width = 1920;
135 static const uint32_t default_height = 1080;
// Default timebase numerator / denominator (1/60).
136 static const uint32_t default_timebase_num = 1;
137 static const uint32_t default_timebase_den = 60;
// NOTE(review): presumably kilobits per second, matching the "target-bitrate"
// help text — confirm where this is assigned.
138 static const uint32_t default_bitrate = 1000;
139 static const uint32_t default_spatial_layers = 5;
140 static const uint32_t default_temporal_layers = 1;
141 static const uint32_t default_kf_dist = 100;
142 static const uint32_t default_temporal_layering_mode = 0;
143 static const uint32_t default_output_rc_stats = 0;
// -1 is the value main() compares against (svc_ctx.speed != -1) before
// acting on the speed setting.
144 static const int32_t default_speed = -1;
// main() only acts on the thread count when it is non-zero.
145 static const uint32_t default_threads = 0;
// Members of the application input state that parse_command_line() fills
// through its AppInput *app_input parameter.
// NOTE(review): the enclosing struct header and its closing line are not
// present in this copy of the file; other members used elsewhere (passes,
// pass, rc_stats) are likewise not shown here.
// First positional argument (argv[0] after option removal).
148 const char *input_filename;
// Second positional argument (argv[1] after option removal).
149 const char *output_filename;
// Encoding stops once this many frames have been read.
150 uint32_t frames_to_code;
// Number of leading frames main() reads and discards before encoding.
151 uint32_t frames_to_skip;
152 struct VpxInputContext input_ctx;
// NOTE(review): presumably the program name substituted into the "%s" of the
// usage message; neither its assignment nor its use is visible in this copy.
158 static const char *exec_name;
// Prints the usage banner followed by the option table (svc_args) to stderr.
// Takes no arguments and has external linkage (not declared static).
// NOTE(review): the argument for the "%s" in the first format string and the
// end of the function are not present in this copy; the name suggests the
// function terminates the process afterwards — confirm.
160 void usage_exit(
void) {
161 fprintf(stderr,
"Usage: %s <options> input_filename output_filename\n",
163 fprintf(stderr,
"Options:\n");
// Emit one help entry per descriptor in svc_args.
164 arg_show_usage(stderr, svc_args);
// Fills app_input, svc_ctx and enc_cfg from the command line.
//
// Visible steps: assign defaults, duplicate the argument vector, match each
// argument against the option descriptors, validate the passes/pass/fpf
// combination, reject leftover arguments that look like options, take the
// two remaining positional arguments as input and output file names, and
// validate the resolution.
//
// Errors are reported through die()/fatal() rather than a return value.
// NOTE(review): several interior lines (including the last parameter, some
// branch bodies and closing braces) are not present in this copy.
168 static void parse_command_line(
int argc,
const char **argv_,
169 AppInput *app_input, SvcContext *svc_ctx,
178 const char *fpf_file_name = NULL;
179 unsigned int min_bitrate = 0;
180 unsigned int max_bitrate = 0;
// Accumulates " key=value" fragments that are later handed to
// vpx_svc_set_options(); zero-initialised so it starts as an empty string.
181 char string_options[1024] = { 0 };
// Defaults for the SVC context.
184 svc_ctx->log_level = SVC_LOG_DEBUG;
185 svc_ctx->spatial_layers = default_spatial_layers;
186 svc_ctx->temporal_layers = default_temporal_layers;
187 svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
189 svc_ctx->output_rc_stat = default_output_rc_stats;
191 svc_ctx->speed = default_speed;
192 svc_ctx->threads = default_threads;
// Defaults for the encoder configuration and the application input.
200 enc_cfg->
g_w = default_width;
201 enc_cfg->
g_h = default_height;
210 app_input->frames_to_code = default_frames_to_code;
211 app_input->frames_to_skip = default_frames_to_skip;
// Work on a duplicate of the argument vector with argv_[0] left out.
214 argv = argv_dup(argc - 1, argv_ + 1);
// Each iteration copies the current argument pointer into the argj slot and
// advances argi by arg.argv_step; the loop ends at the NULL terminator.
215 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
218 if (arg_match(&arg, &frames_arg, argi)) {
219 app_input->frames_to_code = arg_parse_uint(&arg);
220 }
else if (arg_match(&arg, &width_arg, argi)) {
221 enc_cfg->
g_w = arg_parse_uint(&arg);
222 }
else if (arg_match(&arg, &height_arg, argi)) {
223 enc_cfg->
g_h = arg_parse_uint(&arg);
224 }
else if (arg_match(&arg, &timebase_arg, argi)) {
225 enc_cfg->
g_timebase = arg_parse_rational(&arg);
226 }
else if (arg_match(&arg, &bitrate_arg, argi)) {
228 }
else if (arg_match(&arg, &skip_frames_arg, argi)) {
229 app_input->frames_to_skip = arg_parse_uint(&arg);
230 }
else if (arg_match(&arg, &spatial_layers_arg, argi)) {
231 svc_ctx->spatial_layers = arg_parse_uint(&arg);
232 }
else if (arg_match(&arg, &temporal_layers_arg, argi)) {
233 svc_ctx->temporal_layers = arg_parse_uint(&arg);
235 }
else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
236 svc_ctx->output_rc_stat = arg_parse_uint(&arg);
238 }
else if (arg_match(&arg, &speed_arg, argi)) {
// NOTE(review): speed is parsed as unsigned although default_speed is -1
// and main() compares the field against -1; confirm negative speeds are
// not meant to be accepted from the command line.
239 svc_ctx->speed = arg_parse_uint(&arg);
240 }
else if (arg_match(&arg, &aqmode_arg, argi)) {
241 svc_ctx->aqmode = arg_parse_uint(&arg);
242 }
else if (arg_match(&arg, &threads_arg, argi)) {
243 svc_ctx->threads = arg_parse_uint(&arg);
244 }
else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
247 if (svc_ctx->temporal_layering_mode) {
250 }
else if (arg_match(&arg, &kf_dist_arg, argi)) {
253 }
else if (arg_match(&arg, &scale_factors_arg, argi)) {
// Append " scale-factors=<value>" to the accumulated option string.
// NOTE(review): string_options is both the destination and a "%s" source
// of this snprintf call. The C standard leaves copying between
// overlapping objects undefined; appending at the current end of the
// string (e.g. with an offset or strncat) would avoid the overlap. The
// same pattern repeats for bitrates, min-quantizers and max-quantizers.
254 snprintf(string_options,
sizeof(string_options),
"%s scale-factors=%s",
255 string_options, arg.val);
256 }
else if (arg_match(&arg, &bitrates_arg, argi)) {
257 snprintf(string_options,
sizeof(string_options),
"%s bitrates=%s",
258 string_options, arg.val);
259 }
else if (arg_match(&arg, &passes_arg, argi)) {
// Only 1 or 2 passes are accepted.
260 passes = arg_parse_uint(&arg);
261 if (passes < 1 || passes > 2) {
262 die(
"Error: Invalid number of passes (%d)\n", passes);
264 }
else if (arg_match(&arg, &pass_arg, argi)) {
// Only pass 1 or pass 2 may be selected.
265 pass = arg_parse_uint(&arg);
266 if (pass < 1 || pass > 2) {
267 die(
"Error: Invalid pass selected (%d)\n", pass);
269 }
else if (arg_match(&arg, &fpf_name_arg, argi)) {
// Keeps a pointer into the argument vector; the string is not copied.
270 fpf_file_name = arg.val;
271 }
else if (arg_match(&arg, &min_q_arg, argi)) {
272 snprintf(string_options,
sizeof(string_options),
"%s min-quantizers=%s",
273 string_options, arg.val);
274 }
else if (arg_match(&arg, &max_q_arg, argi)) {
275 snprintf(string_options,
sizeof(string_options),
"%s max-quantizers=%s",
276 string_options, arg.val);
277 }
else if (arg_match(&arg, &min_bitrate_arg, argi)) {
278 min_bitrate = arg_parse_uint(&arg);
279 }
else if (arg_match(&arg, &max_bitrate_arg, argi)) {
280 max_bitrate = arg_parse_uint(&arg);
281 }
else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
283 }
else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
285 #if CONFIG_VP9_HIGHBITDEPTH
286 }
else if (arg_match(&arg, &bitdepth_arg, argi)) {
287 enc_cfg->
g_bit_depth = arg_parse_enum_or_int(&arg);
302 die(
"Error: Invalid bit depth selected (%d)\n", enc_cfg->
g_bit_depth);
305 #endif // CONFIG_VP9_HIGHBITDEPTH
// Every fragment appended above starts with a space, so "+ 1" skips that
// leading space when the accumulated options are handed to the SVC layer.
312 if (strlen(string_options) > 0)
313 vpx_svc_set_options(svc_ctx, string_options + 1);
// Single-pass operation: a "pass" selection is reported as ignored.
315 if (passes == 0 || passes == 1) {
317 fprintf(stderr,
"pass is ignored since there's only one pass\n");
// Two-pass operation requires both a pass selection and a stats file.
322 die(
"pass must be specified when passes is 2\n");
325 if (fpf_file_name == NULL) {
326 die(
"fpf must be specified when passes is 2\n");
// The stats file is opened with a third argument of 0 in one branch and 1
// in the other. NOTE(review): presumably the zero-based pass index — confirm
// against stats_open_file().
331 if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) {
332 fatal(
"Failed to open statistics store");
336 if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) {
337 fatal(
"Failed to open statistics store");
341 app_input->passes = passes;
342 app_input->pass = pass;
346 if (min_bitrate > 0) {
350 if (max_bitrate > 0) {
// Anything left that starts with '-' and is longer than one character is an
// unrecognized option; a lone "-" is let through.
357 for (argi = argv; *argi; ++argi)
358 if (argi[0][0] ==
'-' && strlen(argi[0]) > 1)
359 die(
"Error: Unrecognized option %s\n", *argi);
// Exactly two positional arguments are consumed: input then output file.
361 if (argv[0] == NULL || argv[1] == 0) {
364 app_input->input_filename = argv[0];
365 app_input->output_filename = argv[1];
// Resolution check: the visible conditions reject a width below 16, an odd
// width and a height below 16 (the rest of the condition is not shown).
368 if (enc_cfg->
g_w < 16 || enc_cfg->
g_w % 2 || enc_cfg->
g_h < 16 ||
370 die(
"Invalid resolution: %d x %d\n", enc_cfg->
g_w, enc_cfg->
g_h);
// Format fragments and arguments of the final configuration summary print.
373 "Codec %s\nframes: %d, skip: %d\n"
375 "width %d, height: %d,\n"
376 "num: %d, den: %d, bitrate: %d,\n"
379 app_input->frames_to_skip, svc_ctx->spatial_layers, enc_cfg->
g_w,
// Rate-control statistics gathered while encoding.
// NOTE(review): only two members are present in this copy; the per-layer
// arrays, window_count and window_size used elsewhere in the file, and the
// closing brace, are not shown.
386 struct RateControlStats {
// Running sum of per-window average bitrates; divided by window_count in
// printout_rate_control_summary().
405 double avg_st_encoding_bitrate;
// Running sum of squared per-window average bitrates; turned into a
// variance in printout_rate_control_summary().
407 double variance_st_encoding_bitrate;
// Initialises the rate-control statistics in *rc: per-layer frame rate and
// per-frame bandwidth, zeroed per-layer counters and accumulators, and the
// short-time window state.
// NOTE(review): the remaining parameter(s), the layer loops and parts of the
// layer_pfb expressions are not present in this copy.
416 static void set_rate_control_stats(
struct RateControlStats *rc,
427 rc->layer_framerate[layer] = framerate;
// layer_pfb is later printed as the target "Average frame size ... bits".
431 rc->layer_pfb[layer] =
// Divisor fragment: the frame-rate difference to the previous layer index.
434 (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
437 rc->layer_framerate[layer];
// Reset the per-layer counters and accumulators.
439 rc->layer_input_frames[layer] = 0;
440 rc->layer_enc_frames[layer] = 0;
441 rc->layer_tot_enc_frames[layer] = 0;
442 rc->layer_encoding_bitrate[layer] = 0.0;
443 rc->layer_avg_frame_size[layer] = 0.0;
444 rc->layer_avg_rate_mismatch[layer] = 0.0;
// Short-time statistics use a fixed window of 15 frames.
447 rc->window_count = 0;
448 rc->window_size = 15;
449 rc->avg_st_encoding_bitrate = 0.0;
450 rc->variance_st_encoding_bitrate = 0.0;
// Prints the collected rate-control statistics to stdout: per-layer bitrate,
// average frame size, rate mismatch and dropped-frame percentage, followed
// by short-time (windowed) bitrate statistics. The per-layer accumulators in
// *rc are overwritten with their normalised values as a side effect. Calls
// die() when frame_cnt does not match the summed per-layer input frames.
// NOTE(review): the remaining parameters, the layer loops and several
// argument lines are not present in this copy.
453 static void printout_rate_control_summary(
struct RateControlStats *rc,
457 double perc_fluctuation = 0.0;
458 int tot_num_frames = 0;
459 printf(
"Total number of processed frames: %d\n\n", frame_cnt - 1);
460 printf(
"Rate control layer stats for sl%d tl%d layer(s):\n\n",
// Frames that were input to the layer but not counted as encoded.
466 const int num_dropped =
468 ? (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer])
469 : (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] -
471 tot_num_frames += rc->layer_input_frames[layer];
// Scale the accumulated bits by the layer frame rate and 0.001.
472 rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
473 rc->layer_encoding_bitrate[layer] /
// Turn the accumulated sums into per-encoded-frame averages.
// NOTE(review): no check for layer_enc_frames[layer] == 0 is visible before
// these divisions — confirm a layer with no encoded frames cannot occur.
475 rc->layer_avg_frame_size[layer] =
476 rc->layer_avg_frame_size[layer] / rc->layer_enc_frames[layer];
477 rc->layer_avg_rate_mismatch[layer] = 100.0 *
478 rc->layer_avg_rate_mismatch[layer] /
479 rc->layer_enc_frames[layer];
480 printf(
"For layer#: sl%d tl%d \n", sl, tl);
// NOTE(review): "%f.0" prints the value with default precision followed by
// a literal ".0"; "%.0f" may have been intended here and in the
// dropped-frame line below.
481 printf(
"Bitrate (target vs actual): %d %f.0 kbps\n",
483 rc->layer_encoding_bitrate[layer]);
484 printf(
"Average frame size (target vs actual): %f %f bits\n",
485 rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
486 printf(
"Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[layer]);
488 "Number of input frames, encoded (non-key) frames, "
489 "and percent dropped frames: %d %d %f.0 \n",
490 rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
491 100.0 * num_dropped / rc->layer_input_frames[layer]);
// Short-time statistics: mean of the window averages, then variance as
// mean-of-squares minus square-of-mean.
// NOTE(review): window_count stays 0 when too few frames were processed to
// complete a window; no guard against dividing by it is visible.
495 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
496 rc->variance_st_encoding_bitrate =
497 rc->variance_st_encoding_bitrate / rc->window_count -
498 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
// Standard deviation as a percentage of the mean.
499 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
500 rc->avg_st_encoding_bitrate;
501 printf(
"Short-time stats, for window of %d frames: \n", rc->window_size);
502 printf(
"Average, rms-variance, and percent-fluct: %f %f %f \n",
503 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
// Consistency check between the caller's frame count and the per-layer sums.
505 if (frame_cnt != tot_num_frames)
506 die(
"Error: Number of input frames not equal to output encoded frames != "
507 "%d tot_num_frames = %d\n",
508 frame_cnt, tot_num_frames);
// Looks for a superframe index at the end of a compressed chunk and decodes
// the per-frame sizes it carries.
//
//   data, data_sz  compressed chunk and its length in bytes
//   sizes          receives up to 8 frame sizes
//   count          frame-count output (its assignment is not visible here)
//
// Returns a vpx_codec_err_t; the return statements are not present in this
// copy of the file. The function has external linkage (not declared static).
// NOTE(review): no data_sz == 0 guard is visible before the last byte is
// read below — confirm callers never pass an empty chunk.
511 vpx_codec_err_t parse_superframe_index(
const uint8_t *data,
size_t data_sz,
512 uint64_t sizes[8],
int *count) {
// The last byte of the chunk is the candidate marker.
521 marker = *(data + data_sz - 1);
// The top three bits must be 110 for the byte to be treated as a marker.
524 if ((marker & 0xe0) == 0xc0) {
// Low three bits: frame count minus one (1..8 frames).
525 const uint32_t frames = (marker & 0x7) + 1;
// Bits 3-4: bytes per size field minus one (1..4 bytes).
526 const uint32_t mag = ((marker >> 3) & 0x3) + 1;
// Two marker bytes plus mag bytes for each frame.
527 const size_t index_sz = 2 + mag * frames;
// Byte at the start of the index.
534 const uint8_t marker2 = *(data + data_sz - index_sz);
// First size byte, immediately after the leading marker byte.
545 const uint8_t *x = &data[data_sz - index_sz + 1];
547 for (i = 0; i < frames; ++i) {
548 uint32_t this_sz = 0;
// Sizes are stored least-significant byte first.
// NOTE(review): *x is promoted to int before the shift; with mag == 4 and a
// top byte >= 0x80 the shift by 24 overflows a 32-bit signed int, which is
// undefined. Casting the byte to uint32_t before shifting would avoid it.
550 for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);
// Fills a reference-frame configuration (ref_frame_config) for every spatial
// layer, depending on the temporal layer id tl.
// NOTE(review): the remaining parameters and most of the body are not
// present in this copy. The parameter sl is reused as the loop index below,
// so the value passed in by the caller is overwritten. The function has
// external linkage (not declared static).
564 void set_frame_flags_bypass_mode(
int sl,
int tl,
int num_spatial_layers,
567 for (sl = 0; sl < num_spatial_layers; ++sl) {
583 }
else if (tl == 1) {
600 }
else if (tl == 1) {
// For tl == 1 the golden and alt-ref buffer indices are placed after the
// first num_spatial_layers slots.
602 ref_frame_config->
gld_fb_idx[sl] = num_spatial_layers + sl - 1;
603 ref_frame_config->
alt_fb_idx[sl] = num_spatial_layers + sl;
// Program entry point: parses the command line, opens the input file and the
// IVF writer(s), initialises the SVC encoder, encodes frames until the frame
// limit or end of input, optionally gathers rate-control statistics, then
// prints timing information and releases the encoder.
// NOTE(review): many declarations, conditions and closing braces are not
// present in this copy of the file.
608 int main(
int argc,
const char **argv) {
610 VpxVideoWriter *writer = NULL;
616 uint32_t frame_cnt = 0;
620 int frame_duration = 1;
622 int end_of_stream = 0;
623 int frames_received = 0;
626 struct RateControlStats rc;
// Accumulators for two interleaved short-time bitrate windows.
630 double sum_bitrate = 0.0;
631 double sum_bitrate2 = 0.0;
// Fixed frame rate used when converting frame sizes to a bitrate below.
632 double framerate = 30.0;
634 struct vpx_usec_timer timer;
636 memset(&svc_ctx, 0,
sizeof(svc_ctx));
637 svc_ctx.log_print = 1;
639 parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
// Allocate the raw input image; both build variants die on failure.
642 #if CONFIG_VP9_HIGHBITDEPTH
646 enc_cfg.
g_w, enc_cfg.
g_h, 32)) {
647 die(
"Failed to allocate image %dx%d\n", enc_cfg.
g_w, enc_cfg.
g_h);
651 die(
"Failed to allocate image %dx%d\n", enc_cfg.
g_w, enc_cfg.
g_h);
653 #endif // CONFIG_VP9_HIGHBITDEPTH
// Open the input in binary mode.
655 if (!(infile = fopen(app_input.input_filename,
"rb")))
656 die(
"Failed to open %s for reading\n", app_input.input_filename);
659 if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
661 die(
"Failed to initialize encoder\n");
664 if (svc_ctx.output_rc_stat) {
665 set_rate_control_stats(&rc, &enc_cfg);
670 info.codec_fourcc = VP9_FOURCC;
// The main output is opened except when running pass 1 of a two-pass encode.
674 if (!(app_input.passes == 2 && app_input.pass == 1)) {
677 vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info);
679 die(
"Failed to open %s for writing\n", app_input.output_filename);
// With rate-control stats enabled, an extra "<output>_t<tl>.ivf" file is
// opened for each tl.
684 if (svc_ctx.output_rc_stat) {
686 char file_name[PATH_MAX];
688 snprintf(file_name,
sizeof(file_name),
"%s_t%d.ivf",
689 app_input.output_filename, tl);
690 outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
691 if (!outfile[tl]) die(
"Failed to open %s for writing", file_name);
// Read and discard the requested number of leading frames; the result of
// vpx_img_read() is ignored here.
697 for (i = 0; i < app_input.frames_to_skip; ++i) vpx_img_read(&raw, infile);
// Optional encoder settings; -1 is the default (unset) speed.
699 if (svc_ctx.speed != -1)
701 if (svc_ctx.threads) {
703 if (svc_ctx.threads > 1)
708 if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
710 if (svc_ctx.speed >= 5)
// Encode loop.
715 while (!end_of_stream) {
// Stop at the frame limit or when no further frame can be read.
718 if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) {
731 layer_id.spatial_layer_id = 0;
// The temporal layer id alternates with frame parity: even 0, odd 1.
733 if (frame_cnt % 2 == 0)
734 layer_id.temporal_layer_id = 0;
736 layer_id.temporal_layer_id = 1;
741 set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
742 svc_ctx.spatial_layers, frame_cnt == 0,
750 layer_id.temporal_layer_id];
// Time only the encode call; a NULL image is passed once the stream ended.
754 vpx_usec_timer_start(&timer);
755 res = vpx_svc_encode(
756 &svc_ctx, &codec, (end_of_stream ? NULL : &raw), pts, frame_duration,
758 vpx_usec_timer_mark(&timer);
759 cx_time += vpx_usec_timer_elapsed(&timer);
761 printf(
"%s", vpx_svc_get_message(&svc_ctx));
764 die_codec(&codec,
"Failed to encode frame");
// Handle each output packet by kind.
768 switch (cx_pkt->
kind) {
770 SvcInternal_t *
const si = (SvcInternal_t *)svc_ctx.internal;
776 vpx_video_writer_write_frame(writer, cx_pkt->
data.
frame.buf,
781 if (svc_ctx.output_rc_stat) {
// Split the packet into per-layer sizes using its superframe index.
783 parse_superframe_index(cx_pkt->
data.
frame.buf,
791 if (svc_ctx.temporal_layering_mode !=
795 layer_id.temporal_layer_id];
798 for (tl = layer_id.temporal_layer_id;
800 vpx_video_writer_write_frame(
// Accumulate per-layer statistics; 8.0 * sizes[sl] converts the layer
// size to bits.
806 for (tl = layer_id.temporal_layer_id;
809 ++rc.layer_tot_enc_frames[layer];
810 rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
813 if (tl == (
unsigned int)layer_id.temporal_layer_id &&
815 rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
// Absolute deviation from the per-frame bandwidth target.
816 rc.layer_avg_rate_mismatch[layer] +=
817 fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
819 ++rc.layer_enc_frames[layer];
// First short-time window: starts after window_size frames and closes on
// every multiple of window_size, adding the window mean and its square.
827 if (frame_cnt > (
unsigned int)rc.window_size) {
828 tl = layer_id.temporal_layer_id;
830 sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
832 if (frame_cnt % rc.window_size == 0) {
833 rc.window_count += 1;
834 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
835 rc.variance_st_encoding_bitrate +=
836 (sum_bitrate / rc.window_size) *
837 (sum_bitrate / rc.window_size);
// Second short-time window: starts half a window later than the first.
844 (
unsigned int)(rc.window_size + rc.window_size / 2)) {
845 tl = layer_id.temporal_layer_id;
847 sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
850 if (frame_cnt > (
unsigned int)(2 * rc.window_size) &&
851 frame_cnt % rc.window_size == 0) {
852 rc.window_count += 1;
853 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
854 rc.variance_st_encoding_bitrate +=
855 (sum_bitrate2 / rc.window_size) *
856 (sum_bitrate2 / rc.window_size);
869 si->bytes_sum[0] += (int)cx_pkt->
data.
frame.sz;
// Advance the presentation timestamp while input remains.
882 if (!end_of_stream) {
884 pts += frame_duration;
893 --rc.layer_input_frames[layer];
897 printf(
"Processed %d frames\n", frame_cnt);
900 if (svc_ctx.output_rc_stat) {
901 printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
// Close the statistics store and the writers.
906 if (app_input.passes == 2) stats_close(&app_input.rc_stats, 1);
908 vpx_video_writer_close(writer);
911 if (svc_ctx.output_rc_stat) {
913 vpx_video_writer_close(outfile[tl]);
// Timing summary: milliseconds per frame and frames per second.
// NOTE(review): frame_cnt is uint32_t, so frame_cnt * 1000000 is evaluated
// in 32-bit unsigned arithmetic (with 32-bit int) and wraps once frame_cnt
// exceeds 4294; converting to double before multiplying would avoid that.
// Both quotients also divide by zero when no frame was encoded.
917 printf(
"Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
918 frame_cnt, 1000 * (
float)cx_time / (
double)(frame_cnt * 1000000),
919 1000000 * (
double)frame_cnt / (
double)cx_time);
922 printf(
"%s", vpx_svc_dump_statistics(&svc_ctx));
923 vpx_svc_release(&svc_ctx);