AOMedia AV1 Codec
speed_features.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_ENCODER_SPEED_FEATURES_H_
13 #define AOM_AV1_ENCODER_SPEED_FEATURES_H_
14 
15 #include "av1/common/enums.h"
16 #include "av1/encoder/enc_enums.h"
17 #include "av1/encoder/mcomp.h"
18 #include "av1/encoder/encodemb.h"
19 
20 #ifdef __cplusplus
21 extern "C" {
22 #endif
23 
// Number of entries in each mesh pattern table: the mesh_patterns and
// intrabc_mesh_patterns arrays in MV_SPEED_FEATURES are sized with it.
27 #define MAX_MESH_STEP 4
28 
// One entry of a mesh search pattern table.
// NOTE(review): range is presumably the search extent of the step and
// interval the sampling stride within it — confirm against the motion search
// code, which is not visible in this file.
29 typedef struct MESH_PATTERN {
30  int range;
31  int interval;
32 } MESH_PATTERN;
33 
// Global motion (GM) search modes; this is the type of the gm_search_type
// field in GLOBAL_MOTION_SPEED_FEATURES.
// NOTE(review): the enumerator names suggest a full search, two reduced
// reference-frame searches and a disabled search — confirm against the
// global motion code, which is not visible here.
34 enum {
35  GM_FULL_SEARCH,
36  GM_REDUCED_REF_SEARCH_SKIP_L2_L3,
37  GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2,
38  GM_DISABLE_SEARCH
39 } UENUM1BYTE(GM_SEARCH_TYPE);
40 
// Settings for the use_dist_wtd_comp_flag field of INTER_MODE_SPEED_FEATURES,
// which decides when and how joint_comp is used.
// NOTE(review): per the names, the middle value keeps the mode but skips its
// MV search — confirm against the inter mode search code.
41 enum {
42  DIST_WTD_COMP_ENABLED,
43  DIST_WTD_COMP_SKIP_MV_SEARCH,
44  DIST_WTD_COMP_DISABLED,
45 } UENUM1BYTE(DIST_WTD_COMP_FLAG);
46 
// Bit masks over intra prediction modes: bit N is set when the mode whose
// enum value is N belongs to the mask. INTRA_* masks are built from the luma
// mode enumerators (DC_PRED, ...), UV_INTRA_* masks from the chroma
// enumerators (UV_DC_PRED, ...).
47 enum {
 // All 13 luma intra modes listed below.
48  INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) |
49  (1 << D135_PRED) | (1 << D113_PRED) | (1 << D157_PRED) |
50  (1 << D203_PRED) | (1 << D67_PRED) | (1 << SMOOTH_PRED) |
51  (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) | (1 << PAETH_PRED),
 // All chroma intra modes listed below, including UV_CFL_PRED.
52  UV_INTRA_ALL =
53  (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) |
54  (1 << UV_D45_PRED) | (1 << UV_D135_PRED) | (1 << UV_D113_PRED) |
55  (1 << UV_D157_PRED) | (1 << UV_D203_PRED) | (1 << UV_D67_PRED) |
56  (1 << UV_SMOOTH_PRED) | (1 << UV_SMOOTH_V_PRED) |
57  (1 << UV_SMOOTH_H_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED),
 // Reduced chroma subsets; each name lists the modes it contains.
58  UV_INTRA_DC = (1 << UV_DC_PRED),
59  UV_INTRA_DC_CFL = (1 << UV_DC_PRED) | (1 << UV_CFL_PRED),
 // Despite the "TM" suffix, this mask selects DC and PAETH.
60  UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED),
61  UV_INTRA_DC_PAETH_CFL =
62  (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED),
63  UV_INTRA_DC_H_V = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED),
64  UV_INTRA_DC_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) |
65  (1 << UV_H_PRED) | (1 << UV_CFL_PRED),
66  UV_INTRA_DC_PAETH_H_V = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) |
67  (1 << UV_V_PRED) | (1 << UV_H_PRED),
68  UV_INTRA_DC_PAETH_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) |
69  (1 << UV_V_PRED) | (1 << UV_H_PRED) |
70  (1 << UV_CFL_PRED),
 // Reduced luma subsets; each name lists the modes it contains.
71  INTRA_DC = (1 << DC_PRED),
 // Despite the "TM" suffix, this mask selects DC and PAETH.
72  INTRA_DC_TM = (1 << DC_PRED) | (1 << PAETH_PRED),
73  INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
74  INTRA_DC_H_V_SMOOTH =
75  (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << SMOOTH_PRED),
76  INTRA_DC_PAETH_H_V =
77  (1 << DC_PRED) | (1 << PAETH_PRED) | (1 << V_PRED) | (1 << H_PRED)
78 };
79 
// Bit masks over inter prediction modes: bit N is set when the mode whose
// enum value is N belongs to the mask.
80 enum {
 // The 12 single-reference and compound inter modes listed below.
81  INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) |
82  (1 << NEWMV) | (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) |
83  (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) |
84  (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) | (1 << GLOBAL_GLOBALMV),
 // Same set as INTER_ALL with NEWMV and NEW_NEWMV left out.
85  INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) |
86  (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) |
87  (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
88  (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) |
89  (1 << NEAR_NEARMV),
90 };
91 
// Bit masks built from the THR_* enumerators (one bit per THR_* value).
// NOTE(review): judging by the names, a set bit disables split for that
// reference category — confirm against the code that consumes these masks.
92 enum {
 // All five inter categories: THR_COMP_GA, THR_COMP_LA, THR_ALTR, THR_GOLD
 // and THR_LAST.
93  DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
94  (1 << THR_ALTR) | (1 << THR_GOLD) | (1 << THR_LAST),
95 
 // DISABLE_ALL_INTER_SPLIT plus the THR_INTRA bit.
96  DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
97 
 // Only the two compound categories.
98  DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
99 
 // Every bit of DISABLE_ALL_SPLIT except THR_LAST and THR_INTRA.
100  LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
101  (1 << THR_ALTR) | (1 << THR_GOLD)
102 };
103 
// Single-bit flags, one per speed feature group; the eight values are distinct
// powers of two (1 through 128), so they can be OR-ed together into one mask.
// NOTE(review): the consumer of this mask is not visible in this file, and the
// two RESERVE_* entries appear to be placeholders — confirm.
104 enum {
105  TXFM_CODING_SF = 1,
106  INTER_PRED_SF = 2,
107  INTRA_PRED_SF = 4,
108  PARTITION_SF = 8,
109  LOOP_FILTER_SF = 16,
110  RD_SKIP_SF = 32,
111  RESERVE_2_SF = 64,
112  RESERVE_3_SF = 128,
113 } UENUM1BYTE(DEV_SPEED_FEATURES);
114 
115 /* This enumeration defines when the rate control recode loop will be
116  * enabled.
 *
 * The values are ordered from most restrictive (0, never recode) to least
 * restrictive (2, any frame type may be recoded).
117  */
118 enum {
119  /*
120  * No recodes allowed
121  */
122  DISALLOW_RECODE = 0,
123  /*
124  * Allow recode only for KF/ARF/GF frames
125  */
126  ALLOW_RECODE_KFARFGF = 1,
127  /*
128  * Allow recode for all frame types based on bitrate constraints.
129  */
130  ALLOW_RECODE = 2,
131 } UENUM1BYTE(RECODE_LOOP_TYPE);
132 
// Sub-pixel motion search variants; this is the type of the
// subpel_search_method field in MV_SPEED_FEATURES.
133 enum {
 // NOTE(review): presumably the unpruned tree search — confirm.
134  SUBPEL_TREE = 0,
135  SUBPEL_TREE_PRUNED = 1, // Prunes 1/2-pel searches
136  SUBPEL_TREE_PRUNED_MORE = 2, // Prunes 1/2-pel searches more aggressively
137 } UENUM1BYTE(SUBPEL_SEARCH_METHODS);
138 
// Strategies for picking the loop filter (LPF) level, as described per value
// below.
139 enum {
140  // Try the full image with different values.
141  LPF_PICK_FROM_FULL_IMAGE,
142  // Try the full image filter search with non-dual filter only.
143  LPF_PICK_FROM_FULL_IMAGE_NON_DUAL,
144  // Try a small portion of the image with different values.
145  LPF_PICK_FROM_SUBIMAGE,
146  // Estimate the level based on quantizer and frame type
147  LPF_PICK_FROM_Q,
148  // Pick 0 to disable LPF if LPF was enabled last frame
149  LPF_PICK_MINIMAL_LPF
150 } UENUM1BYTE(LPF_PICK_METHOD);
156 typedef enum {
165  CDEF_PICK_METHODS
167 
// Bit flags selecting mode search skip heuristics; each flag occupies its own
// bit so several can be combined in one mask. Bit 2 (1 << 2) is not assigned
// to any flag here.
169 enum {
170  // Terminate search early based on distortion so far compared to
171  // qp step, distortion in the neighborhood of the frame, etc.
172  FLAG_EARLY_TERMINATE = 1 << 0,
173 
174  // Skips comp inter modes if the best so far is an intra mode.
175  FLAG_SKIP_COMP_BESTINTRA = 1 << 1,
176 
177  // Skips oblique intra modes if the best so far is an inter mode.
178  FLAG_SKIP_INTRA_BESTINTER = 1 << 3,
179 
180  // Skips oblique intra modes at angles 27, 63, 117, 153 if the best
181  // intra so far is not one of the neighboring directions.
182  FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4,
183 
184  // Skips intra modes other than DC_PRED if the source variance is small
185  FLAG_SKIP_INTRA_LOWVAR = 1 << 5,
186 } UENUM1BYTE(MODE_SEARCH_SKIP_LOGIC);
187 
// Transform (tx) type pruning levels; this is the type of the
// prune_2d_txfm_mode field in TX_TYPE_SEARCH.
188 enum {
189  // No tx type pruning
190  TX_TYPE_PRUNE_0 = 0,
191  // Adaptively prunes the least promising tx types out of all 16
192  // (tuned to provide negligible quality loss)
193  TX_TYPE_PRUNE_1 = 1,
194  // Similar, but applies much more aggressive pruning to get better speed-up
195  TX_TYPE_PRUNE_2 = 2,
196  TX_TYPE_PRUNE_3 = 3,
197  // More aggressive pruning based on tx type score and allowed tx count
198  TX_TYPE_PRUNE_4 = 4,
199  TX_TYPE_PRUNE_5 = 5,
200 } UENUM1BYTE(TX_TYPE_PRUNE_MODE);
201 
// Rate control reaction to a detected slide/scene change.
// NOTE(review): the type name indicates this applies to CBR rate control —
// confirm against the rate control code.
202 enum {
203  // No reaction to rate control on a detected slide/scene change.
204  NO_DETECTION = 0,
205 
206  // Set to larger Q based only on the detected slide/scene change and
207  // current/past Q.
208  FAST_DETECTION_MAXQ = 1,
209 } UENUM1BYTE(OVERSHOOT_DETECTION_CBR);
210 
// Levels of multi-winner mode processing, which bound how many winner modes
// take part in the txfm search.
211 enum {
212  // Turns off multi-winner mode. So we will do txfm search on either all modes
213  // if winner mode is off, or we will only do txfm search on a single winner
214  // mode.
215  MULTI_WINNER_MODE_OFF = 0,
216 
217  // Limits the number of winner modes to at most 2
218  MULTI_WINNER_MODE_FAST = 1,
219 
220  // Uses the default number of winner modes, which is 3 for intra mode, and 1
221  // for inter mode.
222  MULTI_WINNER_MODE_DEFAULT = 2,
223 
224  // Maximum number of winner modes allowed.
 // This enumerator has no explicit value, so it evaluates to 3, which is also
 // the number of levels defined above.
225  MULTI_WINNER_MODE_LEVELS,
226 } UENUM1BYTE(MULTI_WINNER_MODE_TYPE);
227 
// Levels of nearmv pruning; this is the type of the
// prune_nearmv_using_neighbors field in INTER_MODE_SPEED_FEATURES.
228 enum {
229  PRUNE_NEARMV_OFF = 0, // Turn off nearmv pruning
230  PRUNE_NEARMV_LEVEL1 = 1, // Prune nearmv for qindex (0-85)
231  PRUNE_NEARMV_LEVEL2 = 2, // Prune nearmv for qindex (0-170)
232  PRUNE_NEARMV_LEVEL3 = 3, // Prune nearmv more aggressively for qindex (0-170)
 // Alias for the highest defined level.
233  PRUNE_NEARMV_MAX = PRUNE_NEARMV_LEVEL3,
234 } UENUM1BYTE(PRUNE_NEARMV_LEVEL);
235 
// Speed features that control the transform (tx) type search.
236 typedef struct {
 // Tx type pruning level; see TX_TYPE_PRUNE_MODE.
237  TX_TYPE_PRUNE_MODE prune_2d_txfm_mode;
 // NOTE(review): presumably enables a faster tx type search for intra blocks
 // — confirm against the transform search code.
238  int fast_intra_tx_type_search;
239 
240  // INT_MAX: Disable fast search.
241  // 1 - 1024: Probability threshold used for conditionally forcing tx type,
242  // during mode search.
243  // 0: Force tx type to be DCT_DCT unconditionally, during
244  // mode search.
245  int fast_inter_tx_type_prob_thresh;
246 
247  // Prune less likely chosen transforms for each intra mode. The speed
248  // feature ranges from 0 to 2, for different speed / compression trade offs.
249  int use_reduced_intra_txset;
250 
251  // Use a skip flag prediction model to detect blocks with skip = 1 early
252  // and avoid doing full TX type search for such blocks.
253  int use_skip_flag_prediction;
254 
255  // Threshold used by the ML based method to predict TX block split decisions.
256  int ml_tx_split_thresh;
257 
258  // Skip the remaining transform type search when the rdcost of skip is found
259  // to be better than that of applying a transform.
260  int skip_tx_search;
261 
262  // Prune tx type search using previous frame stats.
263  int prune_tx_type_using_stats;
264  // Prune tx type search using estimated RDcost
265  int prune_tx_type_est_rd;
266 
267  // Flag used to control the winner mode processing for tx type pruning for
268  // inter blocks. It enables further tx type mode pruning based on ML model for
269  // mode evaluation and disables tx type mode pruning for winner mode
270  // processing.
271  int winner_mode_tx_type_pruning;
272 } TX_TYPE_SEARCH;
273 
// How the block partitioning is chosen; this is the type of the
// partition_search_type field in PARTITION_SPEED_FEATURES.
274 enum {
275  // Search partitions using RD criterion
276  SEARCH_PARTITION,
277 
278  // Always use a fixed size partition
279  FIXED_PARTITION,
280 
281  // Partition using source variance
282  VAR_BASED_PARTITION,
283 
 // The ML based value exists only when CONFIG_RT_ML_PARTITIONING is non-zero.
284 #if CONFIG_RT_ML_PARTITIONING
285  // Partition using ML model
286  ML_BASED_PARTITION
287 #endif
288 } UENUM1BYTE(PARTITION_SEARCH_TYPE);
289 
// Modes for the auto_max_partition_based_on_simple_motion field in
// PARTITION_SPEED_FEATURES, which sets the max square partition levels of a
// superblock from a 16x16 simple motion search.
// NOTE(review): the difference between the three *_PRED modes is not visible
// in this file; NOT_IN_USE presumably disables the feature — confirm.
290 enum {
291  NOT_IN_USE,
292  DIRECT_PRED,
293  RELAXED_PRED,
294  ADAPT_PRED
295 } UENUM1BYTE(MAX_PART_PRED_MODE);
296 
// Type of the high_precision_mv_usage speed feature.
// NOTE(review): by their names the values choose the MV precision from the
// last frame's MV data, from the current Q, or always use quarter-pel —
// confirm against the code that reads high_precision_mv_usage.
297 enum {
298  LAST_MV_DATA,
299  CURRENT_Q,
300  QTR_ONLY,
301 } UENUM1BYTE(MV_PREC_LOGIC);
302 
// Which superres ratios are tried; this is the type of the
// superres_auto_search_type speed feature. The values are listed from the
// widest search to the narrowest.
303 enum {
304  SUPERRES_AUTO_ALL, // Tries all possible superres ratios
305  SUPERRES_AUTO_DUAL, // Tries no superres and q-based superres ratios
306  SUPERRES_AUTO_SOLO, // Only apply the q-based superres ratio
307 } UENUM1BYTE(SUPERRES_AUTO_SEARCH_TYPE);
325 typedef enum {
332 
337 typedef enum {
338  NO_PRUNING = -1,
355 
359 typedef enum {
364 
369 typedef enum {
371  0,
373  1,
375  2,
377  3,
379  4,
382 
389 
393  RECODE_LOOP_TYPE recode_loop;
394 
400 
407  MV_PREC_LOGIC high_precision_mv_usage;
408 
417 
421  SUPERRES_AUTO_SEARCH_TYPE superres_auto_search_type;
422 
427 
432 
439 
451 
456 
461 
467 
// Speed features that control the TPL model.
469 typedef struct TPL_SPEED_FEATURES {
470  // GOP length adaptive decision.
471  // If set to 0, tpl model decides whether a shorter gf interval is better.
472  // If set to 1, tpl stats of ARFs from base layer, (base+1) layer and
473  // (base+2) layer decide whether a shorter gf interval is better.
474  // If set to 2, tpl stats of ARFs from base layer, (base+1) layer and GF boost
475  // decide whether a shorter gf interval is better.
476  // If set to 3, gop length adaptive decision is disabled.
477  int gop_length_decision_method;
478  // Prune the intra modes search by tpl.
479  // If set to 0, we will search all intra modes from DC_PRED to PAETH_PRED.
480  // If set to 1, we only search DC_PRED, V_PRED, and H_PRED.
481  int prune_intra_modes;
482  // This parameter controls which step in the n-step process we start at.
483  int reduce_first_step_size;
484  // Skip motion estimation based on the precision of center MVs and the
485  // difference between center MVs.
486  // If set to 0, motion estimation is skipped for duplicate center MVs
487  // (default). If set to 1, motion estimation is skipped for duplicate
488  // full-pixel center MVs. If set to 2, motion estimation is skipped if the
489  // difference between center MVs is less than the threshold.
490  int skip_alike_starting_mv;
491 
492  // When to stop subpel search.
493  SUBPEL_FORCE_STOP subpel_force_stop;
494 
495  // Which search method to use.
496  SEARCH_METHODS search_method;
497 
498  // Prune starting mvs in TPL based on sad scores.
499  int prune_starting_mv;
500 
501  // Do not run TPL for a filtered key frame.
502  int disable_filtered_key_tpl;
503 
504  // Prune reference frames in TPL.
505  int prune_ref_frames_in_tpl;
506 
507  // Support compound predictions.
508  int allow_compound_pred;
509 
510  // Calculate rate and distortion based on Y plane only.
511  int use_y_only_rate_distortion;
512 } TPL_SPEED_FEATURES;
513 
// Speed features that control global motion (GM) estimation.
514 typedef struct GLOBAL_MOTION_SPEED_FEATURES {
 // Global motion search mode; see GM_SEARCH_TYPE.
515  GM_SEARCH_TYPE gm_search_type;
516 
517  // During global motion estimation, prune remaining reference frames in a
518  // given direction(past/future), if the evaluated ref_frame in that direction
519  // yields gm_type as INVALID/TRANSLATION/IDENTITY
520  int prune_ref_frame_for_gm_search;
521 
522  // When the current GM type is set to ZEROMV, prune ZEROMV if its performance
523  // is worse than NEWMV under SSE metric.
524  // 0 : no pruning
525  // 1 : conservative pruning
526  // 2 : aggressive pruning
 // NOTE(review): the field declaration this comment describes is not present
 // in this listing (listing line 527 is missing) — restore it from the
 // original header before using this struct.
528 
529  // Disable global motion estimation based on stats of previous frames in the
530  // GF group
531  int disable_gm_search_based_on_stats;
532 } GLOBAL_MOTION_SPEED_FEATURES;
533 
// Speed features that control the block partition search.
534 typedef struct PARTITION_SPEED_FEATURES {
 // How partitions are chosen; see PARTITION_SEARCH_TYPE.
535  PARTITION_SEARCH_TYPE partition_search_type;
536 
537  // Used if partition_search_type = FIXED_PARTITION
538  BLOCK_SIZE fixed_partition_size;
539 
540  // Prune extended partition types search
541  // Can take values 0 - 2, 0 referring to no pruning, and 1 - 2 increasing
542  // aggressiveness of pruning in order.
543  int prune_ext_partition_types_search_level;
544 
545  // Prune part4 based on block size
546  int prune_part4_search;
547 
548  // Use a ML model to prune rectangular, ab and 4-way horz
549  // and vert partitions
550  int ml_prune_partition;
551 
552  // Use a ML model to adaptively terminate partition search after trying
553  // PARTITION_SPLIT. Can take values 0 - 2, 0 meaning not being enabled, and
554  // 1 - 2 increasing aggressiveness in order.
555  int ml_early_term_after_part_split_level;
556 
557  // Skip rectangular partition test when partition type none gives better
558  // rd than partition type split. Can take values 0 - 2, 0 referring to no
559  // skipping, and 1 - 2 increasing aggressiveness of skipping in order.
560  int less_rectangular_check_level;
561 
562  // Use square partition only beyond this block size.
563  BLOCK_SIZE use_square_partition_only_threshold;
564 
565  // Sets max square partition levels for this superblock based on
566  // motion vector and prediction error distribution produced from 16x16
567  // simple motion search
568  MAX_PART_PRED_MODE auto_max_partition_based_on_simple_motion;
569 
570  // Min and max square partition size we enable (block_size) as per auto
571  // min max, but also used by adjust partitioning, and pick_partitioning.
572  BLOCK_SIZE default_min_partition_size;
573  BLOCK_SIZE default_max_partition_size;
574 
575  // Sets level of adjustment of variance-based partitioning during
576  // rd_use_partition 0 - no partition adjustment, 1 - try to merge partitions
577  // for small blocks and high QP, 2 - try to merge partitions, 3 - try to merge
578  // and split leaf partitions and 0 - 3 decreasing aggressiveness in order.
579  int adjust_var_based_rd_partitioning;
580 
581  // Partition search early breakout thresholds.
582  int64_t partition_search_breakout_dist_thr;
583  int partition_search_breakout_rate_thr;
584 
585  // Thresholds for ML based partition search breakout.
586  int ml_partition_search_breakout_thresh[PARTITION_BLOCK_SIZES];
587 
588  // Aggressiveness levels for pruning split and rectangular partitions based on
589  // simple_motion_search. SIMPLE_AGG_LVL0 to SIMPLE_AGG_LVL3 correspond to
590  // simple motion search based pruning. QIDX_BASED_AGG_LVL1 corresponds to
591  // qindex based and simple motion search based pruning.
592  int simple_motion_search_prune_agg;
593 
594  // Perform simple_motion_search on each possible subblock and use it to prune
595  // PARTITION_HORZ and PARTITION_VERT.
596  int simple_motion_search_prune_rect;
597 
598  // Perform simple motion search before none_partition to decide if we
599  // want to remove all partitions other than PARTITION_SPLIT. If set to 0, this
600  // model is disabled. If set to 1, the model attempts to perform
601  // PARTITION_SPLIT only. If set to 2, the model also attempts to prune
602  // PARTITION_SPLIT.
603  int simple_motion_search_split;
604 
605  // Use features from simple_motion_search to terminate prediction block
606  // partition after PARTITION_NONE
607  int simple_motion_search_early_term_none;
608 
609  // Controls whether to reduce the number of motion search steps. If this is 0,
610  // then simple_motion_search has the same number of steps as
611  // single_motion_search (assuming no other speed features). Otherwise, reduce
612  // the number of steps by the value contained in this variable.
613  int simple_motion_search_reduce_search_steps;
614 
615  // This variable controls the maximum block size where intra blocks can be
616  // used in inter frames.
617  // TODO(aconverse): Fold this into one of the other many mode skips
618  BLOCK_SIZE max_intra_bsize;
619 
620  // Use CNN with luma pixels on source frame on each of the 64x64 subblock to
621  // perform partition pruning in intra frames.
622  // 0: No Pruning
623  // 1: Prune split and rectangular partitions only
624  // 2: Prune none, split and rectangular partitions
625  int intra_cnn_based_part_prune_level;
626 
627  // Disable extended partition search for lower block sizes.
628  int ext_partition_eval_thresh;
629 
630  // Disable rectangular partitions for larger block sizes.
631  int rect_partition_eval_thresh;
632 
633  // prune extended partition search
634  // 0 : no pruning
635  // 1 : prune 1:4 partition search using winner info from split partitions
636  // 2 : prune 1:4 and AB partition search using split and HORZ/VERT info
637  int prune_ext_part_using_split_info;
638 
639  // Prune rectangular, AB and 4-way partition based on q index and block size
640  // 0 : no pruning
641  // 1 : prune sub_8x8 at very low quantizers
642  // 2 : prune all block size based on qindex
643  int prune_rectangular_split_based_on_qidx;
644 
645  // Terminate partition search for child partition,
646  // when NONE and SPLIT partition rd_costs are INT64_MAX.
647  int early_term_after_none_split;
648 
649  // Level used to adjust threshold for av1_ml_predict_breakout(). At lower
650  // levels, more conservative threshold is used, and value of 0 indicates
651  // av1_ml_predict_breakout() is disabled. Value of 3 corresponds to default
652  // case with no adjustment to lbd thresholds.
653  int ml_predict_breakout_level;
654 
655  // Prune sub_8x8 (BLOCK_4X4, BLOCK_4X8 and BLOCK_8X4) partitions.
656  // 0 : no pruning
657  // 1 : pruning based on neighbour block information
658  // 2 : prune always
659  int prune_sub_8x8_partition_level;
660 
661  // Prune rectangular split based on simple motion search split/no_split score.
662  // 0: disable pruning, 1: enable pruning
663  int simple_motion_search_rect_split;
664 
665  // The current encoder adopts a DFS search for block partitions.
666  // Therefore the mode selection and associated rdcost is ready for smaller
667  // blocks before the mode selection for some partition types.
668  // AB partition could use previous rd information and skip mode search.
669  // An example is:
670  //
671  // current block
672  // +---+---+
673  // | |
674  // + +
675  // | |
676  // +-------+
677  //
678  // SPLIT partition has been searched first before trying HORZ_A
679  // +---+---+
680  // | R | R |
681  // +---+---+
682  // | R | R |
683  // +---+---+
684  //
685  // HORZ_A
686  // +---+---+
687  // | | |
688  // +---+---+
689  // | |
690  // +-------+
691  //
692  // With this speed feature, the top two sub blocks can directly use rdcost
693  // searched in split partition, and the mode info is also copied from
694  // saved info. Similarly, the bottom rectangular block can also use
695  // the available information from previous rectangular search.
696  int reuse_prev_rd_results_for_part_ab;
697 
698  // Reuse the best prediction modes found in PARTITION_SPLIT and PARTITION_RECT
699  // when encoding PARTITION_AB.
700  int reuse_best_prediction_for_part_ab;
701 
702  // The current partition search records the best rdcost so far and uses it
703  // in mode search and transform search to early skip when some criterion is
704  // met. For example, when the current rdcost is larger than the best rdcost,
705  // or the model rdcost is larger than the best rdcost times some thresholds.
706  // By default, this feature is turned on to speed up the encoder partition
707  // search.
708  // If disabling it, at speed 0, 30 frames, we could get
709  // about -0.25% quality gain (psnr, ssim, vmaf), with about 13% slowdown.
710  int use_best_rd_for_pruning;
711 
712  // Skip evaluation of non-square partitions based on the corresponding NONE
713  // partition.
714  // 0: no pruning
715  // 1: prune extended partitions if NONE is skippable
716  // 2: on top of 1, prune rectangular partitions if NONE is inter, not a newmv
717  // mode and skippable
718  int skip_non_sq_part_based_on_none;
719 
720  // Disables 8x8 and below partitions for low quantizers.
721  int disable_8x8_part_based_on_qidx;
722 } PARTITION_SPEED_FEATURES;
723 
// Speed features that control motion vector (MV) search.
724 typedef struct MV_SPEED_FEATURES {
725  // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
726  SEARCH_METHODS search_method;
727 
728  // Enable the use of faster, less accurate mv search method on bsize >=
729  // BLOCK_32X32.
730  // TODO(chiyotsai@google.com): Take the clip's resolution and mv activity into
731  // account.
732  int use_bsize_dependent_search_method;
733 
734  // If this is set to 1, we limit the motion search range to 2 times the
735  // largest motion vector found in the last frame.
736  int auto_mv_step_size;
737 
738  // Subpel_search_method can only be subpel_tree which does a subpixel
739  // logarithmic search that keeps stepping at 1/2 pixel units until
740  // you stop getting a gain, and then goes on to 1/4 and repeats
741  // the same process. Along the way it skips many diagonals.
 // NOTE(review): SUBPEL_SEARCH_METHODS also declares two pruned variants of
 // the tree search, so "can only be subpel_tree" looks outdated — confirm.
742  SUBPEL_SEARCH_METHODS subpel_search_method;
743 
744  // Maximum number of steps in logarithmic subpel search before giving up.
745  int subpel_iters_per_step;
746 
747  // When to stop subpel search.
748  SUBPEL_FORCE_STOP subpel_force_stop;
749 
750  // When to stop subpel search in simple motion search.
751  SUBPEL_FORCE_STOP simple_motion_subpel_force_stop;
752 
753  // If true, sub-pixel search uses the exact convolve function used for final
754  // encoding and decoding; otherwise, it uses bilinear interpolation.
755  SUBPEL_SEARCH_TYPE use_accurate_subpel_search;
756 
757  // Threshold for allowing exhaustive motion search.
758  int exhaustive_searches_thresh;
759 
760  // Pattern to be used for any exhaustive mesh searches (except intraBC ME).
761  MESH_PATTERN mesh_patterns[MAX_MESH_STEP];
762 
763  // Pattern to be used for exhaustive mesh searches of intraBC ME.
764  MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_STEP];
765 
766  // Reduce single motion search range based on MV result of prior ref_mv_idx.
767  int reduce_search_range;
768 
769  // Prune mesh search.
770  PRUNE_MESH_SEARCH_LEVEL prune_mesh_search;
771 
772  // Use the rd cost around the best FULLPEL_MV to speed up subpel search
773  int use_fullpel_costlist;
774 
775  // Set the full pixel search level of obmc
776  // 0: obmc_full_pixel_diamond
777  // 1: obmc_refining_search_sad (faster)
778  int obmc_full_pixel_search_level;
779 
780  // Accurate full pixel motion search based on TPL stats.
781  int full_pixel_search_level;
782 
783  // Whether to downsample the rows in sad calculation during motion search.
784  // This is only active when there are at least 16 rows.
785  int use_downsampled_sad;
786 
787  // Enable/disable extensive joint motion search.
788  int disable_extensive_joint_motion_search;
789 
790  // Enable second best mv check in joint mv search.
791  // 0: allow second MV (use rd cost as the metric)
792  // 1: use var as the metric
793  // 2: disable second MV
794  int disable_second_mv;
795 
796  // Skips full pixel search based on start mv of prior ref_mv_idx.
797  int skip_fullpel_search_using_startmv;
798 } MV_SPEED_FEATURES;
799 
800 typedef struct INTER_MODE_SPEED_FEATURES {
801  // 2-pass inter mode model estimation where the preliminary pass skips
802  // transform search and uses a model to estimate rd, while the final pass
803  // computes the full transform search. Two types of models are supported:
804  // 0: not used
805  // 1: used with online dynamic rd model
806  // 2: used with static rd model
807  int inter_mode_rd_model_estimation;
808 
809  // Bypass transform search based on skip rd
810  int txfm_rd_gate_level;
811 
812  // Limit the inter mode tested in the RD loop
813  int reduce_inter_modes;
814 
815  // This variable is used to cap the maximum number of times we skip testing a
816  // mode to be evaluated. A high value means we will be faster.
817  int adaptive_rd_thresh;
818 
819  // Aggressively prune inter modes when best mode is skippable.
820  int prune_inter_modes_if_skippable;
821 
822  // Drop less likely to be picked reference frames in the RD search.
823  // Has seven levels for now: 0, 1, 2, 3, 4, 5 and 6 where higher levels prune
824  // more aggressively than lower ones. (0 means no pruning).
825  int selective_ref_frame;
826 
827  // Prune reference frames for rectangular partitions.
828  // 0 implies no pruning
829  // 1 implies prune for extended partition
830  // 2 implies prune horiz, vert and extended partition
831  int prune_ref_frame_for_rect_partitions;
832 
833  // Prune inter modes w.r.t past reference frames
834  // 0 no pruning
835  // 1 prune inter modes w.r.t ALTREF2 and ALTREF reference frames
836  // 2 prune inter modes w.r.t BWDREF, ALTREF2 and ALTREF reference frames
837  int alt_ref_search_fp;
838 
839  // Prune compound reference frames
840  // 0 no pruning
841  // 1 prune compound references which do not satisfy the two conditions:
842  // a) The references are at a nearest distance from the current frame in
843  // both past and future direction.
844  // b) The references have minimum pred_mv_sad in both past and future
845  // direction.
846  // 2 prune compound references except the one with nearest distance from the
847  // current frame in both past and future direction.
848  int prune_comp_ref_frames;
849 
850  // Skip the current ref_mv in NEW_MV mode based on mv, rate cost, etc.
851  // This speed feature equaling 0 means no skipping.
852  // If the speed feature equals 1 or 2, skip the current ref_mv in NEW_MV mode
853  // if we have already encountered ref_mv in the drl such that:
854  // 1. The other drl has the same mv during the SIMPLE_TRANSLATION search
855  // process as the current mv.
856  // 2. The rate needed to encode the current mv is larger than that for the
857  // other ref_mv.
858  // The speed feature equaling 1 means using subpel mv in the comparison.
859  // The speed feature equaling 2 means using fullpel mv in the comparison.
860  // If the speed feature >= 3, skip the current ref_mv in NEW_MV mode based on
861  // known full_mv bestsme and drl cost.
862  int skip_newmv_in_drl;
863 
864  // This speed feature checks duplicate ref MVs among NEARESTMV, NEARMV,
865  // GLOBALMV and skips NEARMV or GLOBALMV (in order) if a duplicate is found
866  // TODO(any): Instead of skipping repeated ref mv, use the recalculated
867  // rd-cost based on mode rate and skip the mode evaluation
868  int skip_repeated_ref_mv;
869 
870  // Flag used to control the ref_best_rd based gating for chroma
871  int perform_best_rd_based_gating_for_chroma;
872 
873  // Reuse the inter_intra_mode search result from NEARESTMV mode to other
874  // single ref modes
875  int reuse_inter_intra_mode;
876 
877  // prune wedge and compound segment approximate rd evaluation based on
878  // compound average modeled rd
879  int prune_comp_type_by_model_rd;
880 
881  // prune wedge and compound segment approximate rd evaluation based on
882  // compound average rd/ref_best_rd
883  int prune_comp_type_by_comp_avg;
884 
885  // Skip some ref frames in compound motion search by single motion search
886  // result. Has three levels for now: 0 referring to no skipping, and 1 - 3
887  // increasing aggressiveness of skipping in order.
888  // Note: The search order might affect the result. It assumes that the single
889  // reference modes are searched before compound modes. It is better to search
890  // same single inter mode as a group.
891  int prune_comp_search_by_single_result;
892 
893  // Instead of performing a full MV search, do a simple translation first
894  // and only perform a full MV search on the motion vectors that performed
895  // well.
896  int prune_mode_search_simple_translation;
897 
898  // Only search compound modes with at least one "good" reference frame.
899  // A reference frame is good if, after looking at its performance among
900  // the single reference modes, it is one of the two best performers.
901  int prune_compound_using_single_ref;
902 
903  // Skip extended compound mode (NEAREST_NEWMV, NEW_NEARESTMV, NEAR_NEWMV,
904  // NEW_NEARMV) using ref frames of above and left neighbor
905  // blocks.
906  // 0 : no pruning
907  // 1 : prune ext compound modes using neighbor blocks (less aggressiveness)
908  // 2 : prune ext compound modes using neighbor blocks (high aggressiveness)
909  // 3 : prune ext compound modes unconditionally (highest aggressiveness)
910  int prune_ext_comp_using_neighbors;
911 
912  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
913  int skip_ext_comp_nearmv_mode;
914 
915  // Skip extended compound mode when ref frame corresponding to NEWMV does not
916  // have NEWMV as single mode winner.
917  // 0 : no pruning
918  // 1 : prune extended compound mode (less aggressiveness)
919  // 2 : prune extended compound mode (high aggressiveness)
920  int prune_comp_using_best_single_mode_ref;
921 
922  // Skip NEARESTMV and NEARMV using weight computed in ref mv list population
923  int prune_nearest_near_mv_using_refmv_weight;
924 
925  // Based on previous ref_mv_idx search result, prune the following search.
926  int prune_ref_mv_idx_search;
927 
928  // Disable one sided compound modes.
929  int disable_onesided_comp;
930 
931  // Prune obmc search using previous frame stats.
932  // INT_MAX : disable obmc search
933  int prune_obmc_prob_thresh;
934 
935  // Prune warped motion search using previous frame stats.
936  int prune_warped_prob_thresh;
937 
938  // Variance threshold to enable/disable Interintra wedge search
939  unsigned int disable_interintra_wedge_var_thresh;
940 
941  // Variance threshold to enable/disable Interinter wedge search
942  unsigned int disable_interinter_wedge_var_thresh;
943 
944  // De-couple wedge and mode search during interintra RDO.
945  int fast_interintra_wedge_search;
946 
947  // Whether fast wedge sign estimate is used
948  int fast_wedge_sign_estimate;
949 
950  // Enable/disable ME for interinter wedge search.
951  int disable_interinter_wedge_newmv_search;
952 
953  // Decide when and how to use joint_comp.
954  DIST_WTD_COMP_FLAG use_dist_wtd_comp_flag;
955 
956  // Clip the frequency of updating the mv cost.
957  INTERNAL_COST_UPDATE_TYPE mv_cost_upd_level;
958 
959  // Clip the frequency of updating the coeff cost.
960  INTERNAL_COST_UPDATE_TYPE coeff_cost_upd_level;
961 
962  // Clip the frequency of updating the mode cost.
963  INTERNAL_COST_UPDATE_TYPE mode_cost_upd_level;
964 
965  // Prune inter modes based on tpl stats
966  // 0 : no pruning
967  // 1 - 3 indicate increasing aggressiveness in order.
968  int prune_inter_modes_based_on_tpl;
969 
970  // Skip NEARMV and NEAR_NEARMV modes using ref frames of above and left
971  // neighbor blocks and qindex.
972  PRUNE_NEARMV_LEVEL prune_nearmv_using_neighbors;
973 
974  // Model based breakout after interpolation filter search
975  // 0: no breakout
976  // 1: use model based rd breakout
977  int model_based_post_interp_filter_breakout;
978 
979  // Reuse compound type rd decision when exact match is found
980  // 0: No reuse
981  // 1: Reuse the compound type decision
982  int reuse_compound_type_decision;
983 
984  // Enable/disable masked compound.
985  int disable_masked_comp;
986 
987  // Enable/disable the fast compound mode search.
988  int enable_fast_compound_mode_search;
989 
990  // Reuse masked compound type search results
991  int reuse_mask_search_results;
992 
993  // Enable/disable fast search for wedge masks
994  int enable_fast_wedge_mask_search;
995 
996  // Early breakout from transform search of inter modes
997  int inter_mode_txfm_breakout;
998 
999  // Limit number of inter modes for txfm search if a newmv mode gets
1000  // evaluated among the top modes.
1001  // 0: no pruning
1002  // 1 to 3 indicate increasing order of aggressiveness
1003  int limit_inter_mode_cands;
1004 
1005  // Cap the no. of txfm searches for a given prediction mode.
1006  // 0: no cap, 1: cap beyond first 4 searches, 2: cap beyond first 3 searches.
1007  int limit_txfm_eval_per_mode;
1008 
1009  // Prune warped motion search based on block size.
1010  int extra_prune_warped;
1011 
1012  // Do not search compound modes for ARF.
1013  // The intuition is that ARF is predicted by frames far away from it,
1014  // whose temporal correlations with the ARF are likely low.
1015  // It is therefore likely that compound modes do not work as well for ARF
1016  // as other inter frames.
1017  // Speed/quality impact:
1018  // Speed 1: 12% faster, 0.1% psnr loss.
1019  // Speed 2: 2% faster, 0.05% psnr loss.
1020  // No change for speed 3 and up, because |disable_onesided_comp| is true.
1021  int skip_arf_compound;
1022 } INTER_MODE_SPEED_FEATURES;
1023 
1024 typedef struct INTERP_FILTER_SPEED_FEATURES {  // Interpolation filter search speed features.
1025  // Do a limited interpolation filter search for dual filters, since the best
1026  // choice usually includes EIGHTTAP_REGULAR.
1027  int use_fast_interpolation_filter_search;
1028 
1029  // Disable dual filter.
1030  int disable_dual_filter;
1031 
1032  // Save the results of av1_interpolation_filter_search for a block. Before a
1033  // search, check mv and ref_frames: if they are very close to the previously
1034  // saved results, the filter search can be skipped.
1035  int use_interp_filter;
1036 
1037  // Skip sharp_filter evaluation based on regular and smooth filter rd for the
1038  // dual_filter=0 case.
1039  int skip_sharp_interp_filter_search;
1040 
1041  int cb_pred_filter_search;  // NOTE(review): undocumented in this header; semantics to be confirmed.
1042 
1043  // Adaptive interp_filter search to allow skipping certain filter types.
1044  int adaptive_interp_filter_search;
1045 } INTERP_FILTER_SPEED_FEATURES;
1046 
1047 typedef struct INTRA_MODE_SPEED_FEATURES {  // Intra mode search speed features.
1048  // These bit masks allow you to enable or disable intra modes for each
1049  // transform size separately.
1050  int intra_y_mode_mask[TX_SIZES];
1051  int intra_uv_mode_mask[TX_SIZES];
1052 
1053  // Flag to allow skipping intra mode for inter frame prediction.
1054  int skip_intra_in_interframe;
1055 
1056  // Prune intra mode candidates based on source block histogram of gradient.
1057  // Applies to luma plane only.
1058  // Feasible values are 0..4. The feature is disabled for 0. An increasing
1059  // value indicates a more aggressive pruning threshold.
1060  int intra_pruning_with_hog;
1061 
1062  // Prune intra mode candidates based on source block histogram of gradient.
1063  // Applies to chroma plane only.
1064  // Feasible values are 0..4. The feature is disabled for 0. An increasing
1065  // value indicates a more aggressive pruning threshold.
1066  int chroma_intra_pruning_with_hog;
1067 
1068  // Enable/disable smooth intra modes.
1069  int disable_smooth_intra;
1070 
1071  // Prune UV_SMOOTH_PRED mode for chroma based on chroma source variance.
1072  // false : No pruning
1073  // true : Prune UV_SMOOTH_PRED mode based on chroma source variance
1074  //
1075  // For allintra encode, this speed feature reduces instruction count
1076  // by 1.90%, 2.21% and 1.97% for speed 6, 7 and 8 with coding performance
1077  // change less than 0.04%. For AVIF image encode, this speed feature reduces
1078  // encode time by 1.56%, 2.14% and 0.90% for speed 6, 7 and 8 on a typical
1079  // image dataset with coding performance change less than 0.05%.
1080  bool prune_smooth_intra_mode_for_chroma;
1081 
1082  // Prune filter intra modes in intra frames.
1083  // 0 : No pruning
1084  // 1 : Evaluate applicable filter intra modes based on best intra mode so far
1085  // 2 : Do not evaluate filter intra modes
1086  int prune_filter_intra_level;
1087 
1088  // Prune palette search.
1089  // 0: No pruning
1090  // 1: Perform coarse search to prune the palette colors. For winner colors,
1091  // neighbors are also evaluated using a finer search.
1092  // 2: Perform 2 way palette search from max colors to min colors (and min
1093  // colors to remaining colors) and terminate the search if current number of
1094  // palette colors is not the winner.
1095  int prune_palette_search_level;
1096 
1097  // Terminate early in luma palette_size search. Speed feature values indicate
1098  // increasing level of pruning.
1099  // 0: No early termination
1100  // 1: Terminate early for higher luma palette_size, if header rd cost of lower
1101  // palette_size is more than 2 * best_rd. This level of pruning is more
1102  // conservative when compared to sf level 2 as the cases which will get pruned
1103  // with sf level 1 are a subset of the cases which will get pruned with sf
1104  // level 2.
1105  // 2: Terminate early for higher luma palette_size, if header rd cost of lower
1106  // palette_size is more than best_rd.
1107  // For allintra encode, this sf reduces instruction count by 2.49%, 1.07%,
1108  // 2.76%, 2.30%, 1.84%, 2.69%, 2.04%, 2.05% and 1.44% for speed 0, 1, 2, 3, 4,
1109  // 5, 6, 7 and 8 on screen content set with coding performance change less
1110  // than 0.01% for speed <= 2 and less than 0.03% for speed >= 3. For AVIF
1111  // image encode, this sf reduces instruction count by 1.94%, 1.13%, 1.29%,
1112  // 0.93%, 0.89%, 1.03%, 1.07%, 1.20% and 0.18% for speed 0, 1, 2, 3, 4, 5, 6,
1113  // 7 and 8 on a typical image dataset with coding performance change less than
1114  // 0.01%.
1115  int prune_luma_palette_size_search_level;
1116 
1117  // Prune chroma intra modes based on luma intra mode winner.
1118  // 0: No pruning
1119  // 1: Prune chroma intra modes other than UV_DC_PRED, UV_SMOOTH_PRED,
1120  // UV_CFL_PRED and the mode that corresponds to luma intra mode winner.
1121  int prune_chroma_modes_using_luma_winner;
1122 
1123  // Clip the frequency of updating the mv cost for intrabc.
1124  INTERNAL_COST_UPDATE_TYPE dv_cost_upd_level;
1125 
1126  // We use DCT_DCT transform followed by computing SATD (Sum of Absolute
1127  // Transformed Differences) as an estimation of RD score to quickly find the
1128  // best possible Chroma from Luma (CFL) parameter. Then we do a full RD search
1129  // near the best possible parameter. The search range is set here.
1130  // The range of cfl_search_range should be [1, 33], and the following are the
1131  // recommended values.
1132  // 1: Fastest mode.
1133  // 3: Default mode that provides good speedup without losing compression
1134  // performance at speed 0.
1135  // 33: Exhaustive rd search (33 == CFL_MAGS_SIZE). This mode should only
1136  // be used for debugging purposes.
1137  int cfl_search_range;
1138 
1139  // TOP_INTRA_MODEL_COUNT is 4, which is the number of top model rd to store in
1140  // intra mode decision. This speed feature reduces that number for
1141  // higher speeds.
1142  int top_intra_model_count_allowed;
1143 
1144  // Adapt top_intra_model_count_allowed locally to prune luma intra modes using
1145  // neighbor block and quantizer information.
1146  int adapt_top_model_rd_count_using_neighbors;
1147 
1148  // Prune the evaluation of odd delta angles of directional luma intra modes by
1149  // using the rdcosts of neighbouring delta angles.
1150  // For allintra encode, this speed feature reduces instruction count
1151  // by 4.461%, 3.699% and 3.536% for speed 6, 7 and 8 on a typical video
1152  // dataset with coding performance change less than 0.26%. For AVIF image
1153  // encode, this speed feature reduces encode time by 2.849%, 2.471%,
1154  // and 2.051% for speed 6, 7 and 8 on a typical image dataset with coding
1155  // performance change less than 0.27%.
1156  int prune_luma_odd_delta_angles_in_intra;
1157 
1158  // Terminate early in chroma palette_size search.
1159  // 0: No early termination
1160  // 1: Terminate early for higher palette_size, if header rd cost of lower
1161  // palette_size is more than best_rd.
1162  // For allintra encode, this sf reduces instruction count by 0.45%,
1163  // 0.62%, 1.73%, 2.50%, 2.89%, 3.09% and 3.86% for speed 0 to 6 on screen
1164  // content set with coding performance change less than 0.01%.
1165  // For AVIF image encode, this sf reduces instruction count by 0.45%, 0.81%,
1166  // 0.85%, 1.05%, 1.45%, 1.66% and 1.95% for speed 0 to 6 on a typical image
1167  // dataset with no quality drop.
1168  int early_term_chroma_palette_size_search;
1169 
1170  // Skip the evaluation of filter intra modes in inter frames if rd evaluation
1171  // of luma intra dc mode results in invalid rd stats.
1172  int skip_filter_intra_in_inter_frames;
1173 } INTRA_MODE_SPEED_FEATURES;
1174 
1175 typedef struct TX_SPEED_FEATURES {  // Transform size/type search speed features.
1176  // Init search depth for square and rectangular transform partitions.
1177  // Values:
1178  // 0: search full tree, 1: search 1 level, 2: search the highest level only
1179  int inter_tx_size_search_init_depth_sqr;
1180  int inter_tx_size_search_init_depth_rect;
1181  int intra_tx_size_search_init_depth_sqr;
1182  int intra_tx_size_search_init_depth_rect;
1183 
1184  // If any dimension of a coding block size is above 64, always search the
1185  // largest transform only, since the largest transform block size is 64x64.
1186  int tx_size_search_lgr_block;
1187 
1188  TX_TYPE_SEARCH tx_type_search;  // NOTE(review): undocumented here; TX_TYPE_SEARCH is declared outside this view.
1189 
1190  // Skip split transform block partition when the collocated bigger block
1191  // is selected as all zero coefficients.
1192  int txb_split_cap;
1193 
1194  // Shortcut the transform block partition and type search when the target
1195  // rdcost is relatively lower.
1196  // Values are 0 (not used), or 1 - 2 with progressively increasing
1197  // aggressiveness.
1198  int adaptive_txb_search_level;
1199 
1200  // Prune level for tx_size_type search for inter based on rd model
1201  // 0: no pruning
1202  // 1-2: progressively increasing aggressiveness of pruning
1203  int model_based_prune_tx_search_level;
1204 
1205  // Refine TX type after fast TX search.
1206  int refine_fast_tx_search_results;
1207 
1208  // Prune transform split/no_split eval based on residual properties. A value
1209  // of 0 indicates no pruning, and the aggressiveness of pruning progressively
1210  // increases from levels 1 to 3.
1211  int prune_tx_size_level;
1212 
1213  // Prune the evaluation of transform depths as decided by the NN model.
1214  // false: No pruning.
1215  // true : Avoid the evaluation of specific transform depths using NN model.
1216  //
1217  // For allintra encode, this speed feature reduces instruction count
1218  // by 4.76%, 8.92% and 11.28% for speed 6, 7 and 8 with coding performance
1219  // change less than 0.32%. For AVIF image encode, this speed feature reduces
1220  // encode time by 4.65%, 9.16% and 10.45% for speed 6, 7 and 8 on a typical
1221  // image dataset with coding performance change less than 0.19%.
1222  bool prune_intra_tx_depths_using_nn;
1223 } TX_SPEED_FEATURES;
1224 
1225 typedef struct RD_CALC_SPEED_FEATURES {  // Rate-distortion calculation speed features.
1226  // Fast approximation of av1_model_rd_from_var_lapndz
1227  int simple_model_rd_from_var;
1228 
1229  // Perform faster distortion computation during the R-D evaluation by trying
1230  // to approximate the prediction error with transform coefficients (faster but
1231  // less accurate) rather than computing distortion in the pixel domain (slower
1232  // but more accurate). The following methods are used for distortion
1233  // computation:
1234  // Method 0: Always compute distortion in the pixel domain
1235  // Method 1: Based on block error, try using transform domain distortion for
1236  // tx_type search and compute distortion in pixel domain for final RD_STATS
1237  // Method 2: Based on block error, try to compute distortion in transform
1238  // domain
1239  // Methods 1 and 2 may fall back to computing distortion in the pixel domain
1240  // in case the block error is less than the threshold, which is controlled by
1241  // the speed feature tx_domain_dist_thres_level.
1242  //
1243  // The speed feature tx_domain_dist_level decides which of the above methods
1244  // needs to be used across different mode evaluation stages as described
1245  // below:
1246  // Eval type:   Default     Mode        Winner
1247  // Level 0  :   Method 0    Method 2    Method 0
1248  // Level 1  :   Method 1    Method 2    Method 0
1249  // Level 2  :   Method 2    Method 2    Method 0
1250  // Level 3  :   Method 2    Method 2    Method 2
1251  int tx_domain_dist_level;
1252 
1253  // Transform domain distortion threshold level
1254  int tx_domain_dist_thres_level;
1255 
1256  // Trellis (dynamic programming) optimization of quantized values
1257  TRELLIS_OPT_TYPE optimize_coefficients;
1258 
1259  // Use hash table to store macroblock RD search results
1260  // to avoid repeated search on the same residue signal.
1261  int use_mb_rd_hash;
1262 
1263  // Flag used to control the extent of coeff R-D optimization
1264  int perform_coeff_opt;
1265 } RD_CALC_SPEED_FEATURES;
1266 
1267 typedef struct WINNER_MODE_SPEED_FEATURES {  // Winner mode processing speed features.
1268  // Flag used to control the winner mode processing for better R-D optimization
1269  // of quantized coeffs
1270  int enable_winner_mode_for_coeff_opt;
1271 
1272  // Flag used to control the winner mode processing for transform size
1273  // search method
1274  int enable_winner_mode_for_tx_size_srch;
1275 
1276  // Control transform size search level
1277  // Eval type:   Default        Mode           Winner
1278  // Level 0  :   FULL RD        LARGEST ALL    FULL RD
1279  // Level 1  :   FAST RD        LARGEST ALL    FULL RD
1280  // Level 2  :   LARGEST ALL    LARGEST ALL    FULL RD
1281  // Level 3  :   LARGEST ALL    LARGEST ALL    LARGEST ALL
1282  int tx_size_search_level;
1283 
1284  // Flag used to control the winner mode processing for use of transform
1285  // domain distortion
1286  int enable_winner_mode_for_use_tx_domain_dist;
1287 
1288  // Flag used to enable processing of multiple winner modes
1289  MULTI_WINNER_MODE_TYPE multi_winner_mode_type;
1290 
1291  // Motion mode for winner candidates:
1292  // 0: speed feature OFF
1293  // 1 / 2 : Use configured number of winner candidates
1294  int motion_mode_for_winner_cand;
1295 
1296  // Controls the prediction of transform skip block or DC only block.
1297  //
1298  // Different speed feature values (0 to 3) decide the aggressiveness of
1299  // prediction (refer to predict_dc_levels[][] in speed_features.c) to be used
1300  // during different mode evaluation stages.
1301  int dc_blk_pred_level;
1302 
1303  // If on, disables interpolation filter search in handle_inter_mode loop, and
1304  // performs it during winner mode processing by \ref
1305  // tx_search_best_inter_candidates.
1306  int winner_mode_ifs;
1307 
1308  // Controls the disabling of winner mode processing. Speed feature levels
1309  // are ordered in increasing aggressiveness of pruning. The method considered
1310  // for disabling depends on the sf level value, as described below.
1311  // 0: Do not disable
1312  // 1: Disable for blocks with low source variance.
1313  // 2: Disable for blocks which turn out to be transform skip (skipped based on
1314  // eob) during MODE_EVAL stage except NEWMV mode.
1315  // 3: Disable for blocks which turn out to be transform skip during MODE_EVAL
1316  // stage except NEWMV mode. For high quantizers, prune conservatively based on
1317  // transform skip (skipped based on eob) except for NEWMV mode.
1318  // 4: Disable for blocks which turn out to be transform skip during MODE_EVAL
1319  // stage.
1320  int prune_winner_mode_eval_level;
1321 } WINNER_MODE_SPEED_FEATURES;
1322 
1323 typedef struct LOOP_FILTER_SPEED_FEATURES {  // Loop filter, CDEF and loop restoration speed features.
1324  // This feature controls how the loop filter level is determined.
1325  LPF_PICK_METHOD lpf_pick;
1326 
1327  // Skip some final iterations in the determination of the best loop filter
1328  // level.
1329  int use_coarse_filter_level_search;
1330 
1331  // Control how the CDEF strength is determined.
1332  CDEF_PICK_METHOD cdef_pick_method;
1333 
1334  // Decoder side speed feature to add penalty for use of dual-sgr filters.
1335  // Takes values 0 - 10, 0 indicating no penalty and each additional level
1336  // adding a penalty of 1%.
1337  int dual_sgr_penalty_level;
1338 
1339  // Prune sgr ep using a binary-search-like mechanism.
1340  int enable_sgr_ep_pruning;
1341 
1342  // Disable loop restoration for the chroma planes.
1343  int disable_loop_restoration_chroma;
1344 
1345  // Disable loop restoration for the luma plane.
1346  int disable_loop_restoration_luma;
1347 
1348  // Prune RESTORE_WIENER evaluation based on source variance
1349  // 0 : no pruning
1350  // 1 : conservative pruning
1351  // 2 : aggressive pruning
1352  int prune_wiener_based_on_src_var;
1353 
1354  // Prune self-guided loop restoration based on wiener search results
1355  // 0 : no pruning
1356  // 1 : pruning based on rdcost ratio of RESTORE_WIENER and RESTORE_NONE
1357  // 2 : pruning based on winner restoration type among RESTORE_WIENER and
1358  // RESTORE_NONE
1359  int prune_sgr_based_on_wiener;
1360 
1361  // Reduce the wiener filter window size for luma.
1362  int reduce_wiener_window_size;
1363 
1364  // Disable loop restoration filter.
1365  int disable_lr_filter;
1366 
1367  // Whether to downsample the rows in computation of wiener stats.
1368  int use_downsampled_wiener_stats;
1369 } LOOP_FILTER_SPEED_FEATURES;
1370 
1371 typedef struct REAL_TIME_SPEED_FEATURES {  // Speed features for real-time encoding.
1372  // Check intra prediction for non-RD mode.
1373  int check_intra_pred_nonrd;
1374 
1375  // Skip checking intra prediction.
1376  // 0 - don't skip
1377  // 1 - skip if TX is skipped and best mode is not NEWMV
1378  // 2 - skip if TX is skipped
1379  // Skipping aggressiveness increases from level 1 to 2.
1380  int skip_intra_pred;
1381 
1382  // Perform coarse ME before calculating variance in variance-based partition
1383  int estimate_motion_for_var_based_partition;
1384 
1385  // For nonrd_use_partition: mode of extra check of leaf partition
1386  // 0 - don't check merge
1387  // 1 - always check merge
1388  // 2 - check merge and prune checking final split
1389  // 3 - check merge and prune checking final split based on bsize and qindex
1390  int nonrd_check_partition_merge_mode;
1391 
1392  // For nonrd_use_partition: check of leaf partition extra split
1393  int nonrd_check_partition_split;
1394 
1395  // Implements various heuristics to skip searching modes.
1396  // The heuristics selected are based on flags
1397  // defined in the MODE_SEARCH_SKIP_HEURISTICS enum.
1398  unsigned int mode_search_skip_flags;
1399 
1400  // For nonrd: Reduces ref frame search.
1401  // 0 - low level of search prune in non last frames
1402  // 1 - pruned search in non last frames
1403  // 2 - more pruned search in non last frames
1404  int nonrd_prune_ref_frame_search;
1405 
1406  // This flag controls the use of non-RD mode decision.
1407  int use_nonrd_pick_mode;
1408 
1409  // Use ALTREF frame in non-RD mode decision.
1410  int use_nonrd_altref_frame;
1411 
1412  // Use compound reference for non-RD mode.
1413  int use_comp_ref_nonrd;
1414 
1415  // Reference frames for compound prediction for nonrd pickmode:
1416  // LAST_GOLDEN (0), LAST_LAST2 (1), or LAST_ALTREF (2).
1417  int ref_frame_comp_nonrd[3];
1418 
1419  // Use reduced ref set for real-time mode.
1420  int use_real_time_ref_set;
1421 
1422  // Skip a number of expensive mode evaluations for blocks with very low
1423  // temporal variance.
1424  int short_circuit_low_temp_var;
1425 
1426  // Use modeled (currently CurvFit model) RDCost for fast non-RD mode
1427  int use_modeled_non_rd_cost;
1428 
1429  // Reuse inter prediction in fast non-rd mode.
1430  int reuse_inter_pred_nonrd;
1431 
1432  // Number of best inter modes to search transform. INT_MAX - search all.
1433  int num_inter_modes_for_tx_search;
1434 
1435  // Use interpolation filter search in non-RD mode decision.
1436  int use_nonrd_filter_search;
1437 
1438  // Use simplified RD model for interpolation search and Intra
1439  int use_simple_rd_model;
1440 
1441  // If set, forces the interpolation filter to EIGHTTAP_REGULAR.
1442  int skip_interp_filter_search;
1443 
1444  // For nonrd mode: use hybrid intra mode search for intra only frames based on
1445  // block properties.
1446  // 0 : use nonrd pick intra for all blocks
1447  // 1 : use rd for bsize < 16x16, nonrd otherwise
1448  // 2 : use rd for bsize < 16x16 and src var >= 101, nonrd otherwise
1449  int hybrid_intra_pickmode;
1450 
1451  // Compute variance/sse on source difference, prior to encoding superblock.
1452  int source_metrics_sb_nonrd;
1453 
1454  // Flag to indicate process for handling overshoot on slide/scene change,
1455  // for real-time CBR mode.
1456  OVERSHOOT_DETECTION_CBR overshoot_detection_cbr;
1457 
1458  // Check for scene/content change detection on every frame before encoding.
1459  int check_scene_detection;
1460 
1461  // For nonrd mode: Prefer larger partition blks in variance based partitioning
1462  // 0: disabled, 1-4: increasing aggressiveness
1463  int prefer_large_partition_blocks;
1464 
1465  // Uses results of temporal noise estimate.
1466  int use_temporal_noise_estimate;
1467 
1468  // Parameter indicating initial search window to be used in full-pixel search
1469  // for nonrd_pickmode. Range [0, MAX_MVSEARCH_STEPS - 1]. Lower value
1470  // indicates larger window. If set to 0, step_param is set based on internal
1471  // logic in set_mv_search_params().
1472  int fullpel_search_step_param;
1473 
1474  // Bit mask to enable or disable intra modes for each prediction block size
1475  // separately, for nonrd pickmode.
1476  int intra_y_mode_bsize_mask_nrd[BLOCK_SIZES];
1477 
1478  // Skips mode checks more aggressively in nonRD mode
1479  int nonrd_agressive_skip;
1480 
1481  // Skip cdef on 64x64 blocks when NEWMV or INTRA is not picked or color
1482  // sensitivity is off. When color sensitivity is on for a superblock, all
1483  // 64x64 blocks within will not skip.
1484  int skip_cdef_sb;
1485 
1486  // Forces larger partition blocks in variance based partitioning for intra
1487  // frames
1488  int force_large_partition_blocks_intra;
1489 
1490  // Skip evaluation of no split in tx size selection for merge partition
1491  int skip_tx_no_split_var_based_partition;
1492 
1493  // Intermediate termination of newMV mode evaluation based on the best mode
1494  // sse so far
1495  int skip_newmv_mode_based_on_sse;
1496 
1497  // Define gf length multiplier.
1498  // Level 0: use large multiplier, level 1: use medium multiplier.
1499  int gf_length_lvl;
1500 
1501  // Prune inter modes with golden frame as reference for NEARMV and NEWMV modes
1502  int prune_inter_modes_with_golden_ref;
1503 
1504  // Prune inter modes w.r.t golden or alt-ref frame based on sad
1505  int prune_inter_modes_wrt_gf_arf_based_on_sad;
1506 
1507  // Prune inter mode search in rd path based on current block's temporal
1508  // variance wrt LAST reference.
1509  int prune_inter_modes_using_temp_var;
1510 
1511  // Force half_pel at block level.
1512  int force_half_pel_block;
1513 
1514  // Prune intra mode evaluation in inter frames based on mv range.
1515  BLOCK_SIZE prune_intra_mode_based_on_mv_range;
1516  // The number of times to left shift the splitting thresholds in variance
1517  // based partitioning. The minimum value should be 7 to avoid left shifting
1518  // by a negative number.
1519  int var_part_split_threshold_shift;
1520 
1521  // Qindex based variance partition threshold index, which determines
1522  // the aggressiveness of partition pruning
1523  // 0: disabled for speeds 9,10
1524  // 1,2: (rd-path) lowers qindex thresholds conditionally (for low SAD sb)
1525  // 3,4: (non-rd path) uses pre-tuned qindex thresholds
1526  int var_part_based_on_qidx;
1527 
1528  // Enable GF refresh based on Q value.
1529  int gf_refresh_based_on_qp;
1530 
1531  // Temporal filtering
1532  int use_rtc_tf;
1533 
1534  // Prune the use of the identity transform in nonrd_pickmode,
1535  // used for screen content mode: only for smaller blocks
1536  // and higher spatial variance, and when skip_txfm is not
1537  // already set.
1538  int prune_idtx_nonrd;
1539 
1540  // Skip loopfilter, for static content after slide change
1541  // or key frame, once quality has ramped up.
1542  int skip_lf_screen;
1543 
1544  // For nonrd: early exit out of variance partition that sets the
1545  // block size to superblock size, and sets mode to zeromv-last skip.
1546  int part_early_exit_zeromv;
1547 
1548  // Early terminate inter mode search based on sse in non-rd path.
1549  INTER_SEARCH_EARLY_TERM_IDX sse_early_term_inter_search;
1550 
1551  // SAD based adaptive altref selection
1552  int sad_based_adp_altref_lag;
1553 
1554  // Enable/disable partition direct merging.
1555  int partition_direct_merging;
1556 
1557  // SAD based compound mode pruning
1558  int sad_based_comp_prune;
1559 
1560  // Level of aggressiveness for obtaining tx size based on qstep
1561  int tx_size_level_based_on_qstep;
1562 
1563  // Reduce the mv resolution for zero mv if the variance is low.
1564  bool reduce_zeromv_mvres;
1565 
1566  // Avoid the partitioning of a 16x16 block in variance based partitioning
1567  // (VBP) by making use of minimum and maximum sub-block variances.
1568  // For allintra encode, this speed feature reduces instruction count by 5.39%
1569  // for speed 9 on a typical video dataset with coding performance gain
1570  // of 1.44%.
1571  // For AVIF image encode, this speed feature reduces encode time
1572  // by 8.44% for speed 9 on a typical image dataset with coding performance
1573  // gain of 0.78%.
1574  bool vbp_prune_16x16_split_using_min_max_sub_blk_var;
1575 
1576  // A qindex threshold that determines whether to use qindex based
1577  // CDEF filter strength estimation for screen content types.
1578  // This speed feature has a substantial gain on coding metrics,
1579  // with moderately increased encoding time.
1580  // Set to zero to turn off this speed feature.
1581  int screen_content_cdef_filter_qindex_thresh;
1582 
1583  // Prunes global_globalmv search if its variance is greater than the
1584  // globalmv's variance.
1585  bool prune_global_globalmv_with_zeromv;
1586 
1587  // Allow mode cost update at frame level every couple frames. This
1588  // overrides the command line setting --mode-cost-upd-freq=3 (never update
1589  // except on key frame and first delta).
1590  bool frame_level_mode_cost_update;
1591 
1592  // If compound is enabled, and the current block size is >= BLOCK_16X16,
1593  // limit the compound modes to GLOBAL_GLOBALMV. This does not apply to the
1594  // base layer of svc.
1595  bool check_only_zero_zeromv_on_large_blocks;
1596 
1597  // Allow for disabling cdf update for non reference frames in svc mode.
1598  bool disable_cdf_update_non_reference_frame;
1599 } REAL_TIME_SPEED_FEATURES;
1600 
1606 typedef struct SPEED_FEATURES {
1611 
1616 
1620  TPL_SPEED_FEATURES tpl_sf;
1621 
1625  GLOBAL_MOTION_SPEED_FEATURES gm_sf;
1626 
1630  PARTITION_SPEED_FEATURES part_sf;
1631 
1635  MV_SPEED_FEATURES mv_sf;
1636 
1640  INTER_MODE_SPEED_FEATURES inter_sf;
1641 
1645  INTERP_FILTER_SPEED_FEATURES interp_sf;
1646 
1650  INTRA_MODE_SPEED_FEATURES intra_sf;
1651 
1655  TX_SPEED_FEATURES tx_sf;
1656 
1660  RD_CALC_SPEED_FEATURES rd_sf;
1661 
1665  WINNER_MODE_SPEED_FEATURES winner_mode_sf;
1666 
1670  LOOP_FILTER_SPEED_FEATURES lpf_sf;
1671 
1675  REAL_TIME_SPEED_FEATURES rt_sf;
1679 struct AV1_COMP;
1680 
1694  int speed);
1695 
1708  int speed);
1721 
1722 #ifdef __cplusplus
1723 } // extern "C"
1724 #endif
1725 
1726 #endif // AOM_AV1_ENCODER_SPEED_FEATURES_H_
static int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr, const MACROBLOCK *x, BLOCK_SIZE bsize, const HandleInterModeArgs *args, int prune_zero_mv_with_sse)
Prunes ZeroMV Search Using Best NEWMV's SSE.
Definition: rdopt.c:2476
void av1_set_speed_features_framesize_independent(struct AV1_COMP *cpi, int speed)
Frame size independent speed vs quality trade off flags.
void av1_set_speed_features_qindex_dependent(struct AV1_COMP *cpi, int speed)
Q index dependent speed vs quality trade off flags.
void av1_set_speed_features_framesize_dependent(struct AV1_COMP *cpi, int speed)
Frame size dependent speed vs quality trade off flags.
INTERNAL_COST_UPDATE_TYPE
This enum decides internally how often to update the entropy costs.
Definition: speed_features.h:325
@ INTERNAL_COST_UPD_OFF
Definition: speed_features.h:326
@ INTERNAL_COST_UPD_SBROW_SET
Definition: speed_features.h:328
@ INTERNAL_COST_UPD_SBROW
Definition: speed_features.h:329
@ INTERNAL_COST_UPD_SB
Definition: speed_features.h:330
@ INTERNAL_COST_UPD_TILE
Definition: speed_features.h:327
INTER_SEARCH_EARLY_TERM_IDX
This enumeration defines inter search early termination index in non-rd path based on sse value.
Definition: speed_features.h:369
@ EARLY_TERM_INDICES
Definition: speed_features.h:380
@ EARLY_TERM_IDX_1
Definition: speed_features.h:372
@ EARLY_TERM_IDX_4
Definition: speed_features.h:378
@ EARLY_TERM_IDX_2
Definition: speed_features.h:374
@ EARLY_TERM_DISABLED
Definition: speed_features.h:370
@ EARLY_TERM_IDX_3
Definition: speed_features.h:376
SIMPLE_MOTION_SEARCH_PRUNE_LEVEL
This enumeration defines a variety of simple motion search based partition prune levels.
Definition: speed_features.h:337
@ QIDX_BASED_AGG_LVL1
Definition: speed_features.h:343
@ SIMPLE_AGG_LVL3
Definition: speed_features.h:342
@ TOTAL_SIMPLE_AGG_LVLS
Definition: speed_features.h:346
@ SIMPLE_AGG_LVL1
Definition: speed_features.h:340
@ SIMPLE_AGG_LVL0
Definition: speed_features.h:339
@ TOTAL_AGG_LVLS
Definition: speed_features.h:352
@ SIMPLE_AGG_LVL2
Definition: speed_features.h:341
@ TOTAL_QINDEX_BASED_AGG_LVLS
Definition: speed_features.h:348
struct SPEED_FEATURES SPEED_FEATURES
Top level speed vs quality trade off data structure.
PRUNE_MESH_SEARCH_LEVEL
This enumeration defines a variety of mesh search prune levels.
Definition: speed_features.h:359
@ PRUNE_MESH_SEARCH_LVL_1
Definition: speed_features.h:361
@ PRUNE_MESH_SEARCH_LVL_2
Definition: speed_features.h:362
@ PRUNE_MESH_SEARCH_DISABLED
Definition: speed_features.h:360
struct HIGH_LEVEL_SPEED_FEATURES HIGH_LEVEL_SPEED_FEATURES
Sequence/frame level speed vs quality features.
struct FIRST_PASS_SPEED_FEATURES FIRST_PASS_SPEED_FEATURES
CDEF_PICK_METHOD
This enumeration defines a variety of CDEF pick methods.
Definition: speed_features.h:156
@ CDEF_FAST_SEARCH_LVL2
Definition: speed_features.h:159
@ CDEF_FAST_SEARCH_LVL5
Definition: speed_features.h:163
@ CDEF_FAST_SEARCH_LVL1
Definition: speed_features.h:158
@ CDEF_FULL_SEARCH
Definition: speed_features.h:157
@ CDEF_PICK_FROM_Q
Definition: speed_features.h:164
@ CDEF_FAST_SEARCH_LVL4
Definition: speed_features.h:162
@ CDEF_FAST_SEARCH_LVL3
Definition: speed_features.h:160
Top level encoder structure.
Definition: encoder.h:2700
int speed
Definition: encoder.h:2917
Definition: speed_features.h:443
int disable_recon
Skips reconstruction by using source buffers for prediction.
Definition: speed_features.h:460
int reduce_mv_step_param
Reduces the mv search window. By default, the initial search window is around MIN(MIN(dims),...
Definition: speed_features.h:450
int skip_zeromv_motion_search
Skips the motion search centered on 0,0 mv.
Definition: speed_features.h:465
int skip_motion_search_threshold
Skips the motion search when the zero mv has small sse.
Definition: speed_features.h:455
Sequence/frame level speed vs quality features.
Definition: speed_features.h:386
int second_alt_ref_filtering
Definition: speed_features.h:431
int frame_parameter_update
Definition: speed_features.h:388
MV_PREC_LOGIC high_precision_mv_usage
Definition: speed_features.h:407
int disable_extra_sc_testing
Definition: speed_features.h:426
int recode_tolerance
Definition: speed_features.h:399
SUPERRES_AUTO_SEARCH_TYPE superres_auto_search_type
Definition: speed_features.h:421
int static_segmentation
Definition: speed_features.h:416
int num_frames_used_in_tf
Definition: speed_features.h:437
RECODE_LOOP_TYPE recode_loop
Definition: speed_features.h:393
Top level speed vs quality trade off data structure.
Definition: speed_features.h:1606
MV_SPEED_FEATURES mv_sf
Definition: speed_features.h:1635
TPL_SPEED_FEATURES tpl_sf
Definition: speed_features.h:1620
LOOP_FILTER_SPEED_FEATURES lpf_sf
Definition: speed_features.h:1670
TX_SPEED_FEATURES tx_sf
Definition: speed_features.h:1655
INTER_MODE_SPEED_FEATURES inter_sf
Definition: speed_features.h:1640
RD_CALC_SPEED_FEATURES rd_sf
Definition: speed_features.h:1660
PARTITION_SPEED_FEATURES part_sf
Definition: speed_features.h:1630
GLOBAL_MOTION_SPEED_FEATURES gm_sf
Definition: speed_features.h:1625
INTERP_FILTER_SPEED_FEATURES interp_sf
Definition: speed_features.h:1645
FIRST_PASS_SPEED_FEATURES fp_sf
Definition: speed_features.h:1615
INTRA_MODE_SPEED_FEATURES intra_sf
Definition: speed_features.h:1650
WINNER_MODE_SPEED_FEATURES winner_mode_sf
Definition: speed_features.h:1665
REAL_TIME_SPEED_FEATURES rt_sf
Definition: speed_features.h:1675
HIGH_LEVEL_SPEED_FEATURES hl_sf
Definition: speed_features.h:1610