VirtualBox

source: vbox/trunk/src/libs/ffmpeg-20060710/libavcodec/h264.c@ 10184

最後變更 在這個檔案從10184是 5776,由 vboxsync 提交於 17 年 前

ffmpeg: exported to OSE

檔案大小: 317.3 KB
 
1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21/**
22 * @file h264.c
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
25 */
26
27#include "common.h"
28#include "dsputil.h"
29#include "avcodec.h"
30#include "mpegvideo.h"
31#include "h264data.h"
32#include "golomb.h"
33
34#include "cabac.h"
35
36//#undef NDEBUG
37#include <assert.h>
38
/* Rename two identifiers to deliberately awkward names. Presumably this is
 * meant to make any accidental use of them in this file stand out or fail
 * to compile -- TODO confirm against the MpegEncContext declaration. */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type

/* Indices used for the luma / chroma DC blocks. */
#define LUMA_DC_BLOCK_INDEX   25
#define CHROMA_DC_BLOCK_INDEX 26

/* Bit widths used for the VLC tables declared further down in this file. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6

/* Capacities of the parameter-set buffers and of the MMCO buffer. */
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256

#define MAX_MMCO_COUNT 66

/* Compiling in interlaced support reduces the speed
 * of progressive decoding by about 2%. */
#define ALLOW_INTERLACE

/* The three accessors below expect a variable named `h` to be in scope when
 * interlaced support is enabled; without it they collapse to the constant 0
 * and IS_INTERLACED() is forced to 0 as well. */
#ifndef ALLOW_INTERLACE
#define MB_MBAFF 0
#define MB_FIELD 0
#define FRAME_MBAFF 0
#undef  IS_INTERLACED
#define IS_INTERLACED(mb_type) 0
#else
#define MB_MBAFF h->mb_mbaff
#define MB_FIELD h->mb_field_decoding_flag
#define FRAME_MBAFF h->mb_aff_frame
#endif
72
73/**
74 * Sequence parameter set
75 */
76typedef struct SPS{
77
78 int profile_idc;
79 int level_idc;
80 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
81 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
82 int poc_type; ///< pic_order_cnt_type
83 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
84 int delta_pic_order_always_zero_flag;
85 int offset_for_non_ref_pic;
86 int offset_for_top_to_bottom_field;
87 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
88 int ref_frame_count; ///< num_ref_frames
89 int gaps_in_frame_num_allowed_flag;
90 int mb_width; ///< frame_width_in_mbs_minus1 + 1
91 int mb_height; ///< frame_height_in_mbs_minus1 + 1
92 int frame_mbs_only_flag;
93 int mb_aff; ///<mb_adaptive_frame_field_flag
94 int direct_8x8_inference_flag;
95 int crop; ///< frame_cropping_flag
96 int crop_left; ///< frame_cropping_rect_left_offset
97 int crop_right; ///< frame_cropping_rect_right_offset
98 int crop_top; ///< frame_cropping_rect_top_offset
99 int crop_bottom; ///< frame_cropping_rect_bottom_offset
100 int vui_parameters_present_flag;
101 AVRational sar;
102 int timing_info_present_flag;
103 uint32_t num_units_in_tick;
104 uint32_t time_scale;
105 int fixed_frame_rate_flag;
106 short offset_for_ref_frame[256]; //FIXME dyn aloc?
107 int bitstream_restriction_flag;
108 int num_reorder_frames;
109 int scaling_matrix_present;
110 uint8_t scaling_matrix4[6][16];
111 uint8_t scaling_matrix8[2][64];
112}SPS;
113
/**
 * Picture parameter set.
 * Where a trailing comment is present it names the bitstream syntax element
 * the field is derived from.
 */
typedef struct PPS{
    int sps_id;
    int cabac;                                ///< entropy_coding_mode_flag
    int pic_order_present;                    ///< pic_order_present_flag
    int slice_group_count;                    ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    int ref_count[2];                         ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred;                        ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp;                              ///< pic_init_qp_minus26 + 26
    int init_qs;                              ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred;               ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present;            ///< redundant_pic_cnt_present_flag
    int transform_8x8_mode;                   ///< transform_8x8_mode_flag

    /* scaling lists, same shapes as the ones in SPS */
    uint8_t scaling_matrix4[6][16];
    uint8_t scaling_matrix8[2][64];
}PPS;
136
/**
 * Memory management control operation opcode.
 * The numeric values are spelled out; they are the same ones the compiler
 * would assign implicitly when counting up from MMCO_END.
 */
typedef enum MMCOOpcode{
    MMCO_END          = 0,
    MMCO_SHORT2UNUSED = 1,
    MMCO_LONG2UNUSED  = 2,
    MMCO_SHORT2LONG   = 3,
    MMCO_SET_MAX_LONG = 4,
    MMCO_RESET        = 5,
    MMCO_LONG         = 6,
} MMCOOpcode;
149
150/**
151 * Memory management control operation.
152 */
153typedef struct MMCO{
154 MMCOOpcode opcode;
155 int short_frame_num;
156 int long_index;
157} MMCO;
158
159/**
160 * H264Context
161 */
162typedef struct H264Context{
163 MpegEncContext s;
164 int nal_ref_idc;
165 int nal_unit_type;
166#define NAL_SLICE 1
167#define NAL_DPA 2
168#define NAL_DPB 3
169#define NAL_DPC 4
170#define NAL_IDR_SLICE 5
171#define NAL_SEI 6
172#define NAL_SPS 7
173#define NAL_PPS 8
174#define NAL_AUD 9
175#define NAL_END_SEQUENCE 10
176#define NAL_END_STREAM 11
177#define NAL_FILLER_DATA 12
178#define NAL_SPS_EXT 13
179#define NAL_AUXILIARY_SLICE 19
180 uint8_t *rbsp_buffer;
181 unsigned int rbsp_buffer_size;
182
183 /**
184 * Used to parse AVC variant of h264
185 */
186 int is_avc; ///< this flag is != 0 if codec is avc1
187 int got_avcC; ///< flag used to parse avcC data only once
188 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
189
190 int chroma_qp; //QPc
191
192 int prev_mb_skipped;
193 int next_mb_skipped;
194
195 //prediction stuff
196 int chroma_pred_mode;
197 int intra16x16_pred_mode;
198
199 int top_mb_xy;
200 int left_mb_xy[2];
201
202 int8_t intra4x4_pred_mode_cache[5*8];
203 int8_t (*intra4x4_pred_mode)[8];
204 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
205 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
206 void (*pred8x8 [4+3])(uint8_t *src, int stride);
207 void (*pred16x16[4+3])(uint8_t *src, int stride);
208 unsigned int topleft_samples_available;
209 unsigned int top_samples_available;
210 unsigned int topright_samples_available;
211 unsigned int left_samples_available;
212 uint8_t (*top_borders[2])[16+2*8];
213 uint8_t left_border[2*(17+2*9)];
214
215 /**
216 * non zero coeff count cache.
217 * is 64 if not available.
218 */
219 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
220 uint8_t (*non_zero_count)[16];
221
222 /**
223 * Motion vector cache.
224 */
225 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
226 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
227#define LIST_NOT_USED -1 //FIXME rename?
228#define PART_NOT_AVAILABLE -2
229
230 /**
231 * is 1 if the specific list MV&references are set to 0,0,-2.
232 */
233 int mv_cache_clean[2];
234
235 /**
236 * number of neighbors (top and/or left) that used 8x8 dct
237 */
238 int neighbor_transform_size;
239
240 /**
241 * block_offset[ 0..23] for frame macroblocks
242 * block_offset[24..47] for field macroblocks
243 */
244 int block_offset[2*(16+8)];
245
246 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
247 uint32_t *mb2b8_xy;
248 int b_stride; //FIXME use s->b4_stride
249 int b8_stride;
250
251 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
252 int mb_uvlinesize;
253
254 int emu_edge_width;
255 int emu_edge_height;
256
257 int halfpel_flag;
258 int thirdpel_flag;
259
260 int unknown_svq3_flag;
261 int next_slice_index;
262
263 SPS sps_buffer[MAX_SPS_COUNT];
264 SPS sps; ///< current sps
265
266 PPS pps_buffer[MAX_PPS_COUNT];
267 /**
268 * current pps
269 */
270 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
271
272 uint32_t dequant4_buffer[6][52][16];
273 uint32_t dequant8_buffer[2][52][64];
274 uint32_t (*dequant4_coeff[6])[16];
275 uint32_t (*dequant8_coeff[2])[64];
276 int dequant_coeff_pps; ///< reinit tables when pps changes
277
278 int slice_num;
279 uint8_t *slice_table_base;
280 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
281 int slice_type;
282 int slice_type_fixed;
283
284 //interlacing specific flags
285 int mb_aff_frame;
286 int mb_field_decoding_flag;
287 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
288
289 int sub_mb_type[4];
290
291 //POC stuff
292 int poc_lsb;
293 int poc_msb;
294 int delta_poc_bottom;
295 int delta_poc[2];
296 int frame_num;
297 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
298 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
299 int frame_num_offset; ///< for POC type 2
300 int prev_frame_num_offset; ///< for POC type 2
301 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
302
303 /**
304 * frame_num for frames or 2*frame_num for field pics.
305 */
306 int curr_pic_num;
307
308 /**
309 * max_frame_num or 2*max_frame_num for field pics.
310 */
311 int max_pic_num;
312
313 //Weighted pred stuff
314 int use_weight;
315 int use_weight_chroma;
316 int luma_log2_weight_denom;
317 int chroma_log2_weight_denom;
318 int luma_weight[2][48];
319 int luma_offset[2][48];
320 int chroma_weight[2][48][2];
321 int chroma_offset[2][48][2];
322 int implicit_weight[48][48];
323
324 //deblock
325 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
326 int slice_alpha_c0_offset;
327 int slice_beta_offset;
328
329 int redundant_pic_count;
330
331 int direct_spatial_mv_pred;
332 int dist_scale_factor[16];
333 int dist_scale_factor_field[32];
334 int map_col_to_list0[2][16];
335 int map_col_to_list0_field[2][32];
336
337 /**
338 * num_ref_idx_l0/1_active_minus1 + 1
339 */
340 int ref_count[2]; ///< counts frames or fields, depending on current mb mode
341 Picture *short_ref[32];
342 Picture *long_ref[32];
343 Picture default_ref_list[2][32];
344 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
345 Picture *delayed_pic[16]; //FIXME size?
346 Picture *delayed_output_pic;
347
348 /**
349 * memory management control operations buffer.
350 */
351 MMCO mmco[MAX_MMCO_COUNT];
352 int mmco_index;
353
354 int long_ref_count; ///< number of actual long term references
355 int short_ref_count; ///< number of actual short term references
356
357 //data partitioning
358 GetBitContext intra_gb;
359 GetBitContext inter_gb;
360 GetBitContext *intra_gb_ptr;
361 GetBitContext *inter_gb_ptr;
362
363 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
364
365 /**
366 * Cabac
367 */
368 CABACContext cabac;
369 uint8_t cabac_state[460];
370 int cabac_init_idc;
371
372 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
373 uint16_t *cbp_table;
374 int top_cbp;
375 int left_cbp;
376 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
377 uint8_t *chroma_pred_mode_table;
378 int last_qscale_diff;
379 int16_t (*mvd_table[2])[2];
380 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
381 uint8_t *direct_table;
382 uint8_t direct_cache[5*8];
383
384 uint8_t zigzag_scan[16];
385 uint8_t zigzag_scan8x8[64];
386 uint8_t zigzag_scan8x8_cavlc[64];
387 uint8_t field_scan[16];
388 uint8_t field_scan8x8[64];
389 uint8_t field_scan8x8_cavlc[64];
390 const uint8_t *zigzag_scan_q0;
391 const uint8_t *zigzag_scan8x8_q0;
392 const uint8_t *zigzag_scan8x8_cavlc_q0;
393 const uint8_t *field_scan_q0;
394 const uint8_t *field_scan8x8_q0;
395 const uint8_t *field_scan8x8_cavlc_q0;
396
397 int x264_build;
398}H264Context;
399
400static VLC coeff_token_vlc[4];
401static VLC chroma_dc_coeff_token_vlc;
402
403static VLC total_zeros_vlc[15];
404static VLC chroma_dc_total_zeros_vlc[3];
405
406static VLC run_vlc[6];
407static VLC run7_vlc;
408
409static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
410static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
411static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
412
/**
 * Pack the low 16 bits of two integers into one 32-bit word.
 *
 * On little-endian builds a ends up in the low half and b in the high half;
 * with WORDS_BIGENDIAN defined the halves are swapped. Either way, once the
 * word is stored to memory, a occupies the lower-addressed 16 bits.
 *
 * The packing is done on unsigned values: shifting a negative (or too large)
 * signed int left by 16 is undefined behaviour in C, and negative inputs are
 * expected here.
 *
 * @param a value whose low 16 bits form the first half-word
 * @param b value whose low 16 bits form the second half-word
 * @return the packed 32-bit word
 */
static always_inline uint32_t pack16to32(int a, int b){
    const uint32_t first  = (uint32_t)a & 0xFFFF;
    const uint32_t second = (uint32_t)b & 0xFFFF;
#ifdef WORDS_BIGENDIAN
    return second | (first << 16);
#else
    return first | (second << 16);
#endif
}
420
/**
 * Fill a small rectangle with one value.
 *
 * The rectangle must be 2, 4, 8 or 16 bytes wide (w*size) and 1, 2 or 4 rows
 * high; any other width hits assert(0) and any other height hits the final
 * assert(h==4). The destination has to be aligned to
 * FFMIN(w*size, STRIDE_ALIGN) bytes and the row pitch has to be a multiple
 * of the row width, both checked by assert.
 *
 * @param vp     top-left element of the rectangle
 * @param w      width of the rectangle in elements (at most 4), should be a constant
 * @param h      height of the rectangle in rows, should be a constant
 * @param stride distance between two rows, in elements of `size` bytes
 * @param val    value to store; for size==1 only meaningful as a byte value,
 *               it is replicated into every byte
 * @param size   the size of val in bytes (1 or 4), should be a constant
 */
static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
    uint8_t *p= (uint8_t*)vp;
    assert(size==1 || size==4);
    assert(w<=4);

    /* from here on w and stride are measured in bytes */
    w      *= size;
    stride *= size;

    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
    assert((stride&(w-1))==0);
    if(w==2){
        const uint16_t v= size==4 ? val : val*0x0101;
        *(uint16_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint16_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint16_t*)(p + 2*stride)=
        *(uint16_t*)(p + 3*stride)= v;
    }else if(w==4){
        const uint32_t v= size==4 ? val : val*0x01010101;
        *(uint32_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint32_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint32_t*)(p + 2*stride)=
        *(uint32_t*)(p + 3*stride)= v;
    }else if(w==8){
        /* w==8 and w==16 are only reachable with size==4 (w<=4 elements),
         * so val is already a full 32-bit pattern here. */
        //gcc can't optimize 64bit math on x86_32
#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
        const uint64_t v= val*0x0100000001ULL;
        *(uint64_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint64_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint64_t*)(p + 2*stride)=
        *(uint64_t*)(p + 3*stride)= v;
#else
        *(uint32_t*)(p + 0+0*stride)=
        *(uint32_t*)(p + 4+0*stride)= val;
        if(h==1) return;
        *(uint32_t*)(p + 0+1*stride)=
        *(uint32_t*)(p + 4+1*stride)= val;
        if(h==2) return;
        *(uint32_t*)(p + 0+2*stride)=
        *(uint32_t*)(p + 4+2*stride)=
        *(uint32_t*)(p + 0+3*stride)=
        *(uint32_t*)(p + 4+3*stride)= val;
#endif
    }else if(w==16){
#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
        const uint64_t v= val*0x0100000001ULL;
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)= v;
        /* stop after one row like every other width does; without this a
         * single-row fill would spill into the rows below */
        if(h==1) return;
        *(uint64_t*)(p + 0+1*stride)=
        *(uint64_t*)(p + 8+1*stride)= v;
        if(h==2) return;
        *(uint64_t*)(p + 0+2*stride)=
        *(uint64_t*)(p + 8+2*stride)=
        *(uint64_t*)(p + 0+3*stride)=
        *(uint64_t*)(p + 8+3*stride)= v;
#else
        *(uint32_t*)(p + 0+0*stride)=
        *(uint32_t*)(p + 4+0*stride)=
        *(uint32_t*)(p + 8+0*stride)=
        *(uint32_t*)(p +12+0*stride)= val;
        /* stop after one row like every other width does; without this a
         * single-row fill would spill into the rows below */
        if(h==1) return;
        *(uint32_t*)(p + 0+1*stride)=
        *(uint32_t*)(p + 4+1*stride)=
        *(uint32_t*)(p + 8+1*stride)=
        *(uint32_t*)(p +12+1*stride)= val;
        if(h==2) return;
        *(uint32_t*)(p + 0+2*stride)=
        *(uint32_t*)(p + 4+2*stride)=
        *(uint32_t*)(p + 8+2*stride)=
        *(uint32_t*)(p +12+2*stride)=
        *(uint32_t*)(p + 0+3*stride)=
        *(uint32_t*)(p + 4+3*stride)=
        *(uint32_t*)(p + 8+3*stride)=
        *(uint32_t*)(p +12+3*stride)= val;
#endif
    }else
        assert(0);
    assert(h==4);
}
508
509static void fill_caches(H264Context *h, int mb_type, int for_deblock){
510 MpegEncContext * const s = &h->s;
511 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
512 int topleft_xy, top_xy, topright_xy, left_xy[2];
513 int topleft_type, top_type, topright_type, left_type[2];
514 int left_block[8];
515 int i;
516
517 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
518 // the actual condition is whether we're on the edge of a slice,
519 // and even then the intra and nnz parts are unnecessary.
520 if(for_deblock && h->slice_num == 1 && !FRAME_MBAFF)
521 return;
522
523 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
524
525 top_xy = mb_xy - s->mb_stride;
526 topleft_xy = top_xy - 1;
527 topright_xy= top_xy + 1;
528 left_xy[1] = left_xy[0] = mb_xy-1;
529 left_block[0]= 0;
530 left_block[1]= 1;
531 left_block[2]= 2;
532 left_block[3]= 3;
533 left_block[4]= 7;
534 left_block[5]= 10;
535 left_block[6]= 8;
536 left_block[7]= 11;
537 if(FRAME_MBAFF){
538 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
539 const int top_pair_xy = pair_xy - s->mb_stride;
540 const int topleft_pair_xy = top_pair_xy - 1;
541 const int topright_pair_xy = top_pair_xy + 1;
542 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
543 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
544 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
545 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
546 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
547 const int bottom = (s->mb_y & 1);
548 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
549 if (bottom
550 ? !curr_mb_frame_flag // bottom macroblock
551 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
552 ) {
553 top_xy -= s->mb_stride;
554 }
555 if (bottom
556 ? !curr_mb_frame_flag // bottom macroblock
557 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
558 ) {
559 topleft_xy -= s->mb_stride;
560 }
561 if (bottom
562 ? !curr_mb_frame_flag // bottom macroblock
563 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
564 ) {
565 topright_xy -= s->mb_stride;
566 }
567 if (left_mb_frame_flag != curr_mb_frame_flag) {
568 left_xy[1] = left_xy[0] = pair_xy - 1;
569 if (curr_mb_frame_flag) {
570 if (bottom) {
571 left_block[0]= 2;
572 left_block[1]= 2;
573 left_block[2]= 3;
574 left_block[3]= 3;
575 left_block[4]= 8;
576 left_block[5]= 11;
577 left_block[6]= 8;
578 left_block[7]= 11;
579 } else {
580 left_block[0]= 0;
581 left_block[1]= 0;
582 left_block[2]= 1;
583 left_block[3]= 1;
584 left_block[4]= 7;
585 left_block[5]= 10;
586 left_block[6]= 7;
587 left_block[7]= 10;
588 }
589 } else {
590 left_xy[1] += s->mb_stride;
591 //left_block[0]= 0;
592 left_block[1]= 2;
593 left_block[2]= 0;
594 left_block[3]= 2;
595 //left_block[4]= 7;
596 left_block[5]= 10;
597 left_block[6]= 7;
598 left_block[7]= 10;
599 }
600 }
601 }
602
603 h->top_mb_xy = top_xy;
604 h->left_mb_xy[0] = left_xy[0];
605 h->left_mb_xy[1] = left_xy[1];
606 if(for_deblock){
607 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
608 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
609 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
610 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
611 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
612
613 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
614 int list;
615 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
616 for(i=0; i<16; i++)
617 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
618 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
619 if(USES_LIST(mb_type,list)){
620 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
621 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
622 uint8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
623 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
624 dst[0] = src[0];
625 dst[1] = src[1];
626 dst[2] = src[2];
627 dst[3] = src[3];
628 }
629 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
630 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
631 ref += h->b8_stride;
632 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
633 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
634 }else{
635 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
636 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
637 }
638 }
639 }
640 }else{
641 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
642 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
643 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
644 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
645 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
646 }
647
648 if(IS_INTRA(mb_type)){
649 h->topleft_samples_available=
650 h->top_samples_available=
651 h->left_samples_available= 0xFFFF;
652 h->topright_samples_available= 0xEEEA;
653
654 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
655 h->topleft_samples_available= 0xB3FF;
656 h->top_samples_available= 0x33FF;
657 h->topright_samples_available= 0x26EA;
658 }
659 for(i=0; i<2; i++){
660 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
661 h->topleft_samples_available&= 0xDF5F;
662 h->left_samples_available&= 0x5F5F;
663 }
664 }
665
666 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
667 h->topleft_samples_available&= 0x7FFF;
668
669 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
670 h->topright_samples_available&= 0xFBFF;
671
672 if(IS_INTRA4x4(mb_type)){
673 if(IS_INTRA4x4(top_type)){
674 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
675 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
676 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
677 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
678 }else{
679 int pred;
680 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
681 pred= -1;
682 else{
683 pred= 2;
684 }
685 h->intra4x4_pred_mode_cache[4+8*0]=
686 h->intra4x4_pred_mode_cache[5+8*0]=
687 h->intra4x4_pred_mode_cache[6+8*0]=
688 h->intra4x4_pred_mode_cache[7+8*0]= pred;
689 }
690 for(i=0; i<2; i++){
691 if(IS_INTRA4x4(left_type[i])){
692 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
693 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
694 }else{
695 int pred;
696 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
697 pred= -1;
698 else{
699 pred= 2;
700 }
701 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
702 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
703 }
704 }
705 }
706 }
707
708
709/*
7100 . T T. T T T T
7111 L . .L . . . .
7122 L . .L . . . .
7133 . T TL . . . .
7144 L . .L . . . .
7155 L . .. . . . .
716*/
717//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
718 if(top_type){
719 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
720 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
721 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
722 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
723
724 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
725 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
726
727 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
728 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
729
730 }else{
731 h->non_zero_count_cache[4+8*0]=
732 h->non_zero_count_cache[5+8*0]=
733 h->non_zero_count_cache[6+8*0]=
734 h->non_zero_count_cache[7+8*0]=
735
736 h->non_zero_count_cache[1+8*0]=
737 h->non_zero_count_cache[2+8*0]=
738
739 h->non_zero_count_cache[1+8*3]=
740 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
741
742 }
743
744 for (i=0; i<2; i++) {
745 if(left_type[i]){
746 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
747 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
748 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
749 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
750 }else{
751 h->non_zero_count_cache[3+8*1 + 2*8*i]=
752 h->non_zero_count_cache[3+8*2 + 2*8*i]=
753 h->non_zero_count_cache[0+8*1 + 8*i]=
754 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
755 }
756 }
757
758 if( h->pps.cabac ) {
759 // top_cbp
760 if(top_type) {
761 h->top_cbp = h->cbp_table[top_xy];
762 } else if(IS_INTRA(mb_type)) {
763 h->top_cbp = 0x1C0;
764 } else {
765 h->top_cbp = 0;
766 }
767 // left_cbp
768 if (left_type[0]) {
769 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
770 } else if(IS_INTRA(mb_type)) {
771 h->left_cbp = 0x1C0;
772 } else {
773 h->left_cbp = 0;
774 }
775 if (left_type[0]) {
776 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
777 }
778 if (left_type[1]) {
779 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
780 }
781 }
782
783#if 1
784 //FIXME direct mb can skip much of this
785 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
786 int list;
787 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
788 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
789 /*if(!h->mv_cache_clean[list]){
790 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
791 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
792 h->mv_cache_clean[list]= 1;
793 }*/
794 continue;
795 }
796 h->mv_cache_clean[list]= 0;
797
798 if(USES_LIST(top_type, list)){
799 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
800 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
801 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
802 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
803 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
804 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
805 h->ref_cache[list][scan8[0] + 0 - 1*8]=
806 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
807 h->ref_cache[list][scan8[0] + 2 - 1*8]=
808 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
809 }else{
810 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
811 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
812 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
813 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
814 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
815 }
816
817 //FIXME unify cleanup or sth
818 if(USES_LIST(left_type[0], list)){
819 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
820 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
821 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
822 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
823 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
824 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
825 }else{
826 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
827 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
828 h->ref_cache[list][scan8[0] - 1 + 0*8]=
829 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
830 }
831
832 if(USES_LIST(left_type[1], list)){
833 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
834 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
835 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
836 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
837 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
838 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
839 }else{
840 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
841 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
842 h->ref_cache[list][scan8[0] - 1 + 2*8]=
843 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
844 assert((!left_type[0]) == (!left_type[1]));
845 }
846
847 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
848 continue;
849
850 if(USES_LIST(topleft_type, list)){
851 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
852 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
853 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
854 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
855 }else{
856 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
857 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
858 }
859
860 if(USES_LIST(topright_type, list)){
861 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
862 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
863 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
864 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
865 }else{
866 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
867 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
868 }
869
870
871 h->ref_cache[list][scan8[5 ]+1] =
872 h->ref_cache[list][scan8[7 ]+1] =
873 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
874 h->ref_cache[list][scan8[4 ]] =
875 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
876 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
877 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
878 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
879 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
880 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
881
882 if( h->pps.cabac ) {
883 /* XXX beurk, Load mvd */
884 if(USES_LIST(top_type, list)){
885 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
886 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
887 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
888 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
889 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
890 }else{
891 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
892 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
893 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
894 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
895 }
896 if(USES_LIST(left_type[0], list)){
897 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
898 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
899 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
900 }else{
901 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
902 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
903 }
904 if(USES_LIST(left_type[1], list)){
905 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
906 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
907 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
908 }else{
909 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
910 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
911 }
912 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
913 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
914 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
915 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
916 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
917
918 if(h->slice_type == B_TYPE){
919 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
920
921 if(IS_DIRECT(top_type)){
922 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
923 }else if(IS_8X8(top_type)){
924 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
925 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
926 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
927 }else{
928 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
929 }
930
931 if(IS_DIRECT(left_type[0]))
932 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
933 else if(IS_8X8(left_type[0]))
934 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
935 else
936 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
937
938 if(IS_DIRECT(left_type[1]))
939 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
940 else if(IS_8X8(left_type[1]))
941 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
942 else
943 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
944 }
945 }
946
947 if(FRAME_MBAFF){
948#define MAP_MVS\
949 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
950 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
951 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
952 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
953 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
954 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
955 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
956 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
957 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
958 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
959 if(MB_FIELD){
960#define MAP_F2F(idx, mb_type)\
961 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
962 h->ref_cache[list][idx] <<= 1;\
963 h->mv_cache[list][idx][1] /= 2;\
964 h->mvd_cache[list][idx][1] /= 2;\
965 }
966 MAP_MVS
967#undef MAP_F2F
968 }else{
969#define MAP_F2F(idx, mb_type)\
970 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
971 h->ref_cache[list][idx] >>= 1;\
972 h->mv_cache[list][idx][1] <<= 1;\
973 h->mvd_cache[list][idx][1] <<= 1;\
974 }
975 MAP_MVS
976#undef MAP_F2F
977 }
978 }
979 }
980 }
981#endif
982
983 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
984}
985
986static inline void write_back_intra_pred_mode(H264Context *h){
987 MpegEncContext * const s = &h->s;
988 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
989
990 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
991 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
992 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
993 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
994 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
995 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
996 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
997}
998
999/**
1000 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
1001 */
1002static inline int check_intra4x4_pred_mode(H264Context *h){
1003 MpegEncContext * const s = &h->s;
1004 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1005 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
1006 int i;
1007
1008 if(!(h->top_samples_available&0x8000)){
1009 for(i=0; i<4; i++){
1010 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1011 if(status<0){
1012 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1013 return -1;
1014 } else if(status){
1015 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1016 }
1017 }
1018 }
1019
1020 if(!(h->left_samples_available&0x8000)){
1021 for(i=0; i<4; i++){
1022 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1023 if(status<0){
1024 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1025 return -1;
1026 } else if(status){
1027 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1028 }
1029 }
1030 }
1031
1032 return 0;
1033} //FIXME cleanup like next
1034
1035/**
1036 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
1037 */
1038static inline int check_intra_pred_mode(H264Context *h, int mode){
1039 MpegEncContext * const s = &h->s;
1040 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1041 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1042
1043 if(mode < 0 || mode > 6) {
1044 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1045 return -1;
1046 }
1047
1048 if(!(h->top_samples_available&0x8000)){
1049 mode= top[ mode ];
1050 if(mode<0){
1051 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1052 return -1;
1053 }
1054 }
1055
1056 if(!(h->left_samples_available&0x8000)){
1057 mode= left[ mode ];
1058 if(mode<0){
1059 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1060 return -1;
1061 }
1062 }
1063
1064 return mode;
1065}
1066
1067/**
1068 * gets the predicted intra4x4 prediction mode.
1069 */
1070static inline int pred_intra_mode(H264Context *h, int n){
1071 const int index8= scan8[n];
1072 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1073 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1074 const int min= FFMIN(left, top);
1075
1076 tprintf("mode:%d %d min:%d\n", left ,top, min);
1077
1078 if(min<0) return DC_PRED;
1079 else return min;
1080}
1081
1082static inline void write_back_non_zero_count(H264Context *h){
1083 MpegEncContext * const s = &h->s;
1084 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1085
1086 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1087 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1088 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1089 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1090 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1091 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1092 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
1093
1094 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1095 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1096 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
1097
1098 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1099 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1100 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1101
1102 if(FRAME_MBAFF){
1103 // store all luma nnzs, for deblocking
1104 int v = 0, i;
1105 for(i=0; i<16; i++)
1106 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1107 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1108 }
1109}
1110
1111/**
1112 * gets the predicted number of non zero coefficients.
1113 * @param n block index
1114 */
1115static inline int pred_non_zero_count(H264Context *h, int n){
1116 const int index8= scan8[n];
1117 const int left= h->non_zero_count_cache[index8 - 1];
1118 const int top = h->non_zero_count_cache[index8 - 8];
1119 int i= left + top;
1120
1121 if(i<64) i= (i+1)>>1;
1122
1123 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
1124
1125 return i&31;
1126}
1127
/**
 * Fetches the diagonal neighbour (C) used by the motion vector predictors.
 * Without MBAFF special cases this is the cache entry at i - 8 + part_width
 * (top right); if its reference is PART_NOT_AVAILABLE the entry at i - 8 - 1
 * (top left) is used instead.
 * With FRAME_MBAFF some neighbours are read directly from the current picture
 * by SET_DIAG_MV, which writes the (vertically rescaled) vector into the
 * scratch slot mv_cache[list][scan8[0]-2], leaves *C pointing at that slot
 * and returns from this function.
 * @param C receives a pointer to the selected motion vector
 * @param i cache index of the block being predicted (callers pass scan8[n])
 * @param list the reference list
 * @param part_width the partition width in cache columns
 * @returns the reference index belonging to *C; SET_DIAG_MV may also return
 *          LIST_NOT_USED
 */
1128static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1129 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1130
1131 /* there is no consistent mapping of mvs to neighboring locations that will
1132 * make mbaff happy, so we can't move all this logic to fill_caches */
1133 if(FRAME_MBAFF){
1134 MpegEncContext *s = &h->s;
1135 const int *mb_types = s->current_picture_ptr->mb_type;
1136 const int16_t *mv;
 /* clear the scratch slot and point *C at it; SET_DIAG_MV fills it in */
1137 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1138 *C = h->mv_cache[list][scan8[0]-2];
1139
1140 if(!MB_FIELD
1141 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1142 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1143 if(IS_INTERLACED(mb_types[topright_xy])){
 /* SET_DIAG_MV(MV_OP, REF_OP, X4, Y4): loads the vector at position (X4,Y4)
  * of the current picture's motion_val, applies MV_OP to its vertical
  * component and REF_OP to the matching ref_index, and RETURNS from
  * fetch_diagonal_mv. It returns LIST_NOT_USED early when the macroblock at
  * that position neither uses this list nor is 8x8. */
1144#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1145 const int x4 = X4, y4 = Y4;\
1146 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1147 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1148 return LIST_NOT_USED;\
1149 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1150 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1151 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1152 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
1153
1154 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1155 }
1156 }
 /* top right unavailable: in these cases the vector is taken from the
  * macroblock to the left when its field/frame type differs from ours */
1157 if(topright_ref == PART_NOT_AVAILABLE
1158 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1159 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1160 if(!MB_FIELD
1161 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1162 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1163 }
1164 if(MB_FIELD
1165 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1166 && i >= scan8[0]+8){
1167 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1168 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
1169 }
1170 }
1171#undef SET_DIAG_MV
1172 }
1173
 /* common case: top right from the cache, else fall back to top left */
1174 if(topright_ref != PART_NOT_AVAILABLE){
1175 *C= h->mv_cache[list][ i - 8 + part_width ];
1176 return topright_ref;
1177 }else{
1178 tprintf("topright MV not available\n");
1179
1180 *C= h->mv_cache[list][ i - 8 - 1 ];
1181 return h->ref_cache[list][ i - 8 - 1 ];
1182 }
1183}
1184
1185/**
1186 * gets the predicted MV.
1187 * @param n the block index
1188 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1189 * @param mx the x component of the predicted motion vector
1190 * @param my the y component of the predicted motion vector
1191 */
1192static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1193 const int index8= scan8[n];
1194 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1195 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1196 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1197 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1198 const int16_t * C;
1199 int diagonal_ref, match_count;
1200
1201 assert(part_width==1 || part_width==2 || part_width==4);
1202
1203/* mv_cache
1204 B . . A T T T T
1205 U . . L . . , .
1206 U . . L . . . .
1207 U . . L . . , .
1208 . . . L . . . .
1209*/
1210
1211 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
1212 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1213 tprintf("pred_motion match_count=%d\n", match_count);
1214 if(match_count > 1){ //most common
1215 *mx= mid_pred(A[0], B[0], C[0]);
1216 *my= mid_pred(A[1], B[1], C[1]);
1217 }else if(match_count==1){
1218 if(left_ref==ref){
1219 *mx= A[0];
1220 *my= A[1];
1221 }else if(top_ref==ref){
1222 *mx= B[0];
1223 *my= B[1];
1224 }else{
1225 *mx= C[0];
1226 *my= C[1];
1227 }
1228 }else{
1229 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1230 *mx= A[0];
1231 *my= A[1];
1232 }else{
1233 *mx= mid_pred(A[0], B[0], C[0]);
1234 *my= mid_pred(A[1], B[1], C[1]);
1235 }
1236 }
1237
1238 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1239}
1240
1241/**
1242 * gets the directionally predicted 16x8 MV.
1243 * @param n the block index
1244 * @param mx the x component of the predicted motion vector
1245 * @param my the y component of the predicted motion vector
1246 */
1247static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1248 if(n==0){
1249 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1250 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1251
1252 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1253
1254 if(top_ref == ref){
1255 *mx= B[0];
1256 *my= B[1];
1257 return;
1258 }
1259 }else{
1260 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1261 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1262
1263 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1264
1265 if(left_ref == ref){
1266 *mx= A[0];
1267 *my= A[1];
1268 return;
1269 }
1270 }
1271
1272 //RARE
1273 pred_motion(h, n, 4, list, ref, mx, my);
1274}
1275
1276/**
1277 * gets the directionally predicted 8x16 MV.
1278 * @param n the block index
1279 * @param mx the x component of the predicted motion vector
1280 * @param my the y component of the predicted motion vector
1281 */
1282static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1283 if(n==0){
1284 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1285 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1286
1287 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1288
1289 if(left_ref == ref){
1290 *mx= A[0];
1291 *my= A[1];
1292 return;
1293 }
1294 }else{
1295 const int16_t * C;
1296 int diagonal_ref;
1297
1298 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1299
1300 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1301
1302 if(diagonal_ref == ref){
1303 *mx= C[0];
1304 *my= C[1];
1305 return;
1306 }
1307 }
1308
1309 //RARE
1310 pred_motion(h, n, 2, list, ref, mx, my);
1311}
1312
1313static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1314 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1315 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1316
1317 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1318
1319 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1320 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1321 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1322
1323 *mx = *my = 0;
1324 return;
1325 }
1326
1327 pred_motion(h, 0, 4, 0, 0, mx, my);
1328
1329 return;
1330}
1331
1332static inline void direct_dist_scale_factor(H264Context * const h){
1333 const int poc = h->s.current_picture_ptr->poc;
1334 const int poc1 = h->ref_list[1][0].poc;
1335 int i;
1336 for(i=0; i<h->ref_count[0]; i++){
1337 int poc0 = h->ref_list[0][i].poc;
1338 int td = clip(poc1 - poc0, -128, 127);
1339 if(td == 0 /* FIXME || pic0 is a long-term ref */){
1340 h->dist_scale_factor[i] = 256;
1341 }else{
1342 int tb = clip(poc - poc0, -128, 127);
1343 int tx = (16384 + (ABS(td) >> 1)) / td;
1344 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
1345 }
1346 }
1347 if(FRAME_MBAFF){
1348 for(i=0; i<h->ref_count[0]; i++){
1349 h->dist_scale_factor_field[2*i] =
1350 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
1351 }
1352 }
1353}
1354static inline void direct_ref_list_init(H264Context * const h){
1355 MpegEncContext * const s = &h->s;
1356 Picture * const ref1 = &h->ref_list[1][0];
1357 Picture * const cur = s->current_picture_ptr;
1358 int list, i, j;
1359 if(cur->pict_type == I_TYPE)
1360 cur->ref_count[0] = 0;
1361 if(cur->pict_type != B_TYPE)
1362 cur->ref_count[1] = 0;
1363 for(list=0; list<2; list++){
1364 cur->ref_count[list] = h->ref_count[list];
1365 for(j=0; j<h->ref_count[list]; j++)
1366 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1367 }
1368 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1369 return;
1370 for(list=0; list<2; list++){
1371 for(i=0; i<ref1->ref_count[list]; i++){
1372 const int poc = ref1->ref_poc[list][i];
1373 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1374 for(j=0; j<h->ref_count[list]; j++)
1375 if(h->ref_list[list][j].poc == poc){
1376 h->map_col_to_list0[list][i] = j;
1377 break;
1378 }
1379 }
1380 }
1381 if(FRAME_MBAFF){
1382 for(list=0; list<2; list++){
1383 for(i=0; i<ref1->ref_count[list]; i++){
1384 j = h->map_col_to_list0[list][i];
1385 h->map_col_to_list0_field[list][2*i] = 2*j;
1386 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
1387 }
1388 }
1389 }
1390}
1391
/**
 * Fills h->mv_cache and h->ref_cache (both lists) for a direct-predicted
 * macroblock, or for the direct sub-blocks of an 8x8 macroblock, using either
 * spatial or temporal direct prediction depending on h->direct_spatial_mv_pred.
 * Co-located data is read from h->ref_list[1][0]. Besides the caches the
 * function rewrites *mb_type and, for the blocks it handles, h->sub_mb_type[].
 * @param mb_type in: the type of the current macroblock; out: the type with
 *                the partitioning / list usage chosen for the direct blocks
 */
1392static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1393 MpegEncContext * const s = &h->s;
1394 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1395 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1396 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
 /* type, motion vectors and reference indices at the same position in the
  * first list 1 reference picture */
1397 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1398 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1399 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1400 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1401 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
 /* when set, only sub-blocks whose sub_mb_type is direct are processed */
1402 const int is_b8x8 = IS_8X8(*mb_type);
1403 int sub_mb_type;
1404 int i8, i4;
1405
 /* choose the partitioning used for the direct blocks */
1406#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1407 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1408 /* FIXME save sub mb types from previous frames (or derive from MVs)
1409 * so we know exactly what block size to use */
1410 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1411 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1412 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1413 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1414 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1415 }else{
1416 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1417 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1418 }
1419 if(!is_b8x8)
1420 *mb_type |= MB_TYPE_DIRECT2;
1421 if(MB_FIELD)
1422 *mb_type |= MB_TYPE_INTERLACED;
1423
1424 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1425
1426 if(h->direct_spatial_mv_pred){
1427 int ref[2];
1428 int mv[2][2];
1429 int list;
1430
1431 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1432
1433 /* ref = min(neighbors) */
 /* per list: smallest non-negative reference among left (refa), top (refb)
  * and top right (refc, replaced by top left when it is -2); -1 if none */
1434 for(list=0; list<2; list++){
1435 int refa = h->ref_cache[list][scan8[0] - 1];
1436 int refb = h->ref_cache[list][scan8[0] - 8];
1437 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1438 if(refc == -2)
1439 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1440 ref[list] = refa;
1441 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1442 ref[list] = refb;
1443 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1444 ref[list] = refc;
1445 if(ref[list] < 0)
1446 ref[list] = -1;
1447 }
1448
 /* no usable neighbour in either list: reference 0 with zero vectors;
  * otherwise predict a 16x16 vector for every list that has a reference */
1449 if(ref[0] < 0 && ref[1] < 0){
1450 ref[0] = ref[1] = 0;
1451 mv[0][0] = mv[0][1] =
1452 mv[1][0] = mv[1][1] = 0;
1453 }else{
1454 for(list=0; list<2; list++){
1455 if(ref[list] >= 0)
1456 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1457 else
1458 mv[list][0] = mv[list][1] = 0;
1459 }
1460 }
1461
 /* drop the list that ended up without a reference from the types */
1462 if(ref[1] < 0){
1463 *mb_type &= ~MB_TYPE_P0L1;
1464 sub_mb_type &= ~MB_TYPE_P0L1;
1465 }else if(ref[0] < 0){
1466 *mb_type &= ~MB_TYPE_P0L0;
1467 sub_mb_type &= ~MB_TYPE_P0L0;
1468 }
1469
1470 if(IS_16X16(*mb_type)){
1471 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1472 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
 /* co-located block is inter, uses reference 0 and is (nearly) static:
  * the predicted vector is only kept for lists whose ref is > 0 */
1473 if(!IS_INTRA(mb_type_col)
1474 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1475 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1476 && (h->x264_build>33 || !h->x264_build)))){
1477 if(ref[0] > 0)
1478 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1479 else
1480 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1481 if(ref[1] > 0)
1482 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1483 else
1484 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1485 }else{
1486 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1487 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1488 }
1489 }else{
1490 for(i8=0; i8<4; i8++){
1491 const int x8 = i8&1;
1492 const int y8 = i8>>1;
1493
1494 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1495 continue;
1496 h->sub_mb_type[i8] = sub_mb_type;
1497
1498 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1499 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1500 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1501 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1502
1503 /* col_zero_flag */
1504 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1505 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1506 && (h->x264_build>33 || !h->x264_build)))){
1507 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1508 if(IS_SUB_8X8(sub_mb_type)){
 /* x8*3 / y8*3 select the 4x4 block in the outer corner of the 8x8 block */
1509 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1510 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1511 if(ref[0] == 0)
1512 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1513 if(ref[1] == 0)
1514 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1515 }
1516 }else
1517 for(i4=0; i4<4; i4++){
1518 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1519 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1520 if(ref[0] == 0)
1521 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1522 if(ref[1] == 0)
1523 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1524 }
1525 }
1526 }
1527 }
1528 }
1529 }else{ /* direct temporal mv pred */
1530 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1531 const int *dist_scale_factor = h->dist_scale_factor;
1532
1533 if(FRAME_MBAFF){
 /* field macroblocks use the field variants of the tables */
1534 if(IS_INTERLACED(*mb_type)){
1535 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1536 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1537 dist_scale_factor = h->dist_scale_factor_field;
1538 }
 /* current and co-located macroblock differ in field/frame coding:
  * this branch handles the whole macroblock and returns */
1539 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1540 /* FIXME assumes direct_8x8_inference == 1 */
1541 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1542 int mb_types_col[2];
1543 int y_shift;
1544
1545 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1546 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1547 | (*mb_type & MB_TYPE_INTERLACED);
1548 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1549
1550 if(IS_INTERLACED(*mb_type)){
1551 /* frame to field scaling */
1552 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1553 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
 /* for the bottom macroblock of the pair, rewind the co-located pointers
  * by one macroblock row so they address the top of the pair */
1554 if(s->mb_y&1){
1555 l1ref0 -= 2*h->b8_stride;
1556 l1ref1 -= 2*h->b8_stride;
1557 l1mv0 -= 4*h->b_stride;
1558 l1mv1 -= 4*h->b_stride;
1559 }
1560 y_shift = 0;
1561
1562 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1563 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1564 && !is_b8x8)
1565 *mb_type |= MB_TYPE_16x8;
1566 else
1567 *mb_type |= MB_TYPE_8x8;
1568 }else{
1569 /* field to frame scaling */
1570 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1571 * but in MBAFF, top and bottom POC are equal */
1572 int dy = (s->mb_y&1) ? 1 : 2;
1573 mb_types_col[0] =
1574 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1575 l1ref0 += dy*h->b8_stride;
1576 l1ref1 += dy*h->b8_stride;
1577 l1mv0 += 2*dy*h->b_stride;
1578 l1mv1 += 2*dy*h->b_stride;
1579 y_shift = 2;
1580
1581 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1582 && !is_b8x8)
1583 *mb_type |= MB_TYPE_16x16;
1584 else
1585 *mb_type |= MB_TYPE_8x8;
1586 }
1587
1588 for(i8=0; i8<4; i8++){
1589 const int x8 = i8&1;
1590 const int y8 = i8>>1;
1591 int ref0, scale;
1592 const int16_t (*l1mv)[2]= l1mv0;
1593
1594 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1595 continue;
1596 h->sub_mb_type[i8] = sub_mb_type;
1597
1598 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
 /* intra co-located block: reference 0 and zero vectors in both lists */
1599 if(IS_INTRA(mb_types_col[y8])){
1600 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1601 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1602 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1603 continue;
1604 }
1605
 /* take the co-located list 0 reference if present, else its list 1
  * reference (and list 1 vectors), mapped into the current list 0 */
1606 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1607 if(ref0 >= 0)
1608 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1609 else{
1610 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1611 l1mv= l1mv1;
1612 }
1613 scale = dist_scale_factor[ref0];
1614 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1615
1616 {
1617 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
 /* vertical component rescaled between field and frame units:
  * halved when y_shift is 0, doubled when y_shift is 2 */
1618 int my_col = (mv_col[1]<<y_shift)/2;
1619 int mx = (scale * mv_col[0] + 128) >> 8;
1620 int my = (scale * my_col + 128) >> 8;
1621 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1622 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1623 }
1624 }
1625 return;
1626 }
1627 }
1628
1629 /* one-to-one mv scaling */
 /* list 0 vector = co-located vector scaled by dist_scale_factor/256,
  * list 1 vector = list 0 vector minus the co-located vector */
1630
1631 if(IS_16X16(*mb_type)){
1632 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1633 if(IS_INTRA(mb_type_col)){
1634 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1635 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1636 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1637 }else{
1638 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1639 : map_col_to_list0[1][l1ref1[0]];
1640 const int scale = dist_scale_factor[ref0];
1641 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1642 int mv_l0[2];
1643 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1644 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1645 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1646 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1647 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1648 }
1649 }else{
1650 for(i8=0; i8<4; i8++){
1651 const int x8 = i8&1;
1652 const int y8 = i8>>1;
1653 int ref0, scale;
1654 const int16_t (*l1mv)[2]= l1mv0;
1655
1656 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1657 continue;
1658 h->sub_mb_type[i8] = sub_mb_type;
1659 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1660 if(IS_INTRA(mb_type_col)){
1661 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1662 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1663 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1664 continue;
1665 }
1666
1667 ref0 = l1ref0[x8 + y8*h->b8_stride];
1668 if(ref0 >= 0)
1669 ref0 = map_col_to_list0[0][ref0];
1670 else{
1671 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1672 l1mv= l1mv1;
1673 }
1674 scale = dist_scale_factor[ref0];
1675
1676 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1677 if(IS_SUB_8X8(sub_mb_type)){
 /* one vector per 8x8 block, taken from its outer corner 4x4 block */
1678 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1679 int mx = (scale * mv_col[0] + 128) >> 8;
1680 int my = (scale * mv_col[1] + 128) >> 8;
1681 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1682 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1683 }else
 /* one vector per 4x4 block */
1684 for(i4=0; i4<4; i4++){
1685 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1686 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1687 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1688 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1689 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1690 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1691 }
1692 }
1693 }
1694 }
1695}
1696
1697static inline void write_back_motion(H264Context *h, int mb_type){
1698 MpegEncContext * const s = &h->s;
1699 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1700 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1701 int list;
1702
1703 if(!USES_LIST(mb_type, 0))
1704 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1705
1706 for(list=0; list<2; list++){
1707 int y;
1708 if(!USES_LIST(mb_type, list))
1709 continue;
1710
1711 for(y=0; y<4; y++){
1712 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1713 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1714 }
1715 if( h->pps.cabac ) {
1716 for(y=0; y<4; y++){
1717 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1718 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1719 }
1720 }
1721
1722 {
1723 uint8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1724 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1725 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1726 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1727 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1728 }
1729 }
1730
1731 if(h->slice_type == B_TYPE && h->pps.cabac){
1732 if(IS_8X8(mb_type)){
1733 uint8_t *direct_table = &h->direct_table[b8_xy];
1734 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1735 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1736 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1737 }
1738 }
1739}
1740
1741/**
1742 * Decodes a network abstraction layer unit.
1743 * @param consumed is the number of bytes used as input
1744 * @param length is the length of the array
1745 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1746 * @returns decoded bytes, might be src+1 if no escapes
1747 */
1748static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1749 int i, si, di;
1750 uint8_t *dst;
1751
1752// src[0]&0x80; //forbidden bit
1753 h->nal_ref_idc= src[0]>>5;
1754 h->nal_unit_type= src[0]&0x1F;
1755
1756 src++; length--;
1757#if 0
1758 for(i=0; i<length; i++)
1759 printf("%2X ", src[i]);
1760#endif
1761 for(i=0; i+1<length; i+=2){
1762 if(src[i]) continue;
1763 if(i>0 && src[i-1]==0) i--;
1764 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1765 if(src[i+2]!=3){
1766 /* startcode, so we must be past the end */
1767 length=i;
1768 }
1769 break;
1770 }
1771 }
1772
1773 if(i>=length-1){ //no escaped 0
1774 *dst_length= length;
1775 *consumed= length+1; //+1 for the header
1776 return src;
1777 }
1778
1779 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1780 dst= h->rbsp_buffer;
1781
1782//printf("decoding esc\n");
1783 si=di=0;
1784 while(si<length){
1785 //remove escapes (very rare 1:2^22)
1786 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1787 if(src[si+2]==3){ //escape
1788 dst[di++]= 0;
1789 dst[di++]= 0;
1790 si+=3;
1791 continue;
1792 }else //next start code
1793 break;
1794 }
1795
1796 dst[di++]= src[si++];
1797 }
1798
1799 *dst_length= di;
1800 *consumed= si + 1;//+1 for the header
1801//FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1802 return dst;
1803}
1804
1805#if 0
1806/**
1807 * @param src the data which should be escaped
1808 * @param dst the target buffer, dst+1 == src is allowed as a special case
1809 * @param length the length of the src data
1810 * @param dst_length the length of the dst array
1811 * @returns length of escaped data in bytes or -1 if an error occured
1812 */
1813static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1814 int i, escape_count, si, di;
1815 uint8_t *temp;
1816
1817 assert(length>=0);
1818 assert(dst_length>0);
1819
1820 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1821
1822 if(length==0) return 1;
1823
1824 escape_count= 0;
1825 for(i=0; i<length; i+=2){
1826 if(src[i]) continue;
1827 if(i>0 && src[i-1]==0)
1828 i--;
1829 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1830 escape_count++;
1831 i+=2;
1832 }
1833 }
1834
1835 if(escape_count==0){
1836 if(dst+1 != src)
1837 memcpy(dst+1, src, length);
1838 return length + 1;
1839 }
1840
1841 if(length + escape_count + 1> dst_length)
1842 return -1;
1843
1844 //this should be damn rare (hopefully)
1845
1846 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1847 temp= h->rbsp_buffer;
1848//printf("encoding esc\n");
1849
1850 si= 0;
1851 di= 0;
1852 while(si < length){
1853 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1854 temp[di++]= 0; si++;
1855 temp[di++]= 0; si++;
1856 temp[di++]= 3;
1857 temp[di++]= src[si++];
1858 }
1859 else
1860 temp[di++]= src[si++];
1861 }
1862 memcpy(dst+1, temp, length+escape_count);
1863
1864 assert(di == length+escape_count);
1865
1866 return di + 1;
1867}
1868
1869/**
1870 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1871 */
1872static void encode_rbsp_trailing(PutBitContext *pb){
1873 int length;
1874 put_bits(pb, 1, 1);
1875 length= (-put_bits_count(pb))&7;
1876 if(length) put_bits(pb, length, 0);
1877}
1878#endif
1879
/**
 * identifies the exact end of the bitstream
 * Looks for the lowest set bit of the byte at src.
 * @return the length of the trailing (1 based position of that bit, 1..8),
 *         or 0 if damaged (the byte is zero)
 */
static int decode_rbsp_trailing(uint8_t *src){
    const int byte= *src;
    int bit;

    tprintf("rbsp trailing %X\n", byte);

    for(bit=0; bit<8; bit++){
        if((byte>>bit)&1)
            return bit+1;
    }
    return 0;
}
1896
1897/**
1898 * idct tranforms the 16 dc values and dequantize them.
1899 * @param qp quantization parameter
1900 */
1901static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1902#define stride 16
1903 int i;
1904 int temp[16]; //FIXME check if this is a good idea
1905 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1906 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1907
1908//memset(block, 64, 2*256);
1909//return;
1910 for(i=0; i<4; i++){
1911 const int offset= y_offset[i];
1912 const int z0= block[offset+stride*0] + block[offset+stride*4];
1913 const int z1= block[offset+stride*0] - block[offset+stride*4];
1914 const int z2= block[offset+stride*1] - block[offset+stride*5];
1915 const int z3= block[offset+stride*1] + block[offset+stride*5];
1916
1917 temp[4*i+0]= z0+z3;
1918 temp[4*i+1]= z1+z2;
1919 temp[4*i+2]= z1-z2;
1920 temp[4*i+3]= z0-z3;
1921 }
1922
1923 for(i=0; i<4; i++){
1924 const int offset= x_offset[i];
1925 const int z0= temp[4*0+i] + temp[4*2+i];
1926 const int z1= temp[4*0+i] - temp[4*2+i];
1927 const int z2= temp[4*1+i] - temp[4*3+i];
1928 const int z3= temp[4*1+i] + temp[4*3+i];
1929
1930 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1931 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1932 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1933 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1934 }
1935}
1936
1937#if 0
1938/**
1939 * dct tranforms the 16 dc values.
1940 * @param qp quantization parameter ??? FIXME
1941 */
1942static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1943// const int qmul= dequant_coeff[qp][0];
1944 int i;
1945 int temp[16]; //FIXME check if this is a good idea
1946 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1947 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1948
1949 for(i=0; i<4; i++){
1950 const int offset= y_offset[i];
1951 const int z0= block[offset+stride*0] + block[offset+stride*4];
1952 const int z1= block[offset+stride*0] - block[offset+stride*4];
1953 const int z2= block[offset+stride*1] - block[offset+stride*5];
1954 const int z3= block[offset+stride*1] + block[offset+stride*5];
1955
1956 temp[4*i+0]= z0+z3;
1957 temp[4*i+1]= z1+z2;
1958 temp[4*i+2]= z1-z2;
1959 temp[4*i+3]= z0-z3;
1960 }
1961
1962 for(i=0; i<4; i++){
1963 const int offset= x_offset[i];
1964 const int z0= temp[4*0+i] + temp[4*2+i];
1965 const int z1= temp[4*0+i] - temp[4*2+i];
1966 const int z2= temp[4*1+i] - temp[4*3+i];
1967 const int z3= temp[4*1+i] + temp[4*3+i];
1968
1969 block[stride*0 +offset]= (z0 + z3)>>1;
1970 block[stride*2 +offset]= (z1 + z2)>>1;
1971 block[stride*8 +offset]= (z1 - z2)>>1;
1972 block[stride*10+offset]= (z0 - z3)>>1;
1973 }
1974}
1975#endif
1976
1977#undef xStride
1978#undef stride
1979
1980static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1981 const int stride= 16*2;
1982 const int xStride= 16;
1983 int a,b,c,d,e;
1984
1985 a= block[stride*0 + xStride*0];
1986 b= block[stride*0 + xStride*1];
1987 c= block[stride*1 + xStride*0];
1988 d= block[stride*1 + xStride*1];
1989
1990 e= a-b;
1991 a= a+b;
1992 b= c-d;
1993 c= c+d;
1994
1995 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1996 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1997 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1998 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1999}
2000
2001#if 0
2002static void chroma_dc_dct_c(DCTELEM *block){
2003 const int stride= 16*2;
2004 const int xStride= 16;
2005 int a,b,c,d,e;
2006
2007 a= block[stride*0 + xStride*0];
2008 b= block[stride*0 + xStride*1];
2009 c= block[stride*1 + xStride*0];
2010 d= block[stride*1 + xStride*1];
2011
2012 e= a-b;
2013 a= a+b;
2014 b= c-d;
2015 c= c+d;
2016
2017 block[stride*0 + xStride*0]= (a+c);
2018 block[stride*0 + xStride*1]= (e+b);
2019 block[stride*1 + xStride*0]= (a-c);
2020 block[stride*1 + xStride*1]= (e-b);
2021}
2022#endif
2023
2024/**
2025 * gets the chroma qp.
2026 */
2027static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
2028
2029 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
2030}
2031
2032
2033#if 0
2034static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
2035 int i;
2036 //FIXME try int temp instead of block
2037
2038 for(i=0; i<4; i++){
2039 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
2040 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
2041 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
2042 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
2043 const int z0= d0 + d3;
2044 const int z3= d0 - d3;
2045 const int z1= d1 + d2;
2046 const int z2= d1 - d2;
2047
2048 block[0 + 4*i]= z0 + z1;
2049 block[1 + 4*i]= 2*z3 + z2;
2050 block[2 + 4*i]= z0 - z1;
2051 block[3 + 4*i]= z3 - 2*z2;
2052 }
2053
2054 for(i=0; i<4; i++){
2055 const int z0= block[0*4 + i] + block[3*4 + i];
2056 const int z3= block[0*4 + i] - block[3*4 + i];
2057 const int z1= block[1*4 + i] + block[2*4 + i];
2058 const int z2= block[1*4 + i] - block[2*4 + i];
2059
2060 block[0*4 + i]= z0 + z1;
2061 block[1*4 + i]= 2*z3 + z2;
2062 block[2*4 + i]= z0 - z1;
2063 block[3*4 + i]= z3 - 2*z2;
2064 }
2065}
2066#endif
2067
2068//FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
2069//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
2070static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
2071 int i;
2072 const int * const quant_table= quant_coeff[qscale];
2073 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
2074 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
2075 const unsigned int threshold2= (threshold1<<1);
2076 int last_non_zero;
2077
2078 if(seperate_dc){
2079 if(qscale<=18){
2080 //avoid overflows
2081 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
2082 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
2083 const unsigned int dc_threshold2= (dc_threshold1<<1);
2084
2085 int level= block[0]*quant_coeff[qscale+18][0];
2086 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2087 if(level>0){
2088 level= (dc_bias + level)>>(QUANT_SHIFT-2);
2089 block[0]= level;
2090 }else{
2091 level= (dc_bias - level)>>(QUANT_SHIFT-2);
2092 block[0]= -level;
2093 }
2094// last_non_zero = i;
2095 }else{
2096 block[0]=0;
2097 }
2098 }else{
2099 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
2100 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
2101 const unsigned int dc_threshold2= (dc_threshold1<<1);
2102
2103 int level= block[0]*quant_table[0];
2104 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2105 if(level>0){
2106 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2107 block[0]= level;
2108 }else{
2109 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2110 block[0]= -level;
2111 }
2112// last_non_zero = i;
2113 }else{
2114 block[0]=0;
2115 }
2116 }
2117 last_non_zero= 0;
2118 i=1;
2119 }else{
2120 last_non_zero= -1;
2121 i=0;
2122 }
2123
2124 for(; i<16; i++){
2125 const int j= scantable[i];
2126 int level= block[j]*quant_table[j];
2127
2128// if( bias+level >= (1<<(QMAT_SHIFT - 3))
2129// || bias-level >= (1<<(QMAT_SHIFT - 3))){
2130 if(((unsigned)(level+threshold1))>threshold2){
2131 if(level>0){
2132 level= (bias + level)>>QUANT_SHIFT;
2133 block[j]= level;
2134 }else{
2135 level= (bias - level)>>QUANT_SHIFT;
2136 block[j]= -level;
2137 }
2138 last_non_zero = i;
2139 }else{
2140 block[j]=0;
2141 }
2142 }
2143
2144 return last_non_zero;
2145}
2146
/**
 * 4x4 vertical prediction: copies the 4 pixels above the block into each of
 * its 4 rows. topright is not used.
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top_row= *(uint32_t*)(src - stride);
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= top_row;
}
2154
/**
 * 4x4 horizontal prediction: fills each row with the pixel to its left.
 * topright is not used.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    /* unsigned multiply: pixel*0x01010101 overflows a signed int for pixels >= 128 */
    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= src[-1 + y*stride]*0x01010101U;
}
2161
/**
 * 4x4 dc prediction: fills the block with the rounded average of the 4 pixels
 * above and the 4 pixels to the left. topright is not used.
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int sum= 4; /* rounding */
    int k;
    uint32_t fill;

    for(k=0; k<4; k++)
        sum+= src[k - stride] + src[-1 + k*stride];

    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    fill= (sum>>3)*0x01010101U;

    for(k=0; k<4; k++)
        *(uint32_t*)(src + k*stride)= fill;
}
2171
/**
 * 4x4 dc prediction from the left edge only: fills the block with the rounded
 * average of the 4 pixels to the left. topright is not used.
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int sum= 2; /* rounding */
    int y;
    uint32_t fill;

    for(y=0; y<4; y++)
        sum+= src[-1 + y*stride];

    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    fill= (sum>>2)*0x01010101U;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
2180
/**
 * 4x4 dc prediction from the top edge only: fills the block with the rounded
 * average of the 4 pixels above. topright is not used.
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int sum= 2; /* rounding */
    int k;
    uint32_t fill;

    for(k=0; k<4; k++)
        sum+= src[k - stride];

    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    fill= (sum>>2)*0x01010101U;

    for(k=0; k<4; k++)
        *(uint32_t*)(src + k*stride)= fill;
}
2189
/**
 * 4x4 dc prediction without any edge: fills the block with 128.
 * topright is not used.
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t fill= 128U*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
2196
2197
/* Declares t4..t7 and loads them from topright[0..3].
 * Expects a pointer named topright in the scope where it is expanded. */
#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];\

/* Declares l0..l3 and loads them from the column left of src (src[-1 + y*stride]).
 * Expects src and stride in the scope where it is expanded. */
#define LOAD_LEFT_EDGE\
    const int l0= src[-1+0*stride];\
    const int l1= src[-1+1*stride];\
    const int l2= src[-1+2*stride];\
    const int l3= src[-1+3*stride];\

/* Declares t0..t3 and loads them from the row above src (src[x - stride]).
 * Expects src and stride in the scope where it is expanded. */
#define LOAD_TOP_EDGE\
    const int t0= src[ 0-1*stride];\
    const int t1= src[ 1-1*stride];\
    const int t2= src[ 2-1*stride];\
    const int t3= src[ 3-1*stride];\
2215
/**
 * 4x4 diagonal down right prediction. Every pixel is the 1-2-1 filtered value
 * of the edge (left column, top left corner, top row) at the position its
 * diagonal x-y points to. topright is not used.
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    /* edge from the bottom of the left column, over the corner, to the end of the top row */
    int edge[9];
    int x, y, k;

    for(k=0; k<4; k++){
        edge[3-k]= src[-1 + k*stride];
        edge[5+k]= src[ k - stride];
    }
    edge[4]= src[-1 - stride];

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int c= 4 + x - y;
            src[x + y*stride]= (edge[c-1] + 2*edge[c] + edge[c+1] + 2)>>2;
        }
    }
}
2238
/**
 * 4x4 diagonal down left prediction. Every pixel is the 1-2-1 filtered value
 * of the 8 pixels above (4 above the block, 4 from topright) at the position
 * its anti diagonal x+y points to; the last pixel uses weights 1-3.
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    int top[9];
    int x, y, k;

    for(k=0; k<4; k++){
        top[k  ]= src[k - stride];
        top[4+k]= topright[k];
    }
    /* repeating the last sample turns the 1-2-1 filter into (t6 + 3*t7 + 2)>>2 */
    top[8]= top[7];

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int c= x + y + 1;
            src[x + y*stride]= (top[c-1] + 2*top[c] + top[c+1] + 2)>>2;
        }
    }
}
2261
/**
 * 4x4 vertical right prediction. With z = 2*x - y a pixel is
 *  - the average of two neighbouring top edge samples if z is even and >= 0,
 *  - the 1-2-1 filtered top edge if z is odd and > 0,
 *  - the 1-2-1 filtered corner / left edge if z is negative.
 * topright is not used.
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    /* edge from the bottom of the left column, over the corner, to the end of the top row */
    int edge[9];
    int x, y, k;

    for(k=0; k<4; k++){
        edge[3-k]= src[-1 + k*stride];
        edge[5+k]= src[ k - stride];
    }
    edge[4]= src[-1 - stride];

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int z= 2*x - y;
            int v;

            if(z >= 0 && !(z&1)){
                const int c= 4 + z/2;
                v= (edge[c] + edge[c+1] + 1)>>1;
            }else{
                const int c= z < 0 ? 5 + z : 4 + (z+1)/2;
                v= (edge[c-1] + 2*edge[c] + edge[c+1] + 2)>>2;
            }
            src[x + y*stride]= v;
        }
    }
}
2285
/**
 * 4x4 vertical left prediction from the 8 pixels above (4 above the block,
 * 4 from topright). Even rows take the average of two neighbouring samples,
 * odd rows the 1-2-1 filtered value; the start position moves one sample to
 * the right every second row.
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    int top[8];
    int x, y, k;

    for(k=0; k<4; k++){
        top[k  ]= src[k - stride];
        top[4+k]= topright[k];
    }

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int i= x + (y>>1);
            int v;

            if(y&1)
                v= (top[i] + 2*top[i+1] + top[i+2] + 2)>>2;
            else
                v= (top[i] + top[i+1] + 1)>>1;
            src[x + y*stride]= v;
        }
    }
}
2308
/**
 * 4x4 horizontal up prediction from the 4 pixels to the left. With z = x + 2*y
 * a pixel is the average of two neighbouring left samples (z even), their
 * 1-2-1 filtered value (z odd), (l2 + 3*l3 + 2)>>2 for z == 5 and the last
 * left pixel for z > 5. topright is not used.
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    int left[4];
    int x, y;

    for(y=0; y<4; y++)
        left[y]= src[-1 + y*stride];

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int z= x + 2*y;
            const int i= z>>1;
            int v;

            if(z > 5)
                v= left[3];
            else if(z == 5)
                v= (left[2] + 3*left[3] + 2)>>2;
            else if(z&1)
                v= (left[i] + 2*left[i+1] + left[i+2] + 2)>>2;
            else
                v= (left[i] + left[i+1] + 1)>>1;
            src[x + y*stride]= v;
        }
    }
}
2329
/**
 * 4x4 horizontal down prediction. With z = 2*y - x a pixel is
 *  - the average of two neighbouring left edge samples if z is even and >= 0,
 *  - the 1-2-1 filtered left edge if z is odd and > 0,
 *  - the 1-2-1 filtered corner / top edge if z is negative.
 * topright is not used.
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    /* edge from the end of the top row, over the corner, to the bottom of the left column */
    int edge[9];
    int x, y, k;

    for(k=0; k<4; k++){
        edge[3-k]= src[ k - stride];
        edge[5+k]= src[-1 + k*stride];
    }
    edge[4]= src[-1 - stride];

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int z= 2*y - x;
            int v;

            if(z >= 0 && !(z&1)){
                const int c= 4 + z/2;
                v= (edge[c] + edge[c+1] + 1)>>1;
            }else{
                const int c= z < 0 ? 5 + z : 4 + (z+1)/2;
                v= (edge[c-1] + 2*edge[c] + edge[c+1] + 2)>>2;
            }
            src[x + y*stride]= v;
        }
    }
}
2353
/**
 * 16x16 vertical prediction: copies the 16 pixels above the block into each
 * of its 16 rows.
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t *above= (uint32_t*)(src - stride);
    const uint32_t w0= above[0];
    const uint32_t w1= above[1];
    const uint32_t w2= above[2];
    const uint32_t w3= above[3];
    int y;

    for(y=0; y<16; y++){
        uint32_t *row= (uint32_t*)(src + y*stride);

        row[0]= w0;
        row[1]= w1;
        row[2]= w2;
        row[3]= w3;
    }
}
2368
/**
 * 16x16 horizontal prediction: fills each row with the pixel to its left.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int y;

    for(y=0; y<16; y++){
        uint32_t *row= (uint32_t*)(src + y*stride);
        /* unsigned multiply: pixel*0x01010101 overflows a signed int for pixels >= 128 */
        const uint32_t fill= src[-1 + y*stride]*0x01010101U;

        row[0]= fill;
        row[1]= fill;
        row[2]= fill;
        row[3]= fill;
    }
}
2379
/**
 * 16x16 dc prediction: fills the block with the rounded average of the 16
 * pixels above and the 16 pixels to the left.
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t fill;

    for(i=0; i<16; i++)
        sum+= src[-1 + i*stride] + src[i - stride];

    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    fill= 0x01010101U*((sum + 16)>>5);

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src + i*stride);

        row[0]= fill;
        row[1]= fill;
        row[2]= fill;
        row[3]= fill;
    }
}
2400
/**
 * 16x16 dc prediction from the left edge only: fills the block with the
 * rounded average of the 16 pixels to the left.
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t fill;

    for(i=0; i<16; i++)
        sum+= src[-1 + i*stride];

    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    fill= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src + i*stride);

        row[0]= fill;
        row[1]= fill;
        row[2]= fill;
        row[3]= fill;
    }
}
2417
/**
 * 16x16 dc prediction from the top edge only: fills the block with the
 * rounded average of the 16 pixels above.
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t fill;

    for(i=0; i<16; i++)
        sum+= src[i - stride];

    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    fill= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src + i*stride);

        row[0]= fill;
        row[1]= fill;
        row[2]= fill;
        row[3]= fill;
    }
}
2433
/**
 * 16x16 dc prediction without any edge: fills the block with 128.
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    const uint32_t fill= 0x01010101U*128U;
    int y, w;

    for(y=0; y<16; y++){
        uint32_t *row= (uint32_t*)(src + y*stride);

        for(w=0; w<4; w++)
            row[w]= fill;
    }
}
2444
2445static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2446 int i, j, k;
2447 int a;
2448 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2449 const uint8_t * const src0 = src+7-stride;
2450 const uint8_t *src1 = src+8*stride-1;
2451 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2452 int H = src0[1] - src0[-1];
2453 int V = src1[0] - src2[ 0];
2454 for(k=2; k<=8; ++k) {
2455 src1 += stride; src2 -= stride;
2456 H += k*(src0[k] - src0[-k]);
2457 V += k*(src1[0] - src2[ 0]);
2458 }
2459 if(svq3){
2460 H = ( 5*(H/4) ) / 16;
2461 V = ( 5*(V/4) ) / 16;
2462
2463 /* required for 100% accuracy */
2464 i = H; H = V; V = i;
2465 }else{
2466 H = ( 5*H+32 ) >> 6;
2467 V = ( 5*V+32 ) >> 6;
2468 }
2469
2470 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2471 for(j=16; j>0; --j) {
2472 int b = a;
2473 a += V;
2474 for(i=-16; i<0; i+=4) {
2475 src[16+i] = cm[ (b ) >> 5 ];
2476 src[17+i] = cm[ (b+ H) >> 5 ];
2477 src[18+i] = cm[ (b+2*H) >> 5 ];
2478 src[19+i] = cm[ (b+3*H) >> 5 ];
2479 b += 4*H;
2480 }
2481 src += stride;
2482 }
2483}
2484
/**
 * 16x16 plane prediction: calls pred16x16_plane_compat_c() with svq3 = 0.
 */
static void pred16x16_plane_c(uint8_t *src, int stride){
    pred16x16_plane_compat_c(src, stride, 0);
}
2488
/**
 * 8x8 vertical prediction: copies the 8 pixels above the block into each of
 * its 8 rows.
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t *above= (uint32_t*)(src - stride);
    const uint32_t w0= above[0];
    const uint32_t w1= above[1];
    int y;

    for(y=0; y<8; y++){
        uint32_t *row= (uint32_t*)(src + y*stride);

        row[0]= w0;
        row[1]= w1;
    }
}
2499
/**
 * 8x8 horizontal prediction: fills each row with the pixel to its left.
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int y;

    for(y=0; y<8; y++){
        uint32_t *row= (uint32_t*)(src + y*stride);
        /* unsigned multiply: pixel*0x01010101 overflows a signed int for pixels >= 128 */
        const uint32_t fill= src[-1 + y*stride]*0x01010101U;

        row[0]= fill;
        row[1]= fill;
    }
}
2508
/**
 * 8x8 dc prediction without any edge: fills the block with 128.
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    const uint32_t fill= 0x01010101U*128U;
    int y;

    for(y=0; y<8; y++){
        uint32_t *row= (uint32_t*)(src + y*stride);

        row[0]= fill;
        row[1]= fill;
    }
}
2517
/**
 * 8x8 dc prediction from the left edge only: the upper 4 rows are filled with
 * the rounded average of the upper 4 left pixels, the lower 4 rows with the
 * rounded average of the lower 4 left pixels.
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int upper= 0, lower= 0;
    uint32_t upper_fill, lower_fill;

    for(i=0; i<4; i++){
        upper+= src[-1 +  i   *stride];
        lower+= src[-1 + (i+4)*stride];
    }
    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    upper_fill= 0x01010101U*((upper + 2)>>2);
    lower_fill= 0x01010101U*((lower + 2)>>2);

    for(i=0; i<8; i++){
        uint32_t *row= (uint32_t*)(src + i*stride);
        const uint32_t fill= i<4 ? upper_fill : lower_fill;

        row[0]= fill;
        row[1]= fill;
    }
}
2539
/**
 * 8x8 dc prediction from the top edge only: the left 4 columns are filled with
 * the rounded average of the left 4 top pixels, the right 4 columns with the
 * rounded average of the right 4 top pixels.
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int left_half= 0, right_half= 0;
    uint32_t left_fill, right_fill;

    for(i=0; i<4; i++){
        left_half += src[  i - stride];
        right_half+= src[4+i - stride];
    }
    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    left_fill = 0x01010101U*((left_half  + 2)>>2);
    right_fill= 0x01010101U*((right_half + 2)>>2);

    for(i=0; i<8; i++){
        uint32_t *row= (uint32_t*)(src + i*stride);

        row[0]= left_fill;
        row[1]= right_fill;
    }
}
2561
2562
/**
 * 8x8 dc prediction, one dc value per 4x4 quadrant:
 *  - top left:     average of the 4 top pixels and 4 left pixels next to it
 *  - top right:    average of the right 4 top pixels
 *  - bottom left:  average of the lower 4 left pixels
 *  - bottom right: average of the right 4 top and the lower 4 left pixels
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int corner= 0, top_right= 0, bottom_left= 0;
    uint32_t fill_tl, fill_tr, fill_bl, fill_br;

    for(i=0; i<4; i++){
        corner     += src[-1 + i*stride] + src[i - stride];
        top_right  += src[4+i - stride];
        bottom_left+= src[-1 + (i+4)*stride];
    }
    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    fill_tl= 0x01010101U*((corner + 4)>>3);
    fill_tr= 0x01010101U*((top_right + 2)>>2);
    fill_bl= 0x01010101U*((bottom_left + 2)>>2);
    fill_br= 0x01010101U*((top_right + bottom_left + 4)>>3);

    for(i=0; i<8; i++){
        uint32_t *row= (uint32_t*)(src + i*stride);

        row[0]= i<4 ? fill_tl : fill_bl;
        row[1]= i<4 ? fill_tr : fill_br;
    }
}
2587
2588static void pred8x8_plane_c(uint8_t *src, int stride){
2589 int j, k;
2590 int a;
2591 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2592 const uint8_t * const src0 = src+3-stride;
2593 const uint8_t *src1 = src+4*stride-1;
2594 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2595 int H = src0[1] - src0[-1];
2596 int V = src1[0] - src2[ 0];
2597 for(k=2; k<=4; ++k) {
2598 src1 += stride; src2 -= stride;
2599 H += k*(src0[k] - src0[-k]);
2600 V += k*(src1[0] - src2[ 0]);
2601 }
2602 H = ( 17*H+16 ) >> 5;
2603 V = ( 17*V+16 ) >> 5;
2604
2605 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2606 for(j=8; j>0; --j) {
2607 int b = a;
2608 a += V;
2609 src[0] = cm[ (b ) >> 5 ];
2610 src[1] = cm[ (b+ H) >> 5 ];
2611 src[2] = cm[ (b+2*H) >> 5 ];
2612 src[3] = cm[ (b+3*H) >> 5 ];
2613 src[4] = cm[ (b+4*H) >> 5 ];
2614 src[5] = cm[ (b+5*H) >> 5 ];
2615 src[6] = cm[ (b+6*H) >> 5 ];
2616 src[7] = cm[ (b+7*H) >> 5 ];
2617 src += stride;
2618 }
2619}
2620
/* Pixel at column x, row y relative to src; expects src and stride in scope. */
#define SRC(x,y) src[(x)+(y)*stride]
/* Declares l<y>: the 1-2-1 filtered pixel of the column left of the block at row y. */
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Declares l0..l7, the filtered left column. l0 uses the top left pixel only
 * if has_topleft is set (else the first left pixel is repeated); l7 repeats
 * the last left pixel. Expects has_topleft in scope. */
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

/* Declares t<x>: the 1-2-1 filtered pixel of the row above the block at column x. */
#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Declares t0..t7, the filtered top row. t0 uses the top left pixel only if
 * has_topleft is set, t7 uses the pixel at column 8 only if has_topright is
 * set; otherwise the nearest top pixel is repeated.
 * Expects has_topleft and has_topright in scope. */
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

/* Assigns (does not declare) t<x>: the 1-2-1 filtered top pixel at column x. */
#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Declares t8..t15. With has_topright they are the filtered pixels at columns
 * 8..15 of the row above (t15 repeats the last pixel); without it all of them
 * are set to the unfiltered top pixel at column 7. Contains an if statement. */
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

/* Declares lt: the top left pixel filtered with its right and lower neighbour. */
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/* Fills 8 rows of 8 bytes with the 32 bit pattern v. Declares y and advances
 * src by stride per row, so src no longer points at the block afterwards. */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        ((uint32_t*)src)[0] = \
        ((uint32_t*)src)[1] = v; \
        src += stride; \
    }
2658
/**
 * 8x8 luma dc prediction without any edge: fills the block with 128.
 * has_topleft and has_topright are not used.
 */
static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    int row;

    for( row = 0; row < 8; row++ ) {
        uint32_t *words = (uint32_t*)(src + row*stride);

        words[0] = 0x80808080;
        words[1] = 0x80808080;
    }
}
2663static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2664{
2665 PREDICT_8x8_LOAD_LEFT;
2666 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
2667 PREDICT_8x8_DC(dc);
2668}
2669static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2670{
2671 PREDICT_8x8_LOAD_TOP;
2672 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
2673 PREDICT_8x8_DC(dc);
2674}
2675static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2676{
2677 PREDICT_8x8_LOAD_LEFT;
2678 PREDICT_8x8_LOAD_TOP;
2679 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2680 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
2681 PREDICT_8x8_DC(dc);
2682}
2683static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2684{
2685 PREDICT_8x8_LOAD_LEFT;
2686#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2687 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2688 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2689#undef ROW
2690}
2691static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2692{
2693 int y;
2694 PREDICT_8x8_LOAD_TOP;
2695 src[0] = t0;
2696 src[1] = t1;
2697 src[2] = t2;
2698 src[3] = t3;
2699 src[4] = t4;
2700 src[5] = t5;
2701 src[6] = t6;
2702 src[7] = t7;
2703 for( y = 1; y < 8; y++ )
2704 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
2705}
2706static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2707{
2708 PREDICT_8x8_LOAD_TOP;
2709 PREDICT_8x8_LOAD_TOPRIGHT;
2710 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2711 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2712 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2713 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2714 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2715 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2716 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2717 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2718 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2719 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2720 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2721 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2722 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2723 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2724 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
2725}
2726static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2727{
2728 PREDICT_8x8_LOAD_TOP;
2729 PREDICT_8x8_LOAD_LEFT;
2730 PREDICT_8x8_LOAD_TOPLEFT;
2731 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2732 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2733 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2734 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2735 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2736 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2737 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2738 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2739 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2740 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2741 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2742 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2743 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2744 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2745 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2746
2747}
2748static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2749{
2750 PREDICT_8x8_LOAD_TOP;
2751 PREDICT_8x8_LOAD_LEFT;
2752 PREDICT_8x8_LOAD_TOPLEFT;
2753 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2754 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2755 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2756 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2757 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2758 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2759 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2760 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2761 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2762 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2763 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2764 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2765 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2766 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2767 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2768 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2769 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2770 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2771 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2772 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2773 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2774 SRC(7,0)= (t6 + t7 + 1) >> 1;
2775}
/**
 * 8x8 luma intra prediction, "horizontal down" variant; installed as
 * h->pred8x8l[HOR_DOWN_PRED] by init_pred_ptrs().
 *
 * Uses the left samples l0..l7, the top-left sample lt and the top samples
 * t0..t6 supplied by the PREDICT_8x8_LOAD_* macros (defined earlier in the
 * file). Each output is a rounded 2-tap average or a rounded 3-tap
 * (1,2,1) filter of adjacent neighbours; chained assignments fill all
 * positions that share the same predicted value.
 */
2776static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2777{
2778 PREDICT_8x8_LOAD_TOP;
2779 PREDICT_8x8_LOAD_LEFT;
2780 PREDICT_8x8_LOAD_TOPLEFT;
 /* values derived from the left neighbours only */
2781 SRC(0,7)= (l6 + l7 + 1) >> 1;
2782 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2783 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2784 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2785 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2786 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2787 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2788 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2789 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2790 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2791 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2792 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2793 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
 /* values involving the top-left corner sample */
2794 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2795 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2796 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2797 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
 /* values derived from the top neighbours only */
2798 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2799 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2800 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2801 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2802 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
2803}
/**
 * 8x8 luma intra prediction, "vertical left" variant; installed as
 * h->pred8x8l[VERT_LEFT_PRED] by init_pred_ptrs().
 *
 * Only top (t0..t7) and top-right (t8..t12) neighbours are used; they are
 * supplied by PREDICT_8x8_LOAD_TOP / PREDICT_8x8_LOAD_TOPRIGHT (defined
 * earlier in the file). Outputs alternate between a rounded 2-tap average
 * and a rounded 3-tap (1,2,1) filter of consecutive top samples.
 */
2804static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2805{
2806 PREDICT_8x8_LOAD_TOP;
2807 PREDICT_8x8_LOAD_TOPRIGHT;
2808 SRC(0,0)= (t0 + t1 + 1) >> 1;
2809 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2810 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2811 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2812 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2813 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2814 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2815 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2816 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2817 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2818 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2819 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2820 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
 /* from here on the top-right samples t8..t12 contribute */
2821 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2822 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2823 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2824 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2825 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2826 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2827 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2828 SRC(7,6)= (t10 + t11 + 1) >> 1;
2829 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
2830}
/**
 * 8x8 luma intra prediction, "horizontal up" variant; installed as
 * h->pred8x8l[HOR_UP_PRED] by init_pred_ptrs().
 *
 * Only the left neighbours l0..l7 (from PREDICT_8x8_LOAD_LEFT, defined
 * earlier in the file) are used. Outputs alternate between a rounded 2-tap
 * average and a rounded 3-tap (1,2,1) filter of consecutive left samples;
 * once the filter runs past l7 the remaining positions are filled with l7.
 */
2831static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2832{
2833 PREDICT_8x8_LOAD_LEFT;
2834 SRC(0,0)= (l0 + l1 + 1) >> 1;
2835 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2836 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2837 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2838 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2839 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2840 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2841 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2842 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2843 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2844 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2845 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2846 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
 /* 3-tap filter with the missing sample below l7 replaced by l7 itself */
2847 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
 /* everything past the last left sample is a plain copy of l7 */
2848 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2849 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2850 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2851 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2852}
/* Drop the helper macros of the pred8x8l_* functions above so that their
 * short names (SRC, PT, PL, ...) cannot leak into the rest of the file. */
2853#undef PREDICT_8x8_LOAD_LEFT
2854#undef PREDICT_8x8_LOAD_TOP
2855#undef PREDICT_8x8_LOAD_TOPLEFT
2856#undef PREDICT_8x8_LOAD_TOPRIGHT
2857#undef PREDICT_8x8_DC
2858#undef PTR
2859#undef PT
2860#undef PL
2861#undef SRC
2862
2863static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2864 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2865 int src_x_offset, int src_y_offset,
2866 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2867 MpegEncContext * const s = &h->s;
2868 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2869 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2870 const int luma_xy= (mx&3) + ((my&3)<<2);
2871 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2872 uint8_t * src_cb, * src_cr;
2873 int extra_width= h->emu_edge_width;
2874 int extra_height= h->emu_edge_height;
2875 int emu=0;
2876 const int full_mx= mx>>2;
2877 const int full_my= my>>2;
2878 const int pic_width = 16*s->mb_width;
2879 const int pic_height = 16*s->mb_height >> MB_MBAFF;
2880
2881 if(!pic->data[0])
2882 return;
2883
2884 if(mx&7) extra_width -= 3;
2885 if(my&7) extra_height -= 3;
2886
2887 if( full_mx < 0-extra_width
2888 || full_my < 0-extra_height
2889 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2890 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2891 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2892 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2893 emu=1;
2894 }
2895
2896 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2897 if(!square){
2898 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2899 }
2900
2901 if(s->flags&CODEC_FLAG_GRAY) return;
2902
2903 if(MB_MBAFF){
2904 // chroma offset when predicting from a field of opposite parity
2905 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2906 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2907 }
2908 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2909 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2910
2911 if(emu){
2912 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2913 src_cb= s->edge_emu_buffer;
2914 }
2915 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2916
2917 if(emu){
2918 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2919 src_cr= s->edge_emu_buffer;
2920 }
2921 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2922}
2923
2924static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2925 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2926 int x_offset, int y_offset,
2927 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2928 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2929 int list0, int list1){
2930 MpegEncContext * const s = &h->s;
2931 qpel_mc_func *qpix_op= qpix_put;
2932 h264_chroma_mc_func chroma_op= chroma_put;
2933
2934 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2935 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2936 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2937 x_offset += 8*s->mb_x;
2938 y_offset += 8*(s->mb_y >> MB_MBAFF);
2939
2940 if(list0){
2941 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2942 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2943 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2944 qpix_op, chroma_op);
2945
2946 qpix_op= qpix_avg;
2947 chroma_op= chroma_avg;
2948 }
2949
2950 if(list1){
2951 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2952 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2953 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2954 qpix_op, chroma_op);
2955 }
2956}
2957
2958static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2959 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2960 int x_offset, int y_offset,
2961 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2962 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2963 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2964 int list0, int list1){
2965 MpegEncContext * const s = &h->s;
2966
2967 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2968 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2969 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2970 x_offset += 8*s->mb_x;
2971 y_offset += 8*(s->mb_y >> MB_MBAFF);
2972
2973 if(list0 && list1){
2974 /* don't optimize for luma-only case, since B-frames usually
2975 * use implicit weights => chroma too. */
2976 uint8_t *tmp_cb = s->obmc_scratchpad;
2977 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2978 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2979 int refn0 = h->ref_cache[0][ scan8[n] ];
2980 int refn1 = h->ref_cache[1][ scan8[n] ];
2981
2982 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2983 dest_y, dest_cb, dest_cr,
2984 x_offset, y_offset, qpix_put, chroma_put);
2985 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2986 tmp_y, tmp_cb, tmp_cr,
2987 x_offset, y_offset, qpix_put, chroma_put);
2988
2989 if(h->use_weight == 2){
2990 int weight0 = h->implicit_weight[refn0][refn1];
2991 int weight1 = 64 - weight0;
2992 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2993 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2994 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2995 }else{
2996 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2997 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2998 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2999 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3000 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
3001 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
3002 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3003 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
3004 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
3005 }
3006 }else{
3007 int list = list1 ? 1 : 0;
3008 int refn = h->ref_cache[list][ scan8[n] ];
3009 Picture *ref= &h->ref_list[list][refn];
3010 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
3011 dest_y, dest_cb, dest_cr, x_offset, y_offset,
3012 qpix_put, chroma_put);
3013
3014 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3015 h->luma_weight[list][refn], h->luma_offset[list][refn]);
3016 if(h->use_weight_chroma){
3017 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3018 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
3019 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3020 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
3021 }
3022 }
3023}
3024
3025static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
3026 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3027 int x_offset, int y_offset,
3028 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
3029 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
3030 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
3031 int list0, int list1){
3032 if((h->use_weight==2 && list0 && list1
3033 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
3034 || h->use_weight==1)
3035 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3036 x_offset, y_offset, qpix_put, chroma_put,
3037 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
3038 else
3039 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3040 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
3041}
3042
3043static inline void prefetch_motion(H264Context *h, int list){
3044 /* fetch pixels for estimated mv 4 macroblocks ahead
3045 * optimized for 64byte cache lines */
3046 MpegEncContext * const s = &h->s;
3047 const int refn = h->ref_cache[list][scan8[0]];
3048 if(refn >= 0){
3049 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
3050 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
3051 uint8_t **src= h->ref_list[list][refn].data;
3052 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
3053 s->dsp.prefetch(src[0]+off, s->linesize, 4);
3054 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3055 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
3056 }
3057}
3058
/**
 * Performs inter prediction (motion compensation) for the current macroblock.
 *
 * The macroblock type read from s->current_picture.mb_type selects the
 * partitioning (16x16, 16x8, 8x16 or 8x8 with sub-partitions) and mc_part()
 * is called once per partition. The positional arguments of mc_part() are
 * (h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
 *  x_offset, y_offset, put/avg function tables, weight function tables,
 *  list0, list1); the function-table indices passed below pick the variant
 * matching the partition size.
 *
 * @param dest_y, dest_cb, dest_cr  destination planes of the macroblock
 * @param qpix_put, qpix_avg        luma qpel function tables (put / average)
 * @param chroma_put, chroma_avg    chroma function tables (put / average)
 * @param weight_op, weight_avg     weighted / bi-weighted prediction tables
 */
3059static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3060 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
3061 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
3062 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
3063 MpegEncContext * const s = &h->s;
3064 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3065 const int mb_type= s->current_picture.mb_type[mb_xy];
3066
3067 assert(IS_INTER(mb_type));
3068
3069 prefetch_motion(h, 0);
3070
3071 if(IS_16X16(mb_type)){
 /* one square partition covering the whole macroblock */
3072 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
3073 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
3074 &weight_op[0], &weight_avg[0],
3075 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3076 }else if(IS_16X8(mb_type)){
 /* two partitions stacked vertically; the second starts at block 8 and y_offset 4 */
3077 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
3078 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3079 &weight_op[1], &weight_avg[1],
3080 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3081 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
3082 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3083 &weight_op[1], &weight_avg[1],
3084 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3085 }else if(IS_8X16(mb_type)){
 /* two partitions side by side; the second starts at block 4 and x_offset 4 */
3086 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
3087 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3088 &weight_op[2], &weight_avg[2],
3089 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3090 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
3091 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3092 &weight_op[2], &weight_avg[2],
3093 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3094 }else{
3095 int i;
3096
3097 assert(IS_8X8(mb_type));
3098
 /* four 8x8 quadrants, each with its own sub-partitioning */
3099 for(i=0; i<4; i++){
3100 const int sub_mb_type= h->sub_mb_type[i];
3101 const int n= 4*i;
3102 int x_offset= (i&1)<<2;
3103 int y_offset= (i&2)<<1;
3104
3105 if(IS_SUB_8X8(sub_mb_type)){
3106 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3107 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3108 &weight_op[3], &weight_avg[3],
3109 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3110 }else if(IS_SUB_8X4(sub_mb_type)){
3111 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3112 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3113 &weight_op[4], &weight_avg[4],
3114 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3115 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3116 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3117 &weight_op[4], &weight_avg[4],
3118 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3119 }else if(IS_SUB_4X8(sub_mb_type)){
3120 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3121 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3122 &weight_op[5], &weight_avg[5],
3123 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3124 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3125 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3126 &weight_op[5], &weight_avg[5],
3127 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3128 }else{
3129 int j;
3130 assert(IS_SUB_4X4(sub_mb_type));
 /* four 4x4 blocks inside the quadrant */
3131 for(j=0; j<4; j++){
3132 int sub_x_offset= x_offset + 2*(j&1);
3133 int sub_y_offset= y_offset + (j&2);
3134 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3135 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3136 &weight_op[6], &weight_avg[6],
3137 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3138 }
3139 }
3140 }
3141 }
3142
3143 prefetch_motion(h, 1);
3144}
3145
3146static void decode_init_vlc(H264Context *h){
3147 static int done = 0;
3148
3149 if (!done) {
3150 int i;
3151 done = 1;
3152
3153 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3154 &chroma_dc_coeff_token_len [0], 1, 1,
3155 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
3156
3157 for(i=0; i<4; i++){
3158 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3159 &coeff_token_len [i][0], 1, 1,
3160 &coeff_token_bits[i][0], 1, 1, 1);
3161 }
3162
3163 for(i=0; i<3; i++){
3164 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3165 &chroma_dc_total_zeros_len [i][0], 1, 1,
3166 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
3167 }
3168 for(i=0; i<15; i++){
3169 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3170 &total_zeros_len [i][0], 1, 1,
3171 &total_zeros_bits[i][0], 1, 1, 1);
3172 }
3173
3174 for(i=0; i<6; i++){
3175 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3176 &run_len [i][0], 1, 1,
3177 &run_bits[i][0], 1, 1, 1);
3178 }
3179 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3180 &run_len [6][0], 1, 1,
3181 &run_bits[6][0], 1, 1, 1);
3182 }
3183}
3184
3185/**
3186 * Sets the intra prediction function pointers.
3187 */
3188static void init_pred_ptrs(H264Context *h){
3189// MpegEncContext * const s = &h->s;
3190
3191 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3192 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3193 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3194 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3195 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3196 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3197 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3198 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3199 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
3200 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3201 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3202 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
3203
3204 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3205 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3206 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3207 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3208 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3209 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3210 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3211 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3212 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3213 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3214 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3215 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
3216
3217 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
3218 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
3219 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
3220 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
3221 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3222 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3223 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
3224
3225 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
3226 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
3227 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
3228 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
3229 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3230 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3231 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
3232}
3233
3234static void free_tables(H264Context *h){
3235 av_freep(&h->intra4x4_pred_mode);
3236 av_freep(&h->chroma_pred_mode_table);
3237 av_freep(&h->cbp_table);
3238 av_freep(&h->mvd_table[0]);
3239 av_freep(&h->mvd_table[1]);
3240 av_freep(&h->direct_table);
3241 av_freep(&h->non_zero_count);
3242 av_freep(&h->slice_table_base);
3243 av_freep(&h->top_borders[1]);
3244 av_freep(&h->top_borders[0]);
3245 h->slice_table= NULL;
3246
3247 av_freep(&h->mb2b_xy);
3248 av_freep(&h->mb2b8_xy);
3249
3250 av_freep(&h->s.obmc_scratchpad);
3251}
3252
3253static void init_dequant8_coeff_table(H264Context *h){
3254 int i,q,x;
3255 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3256 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3257 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3258
3259 for(i=0; i<2; i++ ){
3260 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3261 h->dequant8_coeff[1] = h->dequant8_buffer[0];
3262 break;
3263 }
3264
3265 for(q=0; q<52; q++){
3266 int shift = div6[q];
3267 int idx = rem6[q];
3268 for(x=0; x<64; x++)
3269 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3270 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3271 h->pps.scaling_matrix8[i][x]) << shift;
3272 }
3273 }
3274}
3275
3276static void init_dequant4_coeff_table(H264Context *h){
3277 int i,j,q,x;
3278 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3279 for(i=0; i<6; i++ ){
3280 h->dequant4_coeff[i] = h->dequant4_buffer[i];
3281 for(j=0; j<i; j++){
3282 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3283 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3284 break;
3285 }
3286 }
3287 if(j<i)
3288 continue;
3289
3290 for(q=0; q<52; q++){
3291 int shift = div6[q] + 2;
3292 int idx = rem6[q];
3293 for(x=0; x<16; x++)
3294 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3295 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3296 h->pps.scaling_matrix4[i][x]) << shift;
3297 }
3298 }
3299}
3300
3301static void init_dequant_tables(H264Context *h){
3302 int i,x;
3303 init_dequant4_coeff_table(h);
3304 if(h->pps.transform_8x8_mode)
3305 init_dequant8_coeff_table(h);
3306 if(h->sps.transform_bypass){
3307 for(i=0; i<6; i++)
3308 for(x=0; x<16; x++)
3309 h->dequant4_coeff[i][0][x] = 1<<6;
3310 if(h->pps.transform_8x8_mode)
3311 for(i=0; i<2; i++)
3312 for(x=0; x<64; x++)
3313 h->dequant8_coeff[i][0][x] = 1<<6;
3314 }
3315}
3316
3317
3318/**
3319 * allocates tables.
3320 * needs width/height
3321 */
3322static int alloc_tables(H264Context *h){
3323 MpegEncContext * const s = &h->s;
3324 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3325 int x,y;
3326
3327 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3328
3329 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3330 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3331 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3332 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3333 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
3334
3335 if( h->pps.cabac ) {
3336 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3337 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3338 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3339 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
3340 }
3341
3342 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3343 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
3344
3345 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3346 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3347 for(y=0; y<s->mb_height; y++){
3348 for(x=0; x<s->mb_width; x++){
3349 const int mb_xy= x + y*s->mb_stride;
3350 const int b_xy = 4*x + 4*y*h->b_stride;
3351 const int b8_xy= 2*x + 2*y*h->b8_stride;
3352
3353 h->mb2b_xy [mb_xy]= b_xy;
3354 h->mb2b8_xy[mb_xy]= b8_xy;
3355 }
3356 }
3357
3358 s->obmc_scratchpad = NULL;
3359
3360 if(!h->dequant4_coeff[0])
3361 init_dequant_tables(h);
3362
3363 return 0;
3364fail:
3365 free_tables(h);
3366 return -1;
3367}
3368
3369static void common_init(H264Context *h){
3370 MpegEncContext * const s = &h->s;
3371
3372 s->width = s->avctx->width;
3373 s->height = s->avctx->height;
3374 s->codec_id= s->avctx->codec->id;
3375
3376 init_pred_ptrs(h);
3377
3378 h->dequant_coeff_pps= -1;
3379 s->unrestricted_mv=1;
3380 s->decode=1; //FIXME
3381
3382 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3383 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
3384}
3385
3386static int decode_init(AVCodecContext *avctx){
3387 H264Context *h= avctx->priv_data;
3388 MpegEncContext * const s = &h->s;
3389
3390 MPV_decode_defaults(s);
3391
3392 s->avctx = avctx;
3393 common_init(h);
3394
3395 s->out_format = FMT_H264;
3396 s->workaround_bugs= avctx->workaround_bugs;
3397
3398 // set defaults
3399// s->decode_mb= ff_h263_decode_mb;
3400 s->low_delay= 1;
3401 avctx->pix_fmt= PIX_FMT_YUV420P;
3402
3403 decode_init_vlc(h);
3404
3405 if(avctx->extradata_size > 0 && avctx->extradata &&
3406 *(char *)avctx->extradata == 1){
3407 h->is_avc = 1;
3408 h->got_avcC = 0;
3409 } else {
3410 h->is_avc = 0;
3411 }
3412
3413 return 0;
3414}
3415
3416static int frame_start(H264Context *h){
3417 MpegEncContext * const s = &h->s;
3418 int i;
3419
3420 if(MPV_frame_start(s, s->avctx) < 0)
3421 return -1;
3422 ff_er_frame_start(s);
3423
3424 assert(s->linesize && s->uvlinesize);
3425
3426 for(i=0; i<16; i++){
3427 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3428 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3429 }
3430 for(i=0; i<4; i++){
3431 h->block_offset[16+i]=
3432 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3433 h->block_offset[24+16+i]=
3434 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3435 }
3436
3437 /* can't be in alloc_tables because linesize isn't known there.
3438 * FIXME: redo bipred weight to not require extra buffer? */
3439 if(!s->obmc_scratchpad)
3440 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3441
3442 /* some macroblocks will be accessed before they're available */
3443 if(FRAME_MBAFF)
3444 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3445
3446// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
3447 return 0;
3448}
3449
/**
 * Saves the right-most column and the bottom row of the macroblock at
 * src_y/src_cb/src_cr into h->left_border and h->top_borders[0][s->mb_x].
 *
 * left_border[0] (and the chroma corner entries 17 and 17+9) take the last
 * sample of the previously stored top border before that border is
 * overwritten, so the statement order below matters.
 * Chroma is skipped when CODEC_FLAG_GRAY is set.
 */
3450static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3451 MpegEncContext * const s = &h->s;
3452 int i;
3453
 /* step back one row so that index i addresses macroblock row i-1 */
3454 src_y -= linesize;
3455 src_cb -= uvlinesize;
3456 src_cr -= uvlinesize;
3457
3458 // There are two lines saved, the line above the top macroblock of a pair,
3459 // and the line above the bottom macroblock
3460 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3461 for(i=1; i<17; i++){
3462 h->left_border[i]= src_y[15+i* linesize];
3463 }
3464
 /* last luma row of the macroblock, copied as two 8-byte words */
3465 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3466 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3467
3468 if(!(s->flags&CODEC_FLAG_GRAY)){
3469 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3470 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3471 for(i=1; i<9; i++){
3472 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3473 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3474 }
 /* last chroma rows: Cb at offset 16, Cr at offset 24 of the border line */
3475 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3476 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
3477 }
3478}
3479
/**
 * Exchanges the samples left of and above the macroblock at
 * src_y/src_cb/src_cr with the copies kept in h->left_border and
 * h->top_borders[0].
 *
 * XCHG(a,b,t,xchg) always stores the old value of a into b; a receives the
 * old value of b only when xchg is non-zero. Here a is the saved border and
 * b the picture sample, so the picture always gets the saved values while
 * the saved border is refreshed from the picture only where xchg is set
 * (some top-border words are exchanged unconditionally).
 * The left column is skipped for mb_x == 0, the top row for mb_y == 0, and
 * chroma is skipped when CODEC_FLAG_GRAY is set.
 */
3480static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3481 MpegEncContext * const s = &h->s;
3482 int temp8, i;
3483 uint64_t temp64;
3484 int deblock_left = (s->mb_x > 0);
3485 int deblock_top = (s->mb_y > 0);
3486
 /* move to the sample diagonally above-left of the macroblock */
3487 src_y -= linesize + 1;
3488 src_cb -= uvlinesize + 1;
3489 src_cr -= uvlinesize + 1;
3490
3491#define XCHG(a,b,t,xchg)\
3492t= a;\
3493if(xchg)\
3494 a= b;\
3495b= t;
3496
3497 if(deblock_left){
 /* without a top neighbour the corner entry (i == 0) is left alone */
3498 for(i = !deblock_top; i<17; i++){
3499 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3500 }
3501 }
3502
3503 if(deblock_top){
3504 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3505 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
 /* first 8 samples above the macroblock to the right, if there is one */
3506 if(s->mb_x+1 < s->mb_width){
3507 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3508 }
3509 }
3510
3511 if(!(s->flags&CODEC_FLAG_GRAY)){
3512 if(deblock_left){
3513 for(i = !deblock_top; i<9; i++){
3514 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3515 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3516 }
3517 }
3518 if(deblock_top){
3519 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3520 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3521 }
3522 }
3523}
3524
/**
 * Macroblock-pair variant of backup_mb_border(): saves the right-most
 * column (32 luma rows, 16 chroma rows) and the last two rows of the pair
 * at src_y/src_cb/src_cr into h->left_border and h->top_borders[0..1].
 *
 * The first two left_border entries of each plane take the last sample of
 * the previously stored top borders before those are overwritten, so the
 * statement order below matters. Chroma is skipped when CODEC_FLAG_GRAY is
 * set.
 */
3525static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3526 MpegEncContext * const s = &h->s;
3527 int i;
3528
 /* step back two rows so that index i addresses pair row i-2 */
3529 src_y -= 2 * linesize;
3530 src_cb -= 2 * uvlinesize;
3531 src_cr -= 2 * uvlinesize;
3532
3533 // There are two lines saved, the line above the top macroblock of a pair,
3534 // and the line above the bottom macroblock
3535 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3536 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3537 for(i=2; i<34; i++){
3538 h->left_border[i]= src_y[15+i* linesize];
3539 }
3540
 /* the last two luma rows of the pair go to top_borders[0] and [1] */
3541 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3542 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3543 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3544 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3545
3546 if(!(s->flags&CODEC_FLAG_GRAY)){
3547 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3548 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3549 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3550 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3551 for(i=2; i<18; i++){
3552 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3553 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3554 }
 /* the last two chroma rows: Cb at offset 16, Cr at offset 24 */
3555 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3556 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3557 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3558 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
3559 }
3560}
3561
/**
 * Macroblock-pair variant of xchg_mb_border(): exchanges the two rows above
 * and the column left of the pair at src_y/src_cb/src_cr with the copies
 * kept in h->left_border and h->top_borders[0..1].
 *
 * XCHG(a,b,t,xchg) always stores the old value of a into b; a receives the
 * old value of b only when xchg is non-zero. The left column is skipped for
 * mb_x == 0, the top rows for mb_y <= 1, and chroma is skipped when
 * CODEC_FLAG_GRAY is set.
 */
3562static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3563 MpegEncContext * const s = &h->s;
3564 int temp8, i;
3565 uint64_t temp64;
3566 int deblock_left = (s->mb_x > 0);
3567 int deblock_top = (s->mb_y > 1);
3568
3569 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3570
 /* move two rows up and one sample left of the pair */
3571 src_y -= 2 * linesize + 1;
3572 src_cb -= 2 * uvlinesize + 1;
3573 src_cr -= 2 * uvlinesize + 1;
3574
3575#define XCHG(a,b,t,xchg)\
3576t= a;\
3577if(xchg)\
3578 a= b;\
3579b= t;
3580
3581 if(deblock_left){
 /* without top neighbours the two corner entries are left alone */
3582 for(i = (!deblock_top)<<1; i<34; i++){
3583 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3584 }
3585 }
3586
3587 if(deblock_top){
3588 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3589 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3590 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3591 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
 /* first 8 samples above the pair to the right, if there is one */
3592 if(s->mb_x+1 < s->mb_width){
3593 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3594 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3595 }
3596 }
3597
3598 if(!(s->flags&CODEC_FLAG_GRAY)){
3599 if(deblock_left){
3600 for(i = (!deblock_top) << 1; i<18; i++){
3601 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3602 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3603 }
3604 }
3605 if(deblock_top){
3606 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3607 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3608 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3609 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
3610 }
3611 }
3612}
3613
3614static void hl_decode_mb(H264Context *h){
3615 MpegEncContext * const s = &h->s;
3616 const int mb_x= s->mb_x;
3617 const int mb_y= s->mb_y;
3618 const int mb_xy= mb_x + mb_y*s->mb_stride;
3619 const int mb_type= s->current_picture.mb_type[mb_xy];
3620 uint8_t *dest_y, *dest_cb, *dest_cr;
3621 int linesize, uvlinesize /*dct_offset*/;
3622 int i;
3623 int *block_offset = &h->block_offset[0];
3624 const unsigned int bottom = mb_y & 1;
3625 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3626 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3627 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
3628
3629 if(!s->decode)
3630 return;
3631
3632 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3633 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3634 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3635
3636 if (MB_FIELD) {
3637 linesize = h->mb_linesize = s->linesize * 2;
3638 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3639 block_offset = &h->block_offset[24];
3640 if(mb_y&1){ //FIXME move out of this func?
3641 dest_y -= s->linesize*15;
3642 dest_cb-= s->uvlinesize*7;
3643 dest_cr-= s->uvlinesize*7;
3644 }
3645 if(FRAME_MBAFF) {
3646 int list;
3647 for(list=0; list<2; list++){
3648 if(!USES_LIST(mb_type, list))
3649 continue;
3650 if(IS_16X16(mb_type)){
3651 int8_t *ref = &h->ref_cache[list][scan8[0]];
3652 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3653 }else{
3654 for(i=0; i<16; i+=4){
3655 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3656 int ref = h->ref_cache[list][scan8[i]];
3657 if(ref >= 0)
3658 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
3659 }
3660 }
3661 }
3662 }
3663 } else {
3664 linesize = h->mb_linesize = s->linesize;
3665 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3666// dct_offset = s->linesize * 16;
3667 }
3668
3669 if(transform_bypass){
3670 idct_dc_add =
3671 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3672 }else if(IS_8x8DCT(mb_type)){
3673 idct_dc_add = s->dsp.h264_idct8_dc_add;
3674 idct_add = s->dsp.h264_idct8_add;
3675 }else{
3676 idct_dc_add = s->dsp.h264_idct_dc_add;
3677 idct_add = s->dsp.h264_idct_add;
3678 }
3679
3680 if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3681 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3682 int mbt_y = mb_y&~1;
3683 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3684 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3685 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3686 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
3687 }
3688
3689 if (IS_INTRA_PCM(mb_type)) {
3690 unsigned int x, y;
3691
3692 // The pixels are stored in h->mb array in the same order as levels,
3693 // copy them in output in the correct order.
3694 for(i=0; i<16; i++) {
3695 for (y=0; y<4; y++) {
3696 for (x=0; x<4; x++) {
3697 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3698 }
3699 }
3700 }
3701 for(i=16; i<16+4; i++) {
3702 for (y=0; y<4; y++) {
3703 for (x=0; x<4; x++) {
3704 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3705 }
3706 }
3707 }
3708 for(i=20; i<20+4; i++) {
3709 for (y=0; y<4; y++) {
3710 for (x=0; x<4; x++) {
3711 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3712 }
3713 }
3714 }
3715 } else {
3716 if(IS_INTRA(mb_type)){
3717 if(h->deblocking_filter && !FRAME_MBAFF)
3718 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3719
3720 if(!(s->flags&CODEC_FLAG_GRAY)){
3721 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3722 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3723 }
3724
3725 if(IS_INTRA4x4(mb_type)){
3726 if(!s->encoding){
3727 if(IS_8x8DCT(mb_type)){
3728 for(i=0; i<16; i+=4){
3729 uint8_t * const ptr= dest_y + block_offset[i];
3730 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3731 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3732 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3733 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3734 if(nnz){
3735 if(nnz == 1 && h->mb[i*16])
3736 idct_dc_add(ptr, h->mb + i*16, linesize);
3737 else
3738 idct_add(ptr, h->mb + i*16, linesize);
3739 }
3740 }
3741 }else
3742 for(i=0; i<16; i++){
3743 uint8_t * const ptr= dest_y + block_offset[i];
3744 uint8_t *topright;
3745 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3746 int nnz, tr;
3747
3748 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3749 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3750 assert(mb_y || linesize <= block_offset[i]);
3751 if(!topright_avail){
3752 tr= ptr[3 - linesize]*0x01010101;
3753 topright= (uint8_t*) &tr;
3754 }else
3755 topright= ptr + 4 - linesize;
3756 }else
3757 topright= NULL;
3758
3759 h->pred4x4[ dir ](ptr, topright, linesize);
3760 nnz = h->non_zero_count_cache[ scan8[i] ];
3761 if(nnz){
3762 if(s->codec_id == CODEC_ID_H264){
3763 if(nnz == 1 && h->mb[i*16])
3764 idct_dc_add(ptr, h->mb + i*16, linesize);
3765 else
3766 idct_add(ptr, h->mb + i*16, linesize);
3767 }else
3768 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
3769 }
3770 }
3771 }
3772 }else{
3773 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3774 if(s->codec_id == CODEC_ID_H264){
3775 if(!transform_bypass)
3776 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3777 }else
3778 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3779 }
3780 if(h->deblocking_filter && !FRAME_MBAFF)
3781 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
3782 }else if(s->codec_id == CODEC_ID_H264){
3783 hl_motion(h, dest_y, dest_cb, dest_cr,
3784 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3785 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3786 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
3787 }
3788
3789
3790 if(!IS_INTRA4x4(mb_type)){
3791 if(s->codec_id == CODEC_ID_H264){
3792 if(IS_INTRA16x16(mb_type)){
3793 for(i=0; i<16; i++){
3794 if(h->non_zero_count_cache[ scan8[i] ])
3795 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3796 else if(h->mb[i*16])
3797 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3798 }
3799 }else{
3800 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3801 for(i=0; i<16; i+=di){
3802 int nnz = h->non_zero_count_cache[ scan8[i] ];
3803 if(nnz){
3804 if(nnz==1 && h->mb[i*16])
3805 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3806 else
3807 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3808 }
3809 }
3810 }
3811 }else{
3812 for(i=0; i<16; i++){
3813 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3814 uint8_t * const ptr= dest_y + block_offset[i];
3815 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
3816 }
3817 }
3818 }
3819 }
3820
3821 if(!(s->flags&CODEC_FLAG_GRAY)){
3822 uint8_t *dest[2] = {dest_cb, dest_cr};
3823 if(transform_bypass){
3824 idct_add = idct_dc_add = s->dsp.add_pixels4;
3825 }else{
3826 idct_add = s->dsp.h264_idct_add;
3827 idct_dc_add = s->dsp.h264_idct_dc_add;
3828 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3829 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3830 }
3831 if(s->codec_id == CODEC_ID_H264){
3832 for(i=16; i<16+8; i++){
3833 if(h->non_zero_count_cache[ scan8[i] ])
3834 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3835 else if(h->mb[i*16])
3836 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3837 }
3838 }else{
3839 for(i=16; i<16+8; i++){
3840 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3841 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3842 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3843 }
3844 }
3845 }
3846 }
3847 }
3848 if(h->deblocking_filter) {
3849 if (FRAME_MBAFF) {
3850 //FIXME try deblocking one mb at a time?
3851 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3852 const int mb_y = s->mb_y - 1;
3853 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3854 const int mb_xy= mb_x + mb_y*s->mb_stride;
3855 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3856 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3857 if (!bottom) return;
3858 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3859 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3860 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3861
3862 if(IS_INTRA(mb_type_top | mb_type_bottom))
3863 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3864
3865 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3866 // deblock a pair
3867 // top
3868 s->mb_y--;
3869 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3870 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3871 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3872 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3873 // bottom
3874 s->mb_y++;
3875 tprintf("call mbaff filter_mb\n");
3876 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3877 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3878 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3879 } else {
3880 tprintf("call filter_mb\n");
3881 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3882 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3883 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3884 }
3885 }
3886}
3887
3888/**
3889 * fills the default_ref_list.
3890 */
3891static int fill_default_ref_list(H264Context *h){
3892 MpegEncContext * const s = &h->s;
3893 int i;
3894 int smallest_poc_greater_than_current = -1;
3895 Picture sorted_short_ref[32];
3896
3897 if(h->slice_type==B_TYPE){
3898 int out_i;
3899 int limit= INT_MIN;
3900
3901 /* sort frame according to poc in B slice */
3902 for(out_i=0; out_i<h->short_ref_count; out_i++){
3903 int best_i=INT_MIN;
3904 int best_poc=INT_MAX;
3905
3906 for(i=0; i<h->short_ref_count; i++){
3907 const int poc= h->short_ref[i]->poc;
3908 if(poc > limit && poc < best_poc){
3909 best_poc= poc;
3910 best_i= i;
3911 }
3912 }
3913
3914 assert(best_i != INT_MIN);
3915
3916 limit= best_poc;
3917 sorted_short_ref[out_i]= *h->short_ref[best_i];
3918 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3919 if (-1 == smallest_poc_greater_than_current) {
3920 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3921 smallest_poc_greater_than_current = out_i;
3922 }
3923 }
3924 }
3925 }
3926
3927 if(s->picture_structure == PICT_FRAME){
3928 if(h->slice_type==B_TYPE){
3929 int list;
3930 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3931
3932 // find the largest poc
3933 for(list=0; list<2; list++){
3934 int index = 0;
3935 int j= -99;
3936 int step= list ? -1 : 1;
3937
3938 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3939 while(j<0 || j>= h->short_ref_count){
3940 if(j != -99 && step == (list ? -1 : 1))
3941 return -1;
3942 step = -step;
3943 j= smallest_poc_greater_than_current + (step>>1);
3944 }
3945 if(sorted_short_ref[j].reference != 3) continue;
3946 h->default_ref_list[list][index ]= sorted_short_ref[j];
3947 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
3948 }
3949
3950 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3951 if(h->long_ref[i] == NULL) continue;
3952 if(h->long_ref[i]->reference != 3) continue;
3953
3954 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3955 h->default_ref_list[ list ][index++].pic_id= i;;
3956 }
3957
3958 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3959 // swap the two first elements of L1 when
3960 // L0 and L1 are identical
3961 Picture temp= h->default_ref_list[1][0];
3962 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3963 h->default_ref_list[1][1] = temp;
3964 }
3965
3966 if(index < h->ref_count[ list ])
3967 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
3968 }
3969 }else{
3970 int index=0;
3971 for(i=0; i<h->short_ref_count; i++){
3972 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3973 h->default_ref_list[0][index ]= *h->short_ref[i];
3974 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3975 }
3976 for(i = 0; i < 16; i++){
3977 if(h->long_ref[i] == NULL) continue;
3978 if(h->long_ref[i]->reference != 3) continue;
3979 h->default_ref_list[0][index ]= *h->long_ref[i];
3980 h->default_ref_list[0][index++].pic_id= i;;
3981 }
3982 if(index < h->ref_count[0])
3983 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3984 }
3985 }else{ //FIELD
3986 if(h->slice_type==B_TYPE){
3987 }else{
3988 //FIXME second field balh
3989 }
3990 }
3991#ifdef TRACE
3992 for (i=0; i<h->ref_count[0]; i++) {
3993 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3994 }
3995 if(h->slice_type==B_TYPE){
3996 for (i=0; i<h->ref_count[1]; i++) {
3997 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3998 }
3999 }
4000#endif
4001 return 0;
4002}
4003
4004static void print_short_term(H264Context *h);
4005static void print_long_term(H264Context *h);
4006
4007static int decode_ref_pic_list_reordering(H264Context *h){
4008 MpegEncContext * const s = &h->s;
4009 int list, index;
4010
4011 print_short_term(h);
4012 print_long_term(h);
4013 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
4014
4015 for(list=0; list<2; list++){
4016 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
4017
4018 if(get_bits1(&s->gb)){
4019 int pred= h->curr_pic_num;
4020
4021 for(index=0; ; index++){
4022 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
4023 int pic_id;
4024 int i;
4025 Picture *ref = NULL;
4026
4027 if(reordering_of_pic_nums_idc==3)
4028 break;
4029
4030 if(index >= h->ref_count[list]){
4031 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
4032 return -1;
4033 }
4034
4035 if(reordering_of_pic_nums_idc<3){
4036 if(reordering_of_pic_nums_idc<2){
4037 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
4038
4039 if(abs_diff_pic_num >= h->max_pic_num){
4040 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
4041 return -1;
4042 }
4043
4044 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
4045 else pred+= abs_diff_pic_num;
4046 pred &= h->max_pic_num - 1;
4047
4048 for(i= h->short_ref_count-1; i>=0; i--){
4049 ref = h->short_ref[i];
4050 assert(ref->reference == 3);
4051 assert(!ref->long_ref);
4052 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
4053 break;
4054 }
4055 if(i>=0)
4056 ref->pic_id= ref->frame_num;
4057 }else{
4058 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
4059 ref = h->long_ref[pic_id];
4060 ref->pic_id= pic_id;
4061 assert(ref->reference == 3);
4062 assert(ref->long_ref);
4063 i=0;
4064 }
4065
4066 if (i < 0) {
4067 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
4068 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
4069 } else {
4070 for(i=index; i+1<h->ref_count[list]; i++){
4071 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
4072 break;
4073 }
4074 for(; i > index; i--){
4075 h->ref_list[list][i]= h->ref_list[list][i-1];
4076 }
4077 h->ref_list[list][index]= *ref;
4078 }
4079 }else{
4080 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
4081 return -1;
4082 }
4083 }
4084 }
4085
4086 if(h->slice_type!=B_TYPE) break;
4087 }
4088 for(list=0; list<2; list++){
4089 for(index= 0; index < h->ref_count[list]; index++){
4090 if(!h->ref_list[list][index].data[0])
4091 h->ref_list[list][index]= s->current_picture;
4092 }
4093 if(h->slice_type!=B_TYPE) break;
4094 }
4095
4096 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4097 direct_dist_scale_factor(h);
4098 direct_ref_list_init(h);
4099 return 0;
4100}
4101
4102static void fill_mbaff_ref_list(H264Context *h){
4103 int list, i, j;
4104 for(list=0; list<2; list++){
4105 for(i=0; i<h->ref_count[list]; i++){
4106 Picture *frame = &h->ref_list[list][i];
4107 Picture *field = &h->ref_list[list][16+2*i];
4108 field[0] = *frame;
4109 for(j=0; j<3; j++)
4110 field[0].linesize[j] <<= 1;
4111 field[1] = field[0];
4112 for(j=0; j<3; j++)
4113 field[1].data[j] += frame->linesize[j];
4114
4115 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4116 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4117 for(j=0; j<2; j++){
4118 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4119 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
4120 }
4121 }
4122 }
4123 for(j=0; j<h->ref_count[1]; j++){
4124 for(i=0; i<h->ref_count[0]; i++)
4125 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4126 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
4127 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
4128 }
4129}
4130
4131static int pred_weight_table(H264Context *h){
4132 MpegEncContext * const s = &h->s;
4133 int list, i;
4134 int luma_def, chroma_def;
4135
4136 h->use_weight= 0;
4137 h->use_weight_chroma= 0;
4138 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4139 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
4140 luma_def = 1<<h->luma_log2_weight_denom;
4141 chroma_def = 1<<h->chroma_log2_weight_denom;
4142
4143 for(list=0; list<2; list++){
4144 for(i=0; i<h->ref_count[list]; i++){
4145 int luma_weight_flag, chroma_weight_flag;
4146
4147 luma_weight_flag= get_bits1(&s->gb);
4148 if(luma_weight_flag){
4149 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4150 h->luma_offset[list][i]= get_se_golomb(&s->gb);
4151 if( h->luma_weight[list][i] != luma_def
4152 || h->luma_offset[list][i] != 0)
4153 h->use_weight= 1;
4154 }else{
4155 h->luma_weight[list][i]= luma_def;
4156 h->luma_offset[list][i]= 0;
4157 }
4158
4159 chroma_weight_flag= get_bits1(&s->gb);
4160 if(chroma_weight_flag){
4161 int j;
4162 for(j=0; j<2; j++){
4163 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4164 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4165 if( h->chroma_weight[list][i][j] != chroma_def
4166 || h->chroma_offset[list][i][j] != 0)
4167 h->use_weight_chroma= 1;
4168 }
4169 }else{
4170 int j;
4171 for(j=0; j<2; j++){
4172 h->chroma_weight[list][i][j]= chroma_def;
4173 h->chroma_offset[list][i][j]= 0;
4174 }
4175 }
4176 }
4177 if(h->slice_type != B_TYPE) break;
4178 }
4179 h->use_weight= h->use_weight || h->use_weight_chroma;
4180 return 0;
4181}
4182
4183static void implicit_weight_table(H264Context *h){
4184 MpegEncContext * const s = &h->s;
4185 int ref0, ref1;
4186 int cur_poc = s->current_picture_ptr->poc;
4187
4188 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4189 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4190 h->use_weight= 0;
4191 h->use_weight_chroma= 0;
4192 return;
4193 }
4194
4195 h->use_weight= 2;
4196 h->use_weight_chroma= 2;
4197 h->luma_log2_weight_denom= 5;
4198 h->chroma_log2_weight_denom= 5;
4199
4200 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4201 int poc0 = h->ref_list[0][ref0].poc;
4202 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4203 int poc1 = h->ref_list[1][ref1].poc;
4204 int td = clip(poc1 - poc0, -128, 127);
4205 if(td){
4206 int tb = clip(cur_poc - poc0, -128, 127);
4207 int tx = (16384 + (ABS(td) >> 1)) / td;
4208 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
4209 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4210 h->implicit_weight[ref0][ref1] = 32;
4211 else
4212 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4213 }else
4214 h->implicit_weight[ref0][ref1] = 32;
4215 }
4216 }
4217}
4218
4219static inline void unreference_pic(H264Context *h, Picture *pic){
4220 int i;
4221 pic->reference=0;
4222 if(pic == h->delayed_output_pic)
4223 pic->reference=1;
4224 else{
4225 for(i = 0; h->delayed_pic[i]; i++)
4226 if(pic == h->delayed_pic[i]){
4227 pic->reference=1;
4228 break;
4229 }
4230 }
4231}
4232
4233/**
4234 * instantaneous decoder refresh.
4235 */
4236static void idr(H264Context *h){
4237 int i;
4238
4239 for(i=0; i<16; i++){
4240 if (h->long_ref[i] != NULL) {
4241 unreference_pic(h, h->long_ref[i]);
4242 h->long_ref[i]= NULL;
4243 }
4244 }
4245 h->long_ref_count=0;
4246
4247 for(i=0; i<h->short_ref_count; i++){
4248 unreference_pic(h, h->short_ref[i]);
4249 h->short_ref[i]= NULL;
4250 }
4251 h->short_ref_count=0;
4252}
4253
4254/* forget old pics after a seek */
4255static void flush_dpb(AVCodecContext *avctx){
4256 H264Context *h= avctx->priv_data;
4257 int i;
4258 for(i=0; i<16; i++) {
4259 if(h->delayed_pic[i])
4260 h->delayed_pic[i]->reference= 0;
4261 h->delayed_pic[i]= NULL;
4262 }
4263 if(h->delayed_output_pic)
4264 h->delayed_output_pic->reference= 0;
4265 h->delayed_output_pic= NULL;
4266 idr(h);
4267 if(h->s.current_picture_ptr)
4268 h->s.current_picture_ptr->reference= 0;
4269}
4270
4271/**
4272 *
4273 * @return the removed picture or NULL if an error occurs
4274 */
4275static Picture * remove_short(H264Context *h, int frame_num){
4276 MpegEncContext * const s = &h->s;
4277 int i;
4278
4279 if(s->avctx->debug&FF_DEBUG_MMCO)
4280 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4281
4282 for(i=0; i<h->short_ref_count; i++){
4283 Picture *pic= h->short_ref[i];
4284 if(s->avctx->debug&FF_DEBUG_MMCO)
4285 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4286 if(pic->frame_num == frame_num){
4287 h->short_ref[i]= NULL;
4288 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4289 h->short_ref_count--;
4290 return pic;
4291 }
4292 }
4293 return NULL;
4294}
4295
4296/**
4297 *
4298 * @return the removed picture or NULL if an error occurs
4299 */
4300static Picture * remove_long(H264Context *h, int i){
4301 Picture *pic;
4302
4303 pic= h->long_ref[i];
4304 h->long_ref[i]= NULL;
4305 if(pic) h->long_ref_count--;
4306
4307 return pic;
4308}
4309
4310/**
4311 * print short term list
4312 */
4313static void print_short_term(H264Context *h) {
4314 uint32_t i;
4315 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4316 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4317 for(i=0; i<h->short_ref_count; i++){
4318 Picture *pic= h->short_ref[i];
4319 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4320 }
4321 }
4322}
4323
4324/**
4325 * print long term list
4326 */
4327static void print_long_term(H264Context *h) {
4328 uint32_t i;
4329 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4330 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4331 for(i = 0; i < 16; i++){
4332 Picture *pic= h->long_ref[i];
4333 if (pic) {
4334 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4335 }
4336 }
4337 }
4338}
4339
4340/**
4341 * Executes the reference picture marking (memory management control operations).
4342 */
4343static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4344 MpegEncContext * const s = &h->s;
4345 int i, j;
4346 int current_is_long=0;
4347 Picture *pic;
4348
4349 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4350 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4351
4352 for(i=0; i<mmco_count; i++){
4353 if(s->avctx->debug&FF_DEBUG_MMCO)
4354 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4355
4356 switch(mmco[i].opcode){
4357 case MMCO_SHORT2UNUSED:
4358 pic= remove_short(h, mmco[i].short_frame_num);
4359 if(pic)
4360 unreference_pic(h, pic);
4361 else if(s->avctx->debug&FF_DEBUG_MMCO)
4362 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
4363 break;
4364 case MMCO_SHORT2LONG:
4365 pic= remove_long(h, mmco[i].long_index);
4366 if(pic) unreference_pic(h, pic);
4367
4368 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4369 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4370 h->long_ref_count++;
4371 break;
4372 case MMCO_LONG2UNUSED:
4373 pic= remove_long(h, mmco[i].long_index);
4374 if(pic)
4375 unreference_pic(h, pic);
4376 else if(s->avctx->debug&FF_DEBUG_MMCO)
4377 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
4378 break;
4379 case MMCO_LONG:
4380 pic= remove_long(h, mmco[i].long_index);
4381 if(pic) unreference_pic(h, pic);
4382
4383 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4384 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4385 h->long_ref_count++;
4386
4387 current_is_long=1;
4388 break;
4389 case MMCO_SET_MAX_LONG:
4390 assert(mmco[i].long_index <= 16);
4391 // just remove the long term which index is greater than new max
4392 for(j = mmco[i].long_index; j<16; j++){
4393 pic = remove_long(h, j);
4394 if (pic) unreference_pic(h, pic);
4395 }
4396 break;
4397 case MMCO_RESET:
4398 while(h->short_ref_count){
4399 pic= remove_short(h, h->short_ref[0]->frame_num);
4400 unreference_pic(h, pic);
4401 }
4402 for(j = 0; j < 16; j++) {
4403 pic= remove_long(h, j);
4404 if(pic) unreference_pic(h, pic);
4405 }
4406 break;
4407 default: assert(0);
4408 }
4409 }
4410
4411 if(!current_is_long){
4412 pic= remove_short(h, s->current_picture_ptr->frame_num);
4413 if(pic){
4414 unreference_pic(h, pic);
4415 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4416 }
4417
4418 if(h->short_ref_count)
4419 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4420
4421 h->short_ref[0]= s->current_picture_ptr;
4422 h->short_ref[0]->long_ref=0;
4423 h->short_ref_count++;
4424 }
4425
4426 print_short_term(h);
4427 print_long_term(h);
4428 return 0;
4429}
4430
4431static int decode_ref_pic_marking(H264Context *h){
4432 MpegEncContext * const s = &h->s;
4433 int i;
4434
4435 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4436 s->broken_link= get_bits1(&s->gb) -1;
4437 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4438 if(h->mmco[0].long_index == -1)
4439 h->mmco_index= 0;
4440 else{
4441 h->mmco[0].opcode= MMCO_LONG;
4442 h->mmco_index= 1;
4443 }
4444 }else{
4445 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4446 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4447 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4448
4449 h->mmco[i].opcode= opcode;
4450 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
4451 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4452/* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4453 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4454 return -1;
4455 }*/
4456 }
4457 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4458 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4459 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4460 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4461 return -1;
4462 }
4463 }
4464
4465 if(opcode > MMCO_LONG){
4466 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4467 return -1;
4468 }
4469 if(opcode == MMCO_END)
4470 break;
4471 }
4472 h->mmco_index= i;
4473 }else{
4474 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4475
4476 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4477 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4478 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
4479 h->mmco_index= 1;
4480 }else
4481 h->mmco_index= 0;
4482 }
4483 }
4484
4485 return 0;
4486}
4487
4488static int init_poc(H264Context *h){
4489 MpegEncContext * const s = &h->s;
4490 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4491 int field_poc[2];
4492
4493 if(h->nal_unit_type == NAL_IDR_SLICE){
4494 h->frame_num_offset= 0;
4495 }else{
4496 if(h->frame_num < h->prev_frame_num)
4497 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4498 else
4499 h->frame_num_offset= h->prev_frame_num_offset;
4500 }
4501
4502 if(h->sps.poc_type==0){
4503 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4504
4505 if(h->nal_unit_type == NAL_IDR_SLICE){
4506 h->prev_poc_msb=
4507 h->prev_poc_lsb= 0;
4508 }
4509
4510 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4511 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4512 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4513 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4514 else
4515 h->poc_msb = h->prev_poc_msb;
4516//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4517 field_poc[0] =
4518 field_poc[1] = h->poc_msb + h->poc_lsb;
4519 if(s->picture_structure == PICT_FRAME)
4520 field_poc[1] += h->delta_poc_bottom;
4521 }else if(h->sps.poc_type==1){
4522 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4523 int i;
4524
4525 if(h->sps.poc_cycle_length != 0)
4526 abs_frame_num = h->frame_num_offset + h->frame_num;
4527 else
4528 abs_frame_num = 0;
4529
4530 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4531 abs_frame_num--;
4532
4533 expected_delta_per_poc_cycle = 0;
4534 for(i=0; i < h->sps.poc_cycle_length; i++)
4535 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4536
4537 if(abs_frame_num > 0){
4538 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4539 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4540
4541 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4542 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4543 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4544 } else
4545 expectedpoc = 0;
4546
4547 if(h->nal_ref_idc == 0)
4548 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4549
4550 field_poc[0] = expectedpoc + h->delta_poc[0];
4551 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4552
4553 if(s->picture_structure == PICT_FRAME)
4554 field_poc[1] += h->delta_poc[1];
4555 }else{
4556 int poc;
4557 if(h->nal_unit_type == NAL_IDR_SLICE){
4558 poc= 0;
4559 }else{
4560 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4561 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4562 }
4563 field_poc[0]= poc;
4564 field_poc[1]= poc;
4565 }
4566
4567 if(s->picture_structure != PICT_BOTTOM_FIELD)
4568 s->current_picture_ptr->field_poc[0]= field_poc[0];
4569 if(s->picture_structure != PICT_TOP_FIELD)
4570 s->current_picture_ptr->field_poc[1]= field_poc[1];
4571 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4572 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4573
4574 return 0;
4575}
4576
4577/**
4578 * decodes a slice header.
4579 * this will allso call MPV_common_init() and frame_start() as needed
4580 */
4581static int decode_slice_header(H264Context *h){
4582 MpegEncContext * const s = &h->s;
4583 int first_mb_in_slice, pps_id;
4584 int num_ref_idx_active_override_flag;
4585 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4586 int slice_type;
4587 int default_ref_list_done = 0;
4588
4589 s->current_picture.reference= h->nal_ref_idc != 0;
4590 s->dropable= h->nal_ref_idc == 0;
4591
4592 first_mb_in_slice= get_ue_golomb(&s->gb);
4593
4594 slice_type= get_ue_golomb(&s->gb);
4595 if(slice_type > 9){
4596 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4597 return -1;
4598 }
4599 if(slice_type > 4){
4600 slice_type -= 5;
4601 h->slice_type_fixed=1;
4602 }else
4603 h->slice_type_fixed=0;
4604
4605 slice_type= slice_type_map[ slice_type ];
4606 if (slice_type == I_TYPE
4607 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4608 default_ref_list_done = 1;
4609 }
4610 h->slice_type= slice_type;
4611
4612 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4613
4614 pps_id= get_ue_golomb(&s->gb);
4615 if(pps_id>255){
4616 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4617 return -1;
4618 }
4619 h->pps= h->pps_buffer[pps_id];
4620 if(h->pps.slice_group_count == 0){
4621 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4622 return -1;
4623 }
4624
4625 h->sps= h->sps_buffer[ h->pps.sps_id ];
4626 if(h->sps.log2_max_frame_num == 0){
4627 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4628 return -1;
4629 }
4630
4631 if(h->dequant_coeff_pps != pps_id){
4632 h->dequant_coeff_pps = pps_id;
4633 init_dequant_tables(h);
4634 }
4635
4636 s->mb_width= h->sps.mb_width;
4637 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4638
4639 h->b_stride= s->mb_width*4;
4640 h->b8_stride= s->mb_width*2;
4641
4642 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4643 if(h->sps.frame_mbs_only_flag)
4644 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4645 else
4646 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4647
4648 if (s->context_initialized
4649 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4650 free_tables(h);
4651 MPV_common_end(s);
4652 }
4653 if (!s->context_initialized) {
4654 if (MPV_common_init(s) < 0)
4655 return -1;
4656
4657 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4658 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4659 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4660 }else{
4661 int i;
4662 for(i=0; i<16; i++){
4663#define T(x) (x>>2) | ((x<<2) & 0xF)
4664 h->zigzag_scan[i] = T(zigzag_scan[i]);
4665 h-> field_scan[i] = T( field_scan[i]);
4666#undef T
4667 }
4668 }
4669 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4670 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4671 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4672 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4673 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4674 }else{
4675 int i;
4676 for(i=0; i<64; i++){
4677#define T(x) (x>>3) | ((x&7)<<3)
4678 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4679 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4680 h->field_scan8x8[i] = T(field_scan8x8[i]);
4681 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
4682#undef T
4683 }
4684 }
4685 if(h->sps.transform_bypass){ //FIXME same ugly
4686 h->zigzag_scan_q0 = zigzag_scan;
4687 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4688 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4689 h->field_scan_q0 = field_scan;
4690 h->field_scan8x8_q0 = field_scan8x8;
4691 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4692 }else{
4693 h->zigzag_scan_q0 = h->zigzag_scan;
4694 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4695 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4696 h->field_scan_q0 = h->field_scan;
4697 h->field_scan8x8_q0 = h->field_scan8x8;
4698 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4699 }
4700
4701 alloc_tables(h);
4702
4703 s->avctx->width = s->width;
4704 s->avctx->height = s->height;
4705 s->avctx->sample_aspect_ratio= h->sps.sar;
4706 if(!s->avctx->sample_aspect_ratio.den)
4707 s->avctx->sample_aspect_ratio.den = 1;
4708
4709 if(h->sps.timing_info_present_flag){
4710 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
4711 if(h->x264_build > 0 && h->x264_build < 44)
4712 s->avctx->time_base.den *= 2;
4713 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4714 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4715 }
4716 }
4717
4718 if(h->slice_num == 0){
4719 if(frame_start(h) < 0)
4720 return -1;
4721 }
4722
4723 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4724 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4725
4726 h->mb_mbaff = 0;
4727 h->mb_aff_frame = 0;
4728 if(h->sps.frame_mbs_only_flag){
4729 s->picture_structure= PICT_FRAME;
4730 }else{
4731 if(get_bits1(&s->gb)) { //field_pic_flag
4732 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4733 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4734 } else {
4735 s->picture_structure= PICT_FRAME;
4736 h->mb_aff_frame = h->sps.mb_aff;
4737 }
4738 }
4739
4740 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4741 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4742 if(s->mb_y >= s->mb_height){
4743 return -1;
4744 }
4745
4746 if(s->picture_structure==PICT_FRAME){
4747 h->curr_pic_num= h->frame_num;
4748 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4749 }else{
4750 h->curr_pic_num= 2*h->frame_num;
4751 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4752 }
4753
4754 if(h->nal_unit_type == NAL_IDR_SLICE){
4755 get_ue_golomb(&s->gb); /* idr_pic_id */
4756 }
4757
4758 if(h->sps.poc_type==0){
4759 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4760
4761 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4762 h->delta_poc_bottom= get_se_golomb(&s->gb);
4763 }
4764 }
4765
4766 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4767 h->delta_poc[0]= get_se_golomb(&s->gb);
4768
4769 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4770 h->delta_poc[1]= get_se_golomb(&s->gb);
4771 }
4772
4773 init_poc(h);
4774
4775 if(h->pps.redundant_pic_cnt_present){
4776 h->redundant_pic_count= get_ue_golomb(&s->gb);
4777 }
4778
4779 //set defaults, might be overriden a few line later
4780 h->ref_count[0]= h->pps.ref_count[0];
4781 h->ref_count[1]= h->pps.ref_count[1];
4782
4783 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4784 if(h->slice_type == B_TYPE){
4785 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4786 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4787 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4788 }
4789 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4790
4791 if(num_ref_idx_active_override_flag){
4792 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4793 if(h->slice_type==B_TYPE)
4794 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4795
4796 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4797 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4798 return -1;
4799 }
4800 }
4801 }
4802
4803 if(!default_ref_list_done){
4804 fill_default_ref_list(h);
4805 }
4806
4807 if(decode_ref_pic_list_reordering(h) < 0)
4808 return -1;
4809
4810 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4811 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4812 pred_weight_table(h);
4813 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4814 implicit_weight_table(h);
4815 else
4816 h->use_weight = 0;
4817
4818 if(s->current_picture.reference)
4819 decode_ref_pic_marking(h);
4820
4821 if(FRAME_MBAFF)
4822 fill_mbaff_ref_list(h);
4823
4824 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4825 h->cabac_init_idc = get_ue_golomb(&s->gb);
4826
4827 h->last_qscale_diff = 0;
4828 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4829 if(s->qscale<0 || s->qscale>51){
4830 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4831 return -1;
4832 }
4833 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4834 //FIXME qscale / qp ... stuff
4835 if(h->slice_type == SP_TYPE){
4836 get_bits1(&s->gb); /* sp_for_switch_flag */
4837 }
4838 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4839 get_se_golomb(&s->gb); /* slice_qs_delta */
4840 }
4841
4842 h->deblocking_filter = 1;
4843 h->slice_alpha_c0_offset = 0;
4844 h->slice_beta_offset = 0;
4845 if( h->pps.deblocking_filter_parameters_present ) {
4846 h->deblocking_filter= get_ue_golomb(&s->gb);
4847 if(h->deblocking_filter < 2)
4848 h->deblocking_filter^= 1; // 1<->0
4849
4850 if( h->deblocking_filter ) {
4851 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4852 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4853 }
4854 }
4855 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4856 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4857 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4858 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4859 h->deblocking_filter= 0;
4860
4861#if 0 //FMO
4862 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4863 slice_group_change_cycle= get_bits(&s->gb, ?);
4864#endif
4865
4866 h->slice_num++;
4867
4868 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4869 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4870
4871 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4872 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4873 h->slice_num,
4874 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4875 first_mb_in_slice,
4876 av_get_pict_type_char(h->slice_type),
4877 pps_id, h->frame_num,
4878 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4879 h->ref_count[0], h->ref_count[1],
4880 s->qscale,
4881 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4882 h->use_weight,
4883 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
4884 );
4885 }
4886
4887 return 0;
4888}
4889
4890/**
4891 *
4892 */
4893static inline int get_level_prefix(GetBitContext *gb){
4894 unsigned int buf;
4895 int log;
4896
4897 OPEN_READER(re, gb);
4898 UPDATE_CACHE(re, gb);
4899 buf=GET_CACHE(re, gb);
4900
4901 log= 32 - av_log2(buf);
4902#ifdef TRACE
4903 print_bin(buf>>(32-log), log);
4904 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4905#endif
4906
4907 LAST_SKIP_BITS(re, gb, log);
4908 CLOSE_READER(re, gb);
4909
4910 return log-1;
4911}
4912
4913static inline int get_dct8x8_allowed(H264Context *h){
4914 int i;
4915 for(i=0; i<4; i++){
4916 if(!IS_SUB_8X8(h->sub_mb_type[i])
4917 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4918 return 0;
4919 }
4920 return 1;
4921}
4922
4923/**
4924 * decodes a residual block.
4925 * @param n block index
4926 * @param scantable scantable
4927 * @param max_coeff number of coefficients in the block
4928 * @return <0 if an error occured
4929 */
4930static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4931 MpegEncContext * const s = &h->s;
4932 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4933 int level[16];
4934 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4935
4936 //FIXME put trailing_onex into the context
4937
4938 if(n == CHROMA_DC_BLOCK_INDEX){
4939 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4940 total_coeff= coeff_token>>2;
4941 }else{
4942 if(n == LUMA_DC_BLOCK_INDEX){
4943 total_coeff= pred_non_zero_count(h, 0);
4944 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4945 total_coeff= coeff_token>>2;
4946 }else{
4947 total_coeff= pred_non_zero_count(h, n);
4948 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4949 total_coeff= coeff_token>>2;
4950 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4951 }
4952 }
4953
4954 //FIXME set last_non_zero?
4955
4956 if(total_coeff==0)
4957 return 0;
4958
4959 trailing_ones= coeff_token&3;
4960 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4961 assert(total_coeff<=16);
4962
4963 for(i=0; i<trailing_ones; i++){
4964 level[i]= 1 - 2*get_bits1(gb);
4965 }
4966
4967 if(i<total_coeff) {
4968 int level_code, mask;
4969 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4970 int prefix= get_level_prefix(gb);
4971
4972 //first coefficient has suffix_length equal to 0 or 1
4973 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4974 if(suffix_length)
4975 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4976 else
4977 level_code= (prefix<<suffix_length); //part
4978 }else if(prefix==14){
4979 if(suffix_length)
4980 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4981 else
4982 level_code= prefix + get_bits(gb, 4); //part
4983 }else if(prefix==15){
4984 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4985 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4986 }else{
4987 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4988 return -1;
4989 }
4990
4991 if(trailing_ones < 3) level_code += 2;
4992
4993 suffix_length = 1;
4994 if(level_code > 5)
4995 suffix_length++;
4996 mask= -(level_code&1);
4997 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4998 i++;
4999
5000 //remaining coefficients have suffix_length > 0
5001 for(;i<total_coeff;i++) {
5002 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
5003 prefix = get_level_prefix(gb);
5004 if(prefix<15){
5005 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
5006 }else if(prefix==15){
5007 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
5008 }else{
5009 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5010 return -1;
5011 }
5012 mask= -(level_code&1);
5013 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5014 if(level_code > suffix_limit[suffix_length])
5015 suffix_length++;
5016 }
5017 }
5018
5019 if(total_coeff == max_coeff)
5020 zeros_left=0;
5021 else{
5022 if(n == CHROMA_DC_BLOCK_INDEX)
5023 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
5024 else
5025 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
5026 }
5027
5028 coeff_num = zeros_left + total_coeff - 1;
5029 j = scantable[coeff_num];
5030 if(n > 24){
5031 block[j] = level[0];
5032 for(i=1;i<total_coeff;i++) {
5033 if(zeros_left <= 0)
5034 run_before = 0;
5035 else if(zeros_left < 7){
5036 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5037 }else{
5038 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5039 }
5040 zeros_left -= run_before;
5041 coeff_num -= 1 + run_before;
5042 j= scantable[ coeff_num ];
5043
5044 block[j]= level[i];
5045 }
5046 }else{
5047 block[j] = (level[0] * qmul[j] + 32)>>6;
5048 for(i=1;i<total_coeff;i++) {
5049 if(zeros_left <= 0)
5050 run_before = 0;
5051 else if(zeros_left < 7){
5052 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5053 }else{
5054 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5055 }
5056 zeros_left -= run_before;
5057 coeff_num -= 1 + run_before;
5058 j= scantable[ coeff_num ];
5059
5060 block[j]= (level[i] * qmul[j] + 32)>>6;
5061 }
5062 }
5063
5064 if(zeros_left<0){
5065 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
5066 return -1;
5067 }
5068
5069 return 0;
5070}
5071
5072static void predict_field_decoding_flag(H264Context *h){
5073 MpegEncContext * const s = &h->s;
5074 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5075 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5076 ? s->current_picture.mb_type[mb_xy-1]
5077 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5078 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5079 : 0;
5080 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5081}
5082
5083/**
5084 * decodes a P_SKIP or B_SKIP macroblock
5085 */
5086static void decode_mb_skip(H264Context *h){
5087 MpegEncContext * const s = &h->s;
5088 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5089 int mb_type=0;
5090
5091 memset(h->non_zero_count[mb_xy], 0, 16);
5092 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5093
5094 if(MB_FIELD)
5095 mb_type|= MB_TYPE_INTERLACED;
5096
5097 if( h->slice_type == B_TYPE )
5098 {
5099 // just for fill_caches. pred_direct_motion will set the real mb_type
5100 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5101
5102 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5103 pred_direct_motion(h, &mb_type);
5104 if(h->pps.cabac){
5105 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5106 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5107 }
5108 }
5109 else
5110 {
5111 int mx, my;
5112 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5113
5114 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5115 pred_pskip_motion(h, &mx, &my);
5116 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5117 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5118 if(h->pps.cabac)
5119 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5120 }
5121
5122 write_back_motion(h, mb_type);
5123 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
5124 s->current_picture.qscale_table[mb_xy]= s->qscale;
5125 h->slice_table[ mb_xy ]= h->slice_num;
5126 h->prev_mb_skipped= 1;
5127}
5128
5129/**
5130 * decodes a macroblock
5131 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5132 */
5133static int decode_mb_cavlc(H264Context *h){
5134 MpegEncContext * const s = &h->s;
5135 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5136 int mb_type, partition_count, cbp;
5137 int dct8x8_allowed= h->pps.transform_8x8_mode;
5138
5139 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5140
5141 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5142 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5143 down the code */
5144 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5145 if(s->mb_skip_run==-1)
5146 s->mb_skip_run= get_ue_golomb(&s->gb);
5147
5148 if (s->mb_skip_run--) {
5149 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5150 if(s->mb_skip_run==0)
5151 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5152 else
5153 predict_field_decoding_flag(h);
5154 }
5155 decode_mb_skip(h);
5156 return 0;
5157 }
5158 }
5159 if(FRAME_MBAFF){
5160 if( (s->mb_y&1) == 0 )
5161 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5162 }else
5163 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5164
5165 h->prev_mb_skipped= 0;
5166
5167 mb_type= get_ue_golomb(&s->gb);
5168 if(h->slice_type == B_TYPE){
5169 if(mb_type < 23){
5170 partition_count= b_mb_type_info[mb_type].partition_count;
5171 mb_type= b_mb_type_info[mb_type].type;
5172 }else{
5173 mb_type -= 23;
5174 goto decode_intra_mb;
5175 }
5176 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5177 if(mb_type < 5){
5178 partition_count= p_mb_type_info[mb_type].partition_count;
5179 mb_type= p_mb_type_info[mb_type].type;
5180 }else{
5181 mb_type -= 5;
5182 goto decode_intra_mb;
5183 }
5184 }else{
5185 assert(h->slice_type == I_TYPE);
5186decode_intra_mb:
5187 if(mb_type > 25){
5188 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5189 return -1;
5190 }
5191 partition_count=0;
5192 cbp= i_mb_type_info[mb_type].cbp;
5193 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5194 mb_type= i_mb_type_info[mb_type].type;
5195 }
5196
5197 if(MB_FIELD)
5198 mb_type |= MB_TYPE_INTERLACED;
5199
5200 h->slice_table[ mb_xy ]= h->slice_num;
5201
5202 if(IS_INTRA_PCM(mb_type)){
5203 unsigned int x, y;
5204
5205 // we assume these blocks are very rare so we dont optimize it
5206 align_get_bits(&s->gb);
5207
5208 // The pixels are stored in the same order as levels in h->mb array.
5209 for(y=0; y<16; y++){
5210 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5211 for(x=0; x<16; x++){
5212 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5213 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5214 }
5215 }
5216 for(y=0; y<8; y++){
5217 const int index= 256 + 4*(y&3) + 32*(y>>2);
5218 for(x=0; x<8; x++){
5219 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5220 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5221 }
5222 }
5223 for(y=0; y<8; y++){
5224 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5225 for(x=0; x<8; x++){
5226 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5227 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5228 }
5229 }
5230
5231 // In deblocking, the quantizer is 0
5232 s->current_picture.qscale_table[mb_xy]= 0;
5233 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5234 // All coeffs are present
5235 memset(h->non_zero_count[mb_xy], 16, 16);
5236
5237 s->current_picture.mb_type[mb_xy]= mb_type;
5238 return 0;
5239 }
5240
5241 if(MB_MBAFF){
5242 h->ref_count[0] <<= 1;
5243 h->ref_count[1] <<= 1;
5244 }
5245
5246 fill_caches(h, mb_type, 0);
5247
5248 //mb_pred
5249 if(IS_INTRA(mb_type)){
5250// init_top_left_availability(h);
5251 if(IS_INTRA4x4(mb_type)){
5252 int i;
5253 int di = 1;
5254 if(dct8x8_allowed && get_bits1(&s->gb)){
5255 mb_type |= MB_TYPE_8x8DCT;
5256 di = 4;
5257 }
5258
5259// fill_intra4x4_pred_table(h);
5260 for(i=0; i<16; i+=di){
5261 int mode= pred_intra_mode(h, i);
5262
5263 if(!get_bits1(&s->gb)){
5264 const int rem_mode= get_bits(&s->gb, 3);
5265 mode = rem_mode + (rem_mode >= mode);
5266 }
5267
5268 if(di==4)
5269 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5270 else
5271 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5272 }
5273 write_back_intra_pred_mode(h);
5274 if( check_intra4x4_pred_mode(h) < 0)
5275 return -1;
5276 }else{
5277 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5278 if(h->intra16x16_pred_mode < 0)
5279 return -1;
5280 }
5281 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5282
5283 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5284 if(h->chroma_pred_mode < 0)
5285 return -1;
5286 }else if(partition_count==4){
5287 int i, j, sub_partition_count[4], list, ref[2][4];
5288
5289 if(h->slice_type == B_TYPE){
5290 for(i=0; i<4; i++){
5291 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5292 if(h->sub_mb_type[i] >=13){
5293 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5294 return -1;
5295 }
5296 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5297 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5298 }
5299 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5300 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5301 pred_direct_motion(h, &mb_type);
5302 h->ref_cache[0][scan8[4]] =
5303 h->ref_cache[1][scan8[4]] =
5304 h->ref_cache[0][scan8[12]] =
5305 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5306 }
5307 }else{
5308 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5309 for(i=0; i<4; i++){
5310 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5311 if(h->sub_mb_type[i] >=4){
5312 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5313 return -1;
5314 }
5315 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5316 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5317 }
5318 }
5319
5320 for(list=0; list<2; list++){
5321 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5322 if(ref_count == 0) continue;
5323 for(i=0; i<4; i++){
5324 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5325 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5326 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5327 }else{
5328 //FIXME
5329 ref[list][i] = -1;
5330 }
5331 }
5332 }
5333
5334 if(dct8x8_allowed)
5335 dct8x8_allowed = get_dct8x8_allowed(h);
5336
5337 for(list=0; list<2; list++){
5338 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5339 if(ref_count == 0) continue;
5340
5341 for(i=0; i<4; i++){
5342 if(IS_DIRECT(h->sub_mb_type[i])) {
5343 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5344 continue;
5345 }
5346 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5347 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5348
5349 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5350 const int sub_mb_type= h->sub_mb_type[i];
5351 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5352 for(j=0; j<sub_partition_count[i]; j++){
5353 int mx, my;
5354 const int index= 4*i + block_width*j;
5355 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5356 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5357 mx += get_se_golomb(&s->gb);
5358 my += get_se_golomb(&s->gb);
5359 tprintf("final mv:%d %d\n", mx, my);
5360
5361 if(IS_SUB_8X8(sub_mb_type)){
5362 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5363 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5364 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5365 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5366 }else if(IS_SUB_8X4(sub_mb_type)){
5367 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5368 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5369 }else if(IS_SUB_4X8(sub_mb_type)){
5370 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5371 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5372 }else{
5373 assert(IS_SUB_4X4(sub_mb_type));
5374 mv_cache[ 0 ][0]= mx;
5375 mv_cache[ 0 ][1]= my;
5376 }
5377 }
5378 }else{
5379 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5380 p[0] = p[1]=
5381 p[8] = p[9]= 0;
5382 }
5383 }
5384 }
5385 }else if(IS_DIRECT(mb_type)){
5386 pred_direct_motion(h, &mb_type);
5387 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5388 }else{
5389 int list, mx, my, i;
5390 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5391 if(IS_16X16(mb_type)){
5392 for(list=0; list<2; list++){
5393 if(h->ref_count[list]>0){
5394 if(IS_DIR(mb_type, 0, list)){
5395 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5396 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5397 }else
5398 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5399 }
5400 }
5401 for(list=0; list<2; list++){
5402 if(IS_DIR(mb_type, 0, list)){
5403 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5404 mx += get_se_golomb(&s->gb);
5405 my += get_se_golomb(&s->gb);
5406 tprintf("final mv:%d %d\n", mx, my);
5407
5408 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5409 }else
5410 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5411 }
5412 }
5413 else if(IS_16X8(mb_type)){
5414 for(list=0; list<2; list++){
5415 if(h->ref_count[list]>0){
5416 for(i=0; i<2; i++){
5417 if(IS_DIR(mb_type, i, list)){
5418 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5419 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5420 }else
5421 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5422 }
5423 }
5424 }
5425 for(list=0; list<2; list++){
5426 for(i=0; i<2; i++){
5427 if(IS_DIR(mb_type, i, list)){
5428 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5429 mx += get_se_golomb(&s->gb);
5430 my += get_se_golomb(&s->gb);
5431 tprintf("final mv:%d %d\n", mx, my);
5432
5433 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5434 }else
5435 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5436 }
5437 }
5438 }else{
5439 assert(IS_8X16(mb_type));
5440 for(list=0; list<2; list++){
5441 if(h->ref_count[list]>0){
5442 for(i=0; i<2; i++){
5443 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5444 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5445 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5446 }else
5447 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5448 }
5449 }
5450 }
5451 for(list=0; list<2; list++){
5452 for(i=0; i<2; i++){
5453 if(IS_DIR(mb_type, i, list)){
5454 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5455 mx += get_se_golomb(&s->gb);
5456 my += get_se_golomb(&s->gb);
5457 tprintf("final mv:%d %d\n", mx, my);
5458
5459 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5460 }else
5461 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5462 }
5463 }
5464 }
5465 }
5466
5467 if(IS_INTER(mb_type))
5468 write_back_motion(h, mb_type);
5469
5470 if(!IS_INTRA16x16(mb_type)){
5471 cbp= get_ue_golomb(&s->gb);
5472 if(cbp > 47){
5473 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5474 return -1;
5475 }
5476
5477 if(IS_INTRA4x4(mb_type))
5478 cbp= golomb_to_intra4x4_cbp[cbp];
5479 else
5480 cbp= golomb_to_inter_cbp[cbp];
5481 }
5482
5483 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5484 if(get_bits1(&s->gb))
5485 mb_type |= MB_TYPE_8x8DCT;
5486 }
5487 s->current_picture.mb_type[mb_xy]= mb_type;
5488
5489 if(cbp || IS_INTRA16x16(mb_type)){
5490 int i8x8, i4x4, chroma_idx;
5491 int chroma_qp, dquant;
5492 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5493 const uint8_t *scan, *scan8x8, *dc_scan;
5494
5495// fill_non_zero_count_cache(h);
5496
5497 if(IS_INTERLACED(mb_type)){
5498 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5499 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5500 dc_scan= luma_dc_field_scan;
5501 }else{
5502 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5503 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5504 dc_scan= luma_dc_zigzag_scan;
5505 }
5506
5507 dquant= get_se_golomb(&s->gb);
5508
5509 if( dquant > 25 || dquant < -26 ){
5510 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5511 return -1;
5512 }
5513
5514 s->qscale += dquant;
5515 if(((unsigned)s->qscale) > 51){
5516 if(s->qscale<0) s->qscale+= 52;
5517 else s->qscale-= 52;
5518 }
5519
5520 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5521 if(IS_INTRA16x16(mb_type)){
5522 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5523 return -1; //FIXME continue if partitioned and other return -1 too
5524 }
5525
5526 assert((cbp&15) == 0 || (cbp&15) == 15);
5527
5528 if(cbp&15){
5529 for(i8x8=0; i8x8<4; i8x8++){
5530 for(i4x4=0; i4x4<4; i4x4++){
5531 const int index= i4x4 + 4*i8x8;
5532 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5533 return -1;
5534 }
5535 }
5536 }
5537 }else{
5538 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5539 }
5540 }else{
5541 for(i8x8=0; i8x8<4; i8x8++){
5542 if(cbp & (1<<i8x8)){
5543 if(IS_8x8DCT(mb_type)){
5544 DCTELEM *buf = &h->mb[64*i8x8];
5545 uint8_t *nnz;
5546 for(i4x4=0; i4x4<4; i4x4++){
5547 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5548 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5549 return -1;
5550 }
5551 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5552 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5553 }else{
5554 for(i4x4=0; i4x4<4; i4x4++){
5555 const int index= i4x4 + 4*i8x8;
5556
5557 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5558 return -1;
5559 }
5560 }
5561 }
5562 }else{
5563 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5564 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5565 }
5566 }
5567 }
5568
5569 if(cbp&0x30){
5570 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5571 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5572 return -1;
5573 }
5574 }
5575
5576 if(cbp&0x20){
5577 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5578 for(i4x4=0; i4x4<4; i4x4++){
5579 const int index= 16 + 4*chroma_idx + i4x4;
5580 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5581 return -1;
5582 }
5583 }
5584 }
5585 }else{
5586 uint8_t * const nnz= &h->non_zero_count_cache[0];
5587 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5588 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5589 }
5590 }else{
5591 uint8_t * const nnz= &h->non_zero_count_cache[0];
5592 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5593 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5594 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5595 }
5596 s->current_picture.qscale_table[mb_xy]= s->qscale;
5597 write_back_non_zero_count(h);
5598
5599 if(MB_MBAFF){
5600 h->ref_count[0] >>= 1;
5601 h->ref_count[1] >>= 1;
5602 }
5603
5604 return 0;
5605}
5606
5607static int decode_cabac_field_decoding_flag(H264Context *h) {
5608 MpegEncContext * const s = &h->s;
5609 const int mb_x = s->mb_x;
5610 const int mb_y = s->mb_y & ~1;
5611 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5612 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5613
5614 unsigned int ctx = 0;
5615
5616 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5617 ctx += 1;
5618 }
5619 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5620 ctx += 1;
5621 }
5622
5623 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
5624}
5625
5626static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5627 uint8_t *state= &h->cabac_state[ctx_base];
5628 int mb_type;
5629
5630 if(intra_slice){
5631 MpegEncContext * const s = &h->s;
5632 const int mba_xy = h->left_mb_xy[0];
5633 const int mbb_xy = h->top_mb_xy;
5634 int ctx=0;
5635 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5636 ctx++;
5637 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5638 ctx++;
5639 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5640 return 0; /* I4x4 */
5641 state += 2;
5642 }else{
5643 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5644 return 0; /* I4x4 */
5645 }
5646
5647 if( get_cabac_terminate( &h->cabac ) )
5648 return 25; /* PCM */
5649
5650 mb_type = 1; /* I16x16 */
5651 mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5652 if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */
5653 mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] );
5654 mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] );
5655 mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] );
5656 return mb_type;
5657}
5658
5659static int decode_cabac_mb_type( H264Context *h ) {
5660 MpegEncContext * const s = &h->s;
5661
5662 if( h->slice_type == I_TYPE ) {
5663 return decode_cabac_intra_mb_type(h, 3, 1);
5664 } else if( h->slice_type == P_TYPE ) {
5665 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5666 /* P-type */
5667 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5668 /* P_L0_D16x16, P_8x8 */
5669 return 3 * get_cabac( &h->cabac, &h->cabac_state[16] );
5670 } else {
5671 /* P_L0_D8x16, P_L0_D16x8 */
5672 return 2 - get_cabac( &h->cabac, &h->cabac_state[17] );
5673 }
5674 } else {
5675 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5676 }
5677 } else if( h->slice_type == B_TYPE ) {
5678 const int mba_xy = h->left_mb_xy[0];
5679 const int mbb_xy = h->top_mb_xy;
5680 int ctx = 0;
5681 int bits;
5682
5683 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5684 ctx++;
5685 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5686 ctx++;
5687
5688 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5689 return 0; /* B_Direct_16x16 */
5690
5691 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5692 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5693 }
5694
5695 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5696 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5697 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5698 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5699 if( bits < 8 )
5700 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5701 else if( bits == 13 ) {
5702 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5703 } else if( bits == 14 )
5704 return 11; /* B_L1_L0_8x16 */
5705 else if( bits == 15 )
5706 return 22; /* B_8x8 */
5707
5708 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5709 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5710 } else {
5711 /* TODO SI/SP frames? */
5712 return -1;
5713 }
5714}
5715
5716static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5717 MpegEncContext * const s = &h->s;
5718 int mba_xy, mbb_xy;
5719 int ctx = 0;
5720
5721 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5722 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5723 mba_xy = mb_xy - 1;
5724 if( (mb_y&1)
5725 && h->slice_table[mba_xy] == h->slice_num
5726 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5727 mba_xy += s->mb_stride;
5728 if( MB_FIELD ){
5729 mbb_xy = mb_xy - s->mb_stride;
5730 if( !(mb_y&1)
5731 && h->slice_table[mbb_xy] == h->slice_num
5732 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5733 mbb_xy -= s->mb_stride;
5734 }else
5735 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5736 }else{
5737 int mb_xy = mb_x + mb_y*s->mb_stride;
5738 mba_xy = mb_xy - 1;
5739 mbb_xy = mb_xy - s->mb_stride;
5740 }
5741
5742 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5743 ctx++;
5744 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5745 ctx++;
5746
5747 if( h->slice_type == B_TYPE )
5748 ctx += 13;
5749 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5750}
5751
5752static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5753 int mode = 0;
5754
5755 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5756 return pred_mode;
5757
5758 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5759 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5760 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5761
5762 if( mode >= pred_mode )
5763 return mode + 1;
5764 else
5765 return mode;
5766}
5767
5768static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5769 const int mba_xy = h->left_mb_xy[0];
5770 const int mbb_xy = h->top_mb_xy;
5771
5772 int ctx = 0;
5773
5774 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5775 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5776 ctx++;
5777
5778 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5779 ctx++;
5780
5781 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5782 return 0;
5783
5784 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5785 return 1;
5786 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5787 return 2;
5788 else
5789 return 3;
5790}
5791
/** x coordinate (in 4x4 block units) of each of the 16 luma block indices */
static const uint8_t block_idx_x[16] = {
    0, 1, 0, 1,
    2, 3, 2, 3,
    0, 1, 0, 1,
    2, 3, 2, 3
};

/** y coordinate (in 4x4 block units) of each of the 16 luma block indices */
static const uint8_t block_idx_y[16] = {
    0, 0, 1, 1,
    0, 0, 1, 1,
    2, 2, 3, 3,
    2, 2, 3, 3
};

/** inverse of the two tables above: block index at position [x][y] */
static const uint8_t block_idx_xy[4][4] = {
    { 0, 2,  8, 10 },
    { 1, 3,  9, 11 },
    { 4, 6, 12, 14 },
    { 5, 7, 13, 15 }
};
5804
5805static int decode_cabac_mb_cbp_luma( H264Context *h) {
5806 int cbp = 0;
5807 int cbp_b = -1;
5808 int i8x8;
5809
5810 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5811 cbp_b = h->top_cbp;
5812 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5813 }
5814
5815 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5816 int cbp_a = -1;
5817 int x, y;
5818 int ctx = 0;
5819
5820 x = block_idx_x[4*i8x8];
5821 y = block_idx_y[4*i8x8];
5822
5823 if( x > 0 )
5824 cbp_a = cbp;
5825 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5826 cbp_a = h->left_cbp;
5827 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5828 }
5829
5830 if( y > 0 )
5831 cbp_b = cbp;
5832
5833 /* No need to test for skip as we put 0 for skip block */
5834 /* No need to test for IPCM as we put 1 for IPCM block */
5835 if( cbp_a >= 0 ) {
5836 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5837 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5838 ctx++;
5839 }
5840
5841 if( cbp_b >= 0 ) {
5842 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5843 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5844 ctx += 2;
5845 }
5846
5847 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
5848 cbp |= 1 << i8x8;
5849 }
5850 }
5851 return cbp;
5852}
5853static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5854 int ctx;
5855 int cbp_a, cbp_b;
5856
5857 cbp_a = (h->left_cbp>>4)&0x03;
5858 cbp_b = (h-> top_cbp>>4)&0x03;
5859
5860 ctx = 0;
5861 if( cbp_a > 0 ) ctx++;
5862 if( cbp_b > 0 ) ctx += 2;
5863 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5864 return 0;
5865
5866 ctx = 4;
5867 if( cbp_a == 2 ) ctx++;
5868 if( cbp_b == 2 ) ctx += 2;
5869 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
5870}
5871static int decode_cabac_mb_dqp( H264Context *h) {
5872 MpegEncContext * const s = &h->s;
5873 int mbn_xy;
5874 int ctx = 0;
5875 int val = 0;
5876
5877 if( s->mb_x > 0 )
5878 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5879 else
5880 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5881
5882 if( h->last_qscale_diff != 0 )
5883 ctx++;
5884
5885 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5886 if( ctx < 2 )
5887 ctx = 2;
5888 else
5889 ctx = 3;
5890 val++;
5891 if(val > 102) //prevent infinite loop
5892 return INT_MIN;
5893 }
5894
5895 if( val&0x01 )
5896 return (val + 1)/2;
5897 else
5898 return -(val + 1)/2;
5899}
5900static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5901 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5902 return 0; /* 8x8 */
5903 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5904 return 1; /* 8x4 */
5905 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5906 return 2; /* 4x8 */
5907 return 3; /* 4x4 */
5908}
5909static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5910 int type;
5911 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5912 return 0; /* B_Direct_8x8 */
5913 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5914 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5915 type = 3;
5916 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5917 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5918 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5919 type += 4;
5920 }
5921 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5922 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5923 return type;
5924}
5925
5926static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5927 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5928}
5929
5930static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5931 int refa = h->ref_cache[list][scan8[n] - 1];
5932 int refb = h->ref_cache[list][scan8[n] - 8];
5933 int ref = 0;
5934 int ctx = 0;
5935
5936 if( h->slice_type == B_TYPE) {
5937 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5938 ctx++;
5939 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5940 ctx += 2;
5941 } else {
5942 if( refa > 0 )
5943 ctx++;
5944 if( refb > 0 )
5945 ctx += 2;
5946 }
5947
5948 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5949 ref++;
5950 if( ctx < 4 )
5951 ctx = 4;
5952 else
5953 ctx = 5;
5954 }
5955 return ref;
5956}
5957
5958static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5959 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5960 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5961 int ctxbase = (l == 0) ? 40 : 47;
5962 int ctx, mvd;
5963
5964 if( amvd < 3 )
5965 ctx = 0;
5966 else if( amvd > 32 )
5967 ctx = 2;
5968 else
5969 ctx = 1;
5970
5971 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5972 return 0;
5973
5974 mvd= 1;
5975 ctx= 3;
5976 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5977 mvd++;
5978 if( ctx < 6 )
5979 ctx++;
5980 }
5981
5982 if( mvd >= 9 ) {
5983 int k = 3;
5984 while( get_cabac_bypass( &h->cabac ) ) {
5985 mvd += 1 << k;
5986 k++;
5987 }
5988 while( k-- ) {
5989 if( get_cabac_bypass( &h->cabac ) )
5990 mvd += 1 << k;
5991 }
5992 }
5993 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
5994 else return mvd;
5995}
5996
5997static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5998 int nza, nzb;
5999 int ctx = 0;
6000
6001 if( cat == 0 ) {
6002 nza = h->left_cbp&0x100;
6003 nzb = h-> top_cbp&0x100;
6004 } else if( cat == 1 || cat == 2 ) {
6005 nza = h->non_zero_count_cache[scan8[idx] - 1];
6006 nzb = h->non_zero_count_cache[scan8[idx] - 8];
6007 } else if( cat == 3 ) {
6008 nza = (h->left_cbp>>(6+idx))&0x01;
6009 nzb = (h-> top_cbp>>(6+idx))&0x01;
6010 } else {
6011 assert(cat == 4);
6012 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
6013 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6014 }
6015
6016 if( nza > 0 )
6017 ctx++;
6018
6019 if( nzb > 0 )
6020 ctx += 2;
6021
6022 return ctx + 4 * cat;
6023}
6024
6025static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6026 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
6027 static const int significant_coeff_flag_offset[2][6] = {
6028 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6029 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6030 };
6031 static const int last_coeff_flag_offset[2][6] = {
6032 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6033 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6034 };
6035 static const int coeff_abs_level_m1_offset[6] = {
6036 227+0, 227+10, 227+20, 227+30, 227+39, 426
6037 };
6038 static const int significant_coeff_flag_offset_8x8[2][63] = {
6039 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6040 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6041 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6042 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6043 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6044 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6045 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6046 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6047 };
6048 static const int last_coeff_flag_offset_8x8[63] = {
6049 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6050 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6051 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6052 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6053 };
6054
6055 int index[64];
6056
6057 int i, last;
6058 int coeff_count = 0;
6059
6060 int abslevel1 = 1;
6061 int abslevelgt1 = 0;
6062
6063 uint8_t *significant_coeff_ctx_base;
6064 uint8_t *last_coeff_ctx_base;
6065 uint8_t *abs_level_m1_ctx_base;
6066
6067 /* cat: 0-> DC 16x16 n = 0
6068 * 1-> AC 16x16 n = luma4x4idx
6069 * 2-> Luma4x4 n = luma4x4idx
6070 * 3-> DC Chroma n = iCbCr
6071 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6072 * 5-> Luma8x8 n = 4 * luma8x8idx
6073 */
6074
6075 /* read coded block flag */
6076 if( cat != 5 ) {
6077 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
6078 if( cat == 1 || cat == 2 )
6079 h->non_zero_count_cache[scan8[n]] = 0;
6080 else if( cat == 4 )
6081 h->non_zero_count_cache[scan8[16+n]] = 0;
6082
6083 return 0;
6084 }
6085 }
6086
6087 significant_coeff_ctx_base = h->cabac_state
6088 + significant_coeff_flag_offset[MB_FIELD][cat];
6089 last_coeff_ctx_base = h->cabac_state
6090 + last_coeff_flag_offset[MB_FIELD][cat];
6091 abs_level_m1_ctx_base = h->cabac_state
6092 + coeff_abs_level_m1_offset[cat];
6093
6094 if( cat == 5 ) {
6095#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6096 for(last= 0; last < coefs; last++) { \
6097 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6098 if( get_cabac( &h->cabac, sig_ctx )) { \
6099 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6100 index[coeff_count++] = last; \
6101 if( get_cabac( &h->cabac, last_ctx ) ) { \
6102 last= max_coeff; \
6103 break; \
6104 } \
6105 } \
6106 }
6107 const int *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6108 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6109 } else {
6110 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6111 }
6112 if( last == max_coeff -1 ) {
6113 index[coeff_count++] = last;
6114 }
6115 assert(coeff_count > 0);
6116
6117 if( cat == 0 )
6118 h->cbp_table[mb_xy] |= 0x100;
6119 else if( cat == 1 || cat == 2 )
6120 h->non_zero_count_cache[scan8[n]] = coeff_count;
6121 else if( cat == 3 )
6122 h->cbp_table[mb_xy] |= 0x40 << n;
6123 else if( cat == 4 )
6124 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6125 else {
6126 assert( cat == 5 );
6127 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
6128 }
6129
6130 for( i = coeff_count - 1; i >= 0; i-- ) {
6131 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6132 int j= scantable[index[i]];
6133
6134 if( get_cabac( &h->cabac, ctx ) == 0 ) {
6135 if( !qmul ) {
6136 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
6137 else block[j] = 1;
6138 }else{
6139 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
6140 else block[j] = ( qmul[j] + 32) >> 6;
6141 }
6142
6143 abslevel1++;
6144 } else {
6145 int coeff_abs = 2;
6146 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6147 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
6148 coeff_abs++;
6149 }
6150
6151 if( coeff_abs >= 15 ) {
6152 int j = 0;
6153 while( get_cabac_bypass( &h->cabac ) ) {
6154 coeff_abs += 1 << j;
6155 j++;
6156 }
6157
6158 while( j-- ) {
6159 if( get_cabac_bypass( &h->cabac ) )
6160 coeff_abs += 1 << j ;
6161 }
6162 }
6163
6164 if( !qmul ) {
6165 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
6166 else block[j] = coeff_abs;
6167 }else{
6168 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6169 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
6170 }
6171
6172 abslevelgt1++;
6173 }
6174 }
6175 return 0;
6176}
6177
6178static void inline compute_mb_neighbors(H264Context *h)
6179{
6180 MpegEncContext * const s = &h->s;
6181 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6182 h->top_mb_xy = mb_xy - s->mb_stride;
6183 h->left_mb_xy[0] = mb_xy - 1;
6184 if(FRAME_MBAFF){
6185 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6186 const int top_pair_xy = pair_xy - s->mb_stride;
6187 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6188 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6189 const int curr_mb_frame_flag = !MB_FIELD;
6190 const int bottom = (s->mb_y & 1);
6191 if (bottom
6192 ? !curr_mb_frame_flag // bottom macroblock
6193 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6194 ) {
6195 h->top_mb_xy -= s->mb_stride;
6196 }
6197 if (left_mb_frame_flag != curr_mb_frame_flag) {
6198 h->left_mb_xy[0] = pair_xy - 1;
6199 }
6200 }
6201 return;
6202}
6203
6204/**
6205 * decodes a macroblock
6206 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6207 */
6208static int decode_mb_cabac(H264Context *h) {
6209 MpegEncContext * const s = &h->s;
6210 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6211 int mb_type, partition_count, cbp = 0;
6212 int dct8x8_allowed= h->pps.transform_8x8_mode;
6213
6214 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6215
6216 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6217 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6218 int skip;
6219 /* a skipped mb needs the aff flag from the following mb */
6220 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6221 predict_field_decoding_flag(h);
6222 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6223 skip = h->next_mb_skipped;
6224 else
6225 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6226 /* read skip flags */
6227 if( skip ) {
6228 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6229 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6230 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6231 if(h->next_mb_skipped)
6232 predict_field_decoding_flag(h);
6233 else
6234 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6235 }
6236
6237 decode_mb_skip(h);
6238
6239 h->cbp_table[mb_xy] = 0;
6240 h->chroma_pred_mode_table[mb_xy] = 0;
6241 h->last_qscale_diff = 0;
6242
6243 return 0;
6244
6245 }
6246 }
6247 if(FRAME_MBAFF){
6248 if( (s->mb_y&1) == 0 )
6249 h->mb_mbaff =
6250 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6251 }else
6252 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6253
6254 h->prev_mb_skipped = 0;
6255
6256 compute_mb_neighbors(h);
6257 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6258 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6259 return -1;
6260 }
6261
6262 if( h->slice_type == B_TYPE ) {
6263 if( mb_type < 23 ){
6264 partition_count= b_mb_type_info[mb_type].partition_count;
6265 mb_type= b_mb_type_info[mb_type].type;
6266 }else{
6267 mb_type -= 23;
6268 goto decode_intra_mb;
6269 }
6270 } else if( h->slice_type == P_TYPE ) {
6271 if( mb_type < 5) {
6272 partition_count= p_mb_type_info[mb_type].partition_count;
6273 mb_type= p_mb_type_info[mb_type].type;
6274 } else {
6275 mb_type -= 5;
6276 goto decode_intra_mb;
6277 }
6278 } else {
6279 assert(h->slice_type == I_TYPE);
6280decode_intra_mb:
6281 partition_count = 0;
6282 cbp= i_mb_type_info[mb_type].cbp;
6283 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6284 mb_type= i_mb_type_info[mb_type].type;
6285 }
6286 if(MB_FIELD)
6287 mb_type |= MB_TYPE_INTERLACED;
6288
6289 h->slice_table[ mb_xy ]= h->slice_num;
6290
6291 if(IS_INTRA_PCM(mb_type)) {
6292 const uint8_t *ptr;
6293 unsigned int x, y;
6294
6295 // We assume these blocks are very rare so we dont optimize it.
6296 // FIXME The two following lines get the bitstream position in the cabac
6297 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6298 ptr= h->cabac.bytestream;
6299 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6300
6301 // The pixels are stored in the same order as levels in h->mb array.
6302 for(y=0; y<16; y++){
6303 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6304 for(x=0; x<16; x++){
6305 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6306 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6307 }
6308 }
6309 for(y=0; y<8; y++){
6310 const int index= 256 + 4*(y&3) + 32*(y>>2);
6311 for(x=0; x<8; x++){
6312 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6313 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6314 }
6315 }
6316 for(y=0; y<8; y++){
6317 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6318 for(x=0; x<8; x++){
6319 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6320 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6321 }
6322 }
6323
6324 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6325
6326 // All blocks are present
6327 h->cbp_table[mb_xy] = 0x1ef;
6328 h->chroma_pred_mode_table[mb_xy] = 0;
6329 // In deblocking, the quantizer is 0
6330 s->current_picture.qscale_table[mb_xy]= 0;
6331 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6332 // All coeffs are present
6333 memset(h->non_zero_count[mb_xy], 16, 16);
6334 s->current_picture.mb_type[mb_xy]= mb_type;
6335 return 0;
6336 }
6337
6338 if(MB_MBAFF){
6339 h->ref_count[0] <<= 1;
6340 h->ref_count[1] <<= 1;
6341 }
6342
6343 fill_caches(h, mb_type, 0);
6344
6345 if( IS_INTRA( mb_type ) ) {
6346 int i;
6347 if( IS_INTRA4x4( mb_type ) ) {
6348 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6349 mb_type |= MB_TYPE_8x8DCT;
6350 for( i = 0; i < 16; i+=4 ) {
6351 int pred = pred_intra_mode( h, i );
6352 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6353 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6354 }
6355 } else {
6356 for( i = 0; i < 16; i++ ) {
6357 int pred = pred_intra_mode( h, i );
6358 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6359
6360 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6361 }
6362 }
6363 write_back_intra_pred_mode(h);
6364 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6365 } else {
6366 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6367 if( h->intra16x16_pred_mode < 0 ) return -1;
6368 }
6369 h->chroma_pred_mode_table[mb_xy] =
6370 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6371
6372 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6373 if( h->chroma_pred_mode < 0 ) return -1;
6374 } else if( partition_count == 4 ) {
6375 int i, j, sub_partition_count[4], list, ref[2][4];
6376
6377 if( h->slice_type == B_TYPE ) {
6378 for( i = 0; i < 4; i++ ) {
6379 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6380 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6381 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6382 }
6383 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
6384 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
6385 pred_direct_motion(h, &mb_type);
6386 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6387 for( i = 0; i < 4; i++ )
6388 if( IS_DIRECT(h->sub_mb_type[i]) )
6389 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6390 }
6391 }
6392 } else {
6393 for( i = 0; i < 4; i++ ) {
6394 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6395 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6396 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6397 }
6398 }
6399
6400 for( list = 0; list < 2; list++ ) {
6401 if( h->ref_count[list] > 0 ) {
6402 for( i = 0; i < 4; i++ ) {
6403 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6404 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6405 if( h->ref_count[list] > 1 )
6406 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6407 else
6408 ref[list][i] = 0;
6409 } else {
6410 ref[list][i] = -1;
6411 }
6412 h->ref_cache[list][ scan8[4*i]+1 ]=
6413 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6414 }
6415 }
6416 }
6417
6418 if(dct8x8_allowed)
6419 dct8x8_allowed = get_dct8x8_allowed(h);
6420
6421 for(list=0; list<2; list++){
6422 for(i=0; i<4; i++){
6423 if(IS_DIRECT(h->sub_mb_type[i])){
6424 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6425 continue;
6426 }
6427 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6428
6429 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6430 const int sub_mb_type= h->sub_mb_type[i];
6431 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6432 for(j=0; j<sub_partition_count[i]; j++){
6433 int mpx, mpy;
6434 int mx, my;
6435 const int index= 4*i + block_width*j;
6436 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6437 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6438 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6439
6440 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6441 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6442 tprintf("final mv:%d %d\n", mx, my);
6443
6444 if(IS_SUB_8X8(sub_mb_type)){
6445 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6446 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6447 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6448 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6449
6450 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6451 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6452 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6453 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6454 }else if(IS_SUB_8X4(sub_mb_type)){
6455 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6456 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6457
6458 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6459 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6460 }else if(IS_SUB_4X8(sub_mb_type)){
6461 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6462 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6463
6464 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6465 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6466 }else{
6467 assert(IS_SUB_4X4(sub_mb_type));
6468 mv_cache[ 0 ][0]= mx;
6469 mv_cache[ 0 ][1]= my;
6470
6471 mvd_cache[ 0 ][0]= mx - mpx;
6472 mvd_cache[ 0 ][1]= my - mpy;
6473 }
6474 }
6475 }else{
6476 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6477 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6478 p[0] = p[1] = p[8] = p[9] = 0;
6479 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6480 }
6481 }
6482 }
6483 } else if( IS_DIRECT(mb_type) ) {
6484 pred_direct_motion(h, &mb_type);
6485 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6486 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6487 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6488 } else {
6489 int list, mx, my, i, mpx, mpy;
6490 if(IS_16X16(mb_type)){
6491 for(list=0; list<2; list++){
6492 if(IS_DIR(mb_type, 0, list)){
6493 if(h->ref_count[list] > 0 ){
6494 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6495 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6496 }
6497 }else
6498 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6499 }
6500 for(list=0; list<2; list++){
6501 if(IS_DIR(mb_type, 0, list)){
6502 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6503
6504 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6505 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6506 tprintf("final mv:%d %d\n", mx, my);
6507
6508 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6509 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6510 }else
6511 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6512 }
6513 }
6514 else if(IS_16X8(mb_type)){
6515 for(list=0; list<2; list++){
6516 if(h->ref_count[list]>0){
6517 for(i=0; i<2; i++){
6518 if(IS_DIR(mb_type, i, list)){
6519 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6520 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6521 }else
6522 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6523 }
6524 }
6525 }
6526 for(list=0; list<2; list++){
6527 for(i=0; i<2; i++){
6528 if(IS_DIR(mb_type, i, list)){
6529 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6530 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6531 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6532 tprintf("final mv:%d %d\n", mx, my);
6533
6534 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6535 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6536 }else{
6537 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6538 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6539 }
6540 }
6541 }
6542 }else{
6543 assert(IS_8X16(mb_type));
6544 for(list=0; list<2; list++){
6545 if(h->ref_count[list]>0){
6546 for(i=0; i<2; i++){
6547 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6548 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6549 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6550 }else
6551 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6552 }
6553 }
6554 }
6555 for(list=0; list<2; list++){
6556 for(i=0; i<2; i++){
6557 if(IS_DIR(mb_type, i, list)){
6558 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6559 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6560 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6561
6562 tprintf("final mv:%d %d\n", mx, my);
6563 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6564 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6565 }else{
6566 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6567 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6568 }
6569 }
6570 }
6571 }
6572 }
6573
6574 if( IS_INTER( mb_type ) ) {
6575 h->chroma_pred_mode_table[mb_xy] = 0;
6576 write_back_motion( h, mb_type );
6577 }
6578
6579 if( !IS_INTRA16x16( mb_type ) ) {
6580 cbp = decode_cabac_mb_cbp_luma( h );
6581 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6582 }
6583
6584 h->cbp_table[mb_xy] = cbp;
6585
6586 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6587 if( decode_cabac_mb_transform_size( h ) )
6588 mb_type |= MB_TYPE_8x8DCT;
6589 }
6590 s->current_picture.mb_type[mb_xy]= mb_type;
6591
6592 if( cbp || IS_INTRA16x16( mb_type ) ) {
6593 const uint8_t *scan, *scan8x8, *dc_scan;
6594 int dqp;
6595
6596 if(IS_INTERLACED(mb_type)){
6597 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6598 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6599 dc_scan= luma_dc_field_scan;
6600 }else{
6601 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6602 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6603 dc_scan= luma_dc_zigzag_scan;
6604 }
6605
6606 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6607 if( dqp == INT_MIN ){
6608 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6609 return -1;
6610 }
6611 s->qscale += dqp;
6612 if(((unsigned)s->qscale) > 51){
6613 if(s->qscale<0) s->qscale+= 52;
6614 else s->qscale-= 52;
6615 }
6616 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6617
6618 if( IS_INTRA16x16( mb_type ) ) {
6619 int i;
6620 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6621 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6622 return -1;
6623 if( cbp&15 ) {
6624 for( i = 0; i < 16; i++ ) {
6625 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6626 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6627 return -1;
6628 }
6629 } else {
6630 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6631 }
6632 } else {
6633 int i8x8, i4x4;
6634 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6635 if( cbp & (1<<i8x8) ) {
6636 if( IS_8x8DCT(mb_type) ) {
6637 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6638 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6639 return -1;
6640 } else
6641 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6642 const int index = 4*i8x8 + i4x4;
6643 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6644 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6645 return -1;
6646 }
6647 } else {
6648 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6649 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6650 }
6651 }
6652 }
6653
6654 if( cbp&0x30 ){
6655 int c;
6656 for( c = 0; c < 2; c++ ) {
6657 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6658 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6659 return -1;
6660 }
6661 }
6662
6663 if( cbp&0x20 ) {
6664 int c, i;
6665 for( c = 0; c < 2; c++ ) {
6666 for( i = 0; i < 4; i++ ) {
6667 const int index = 16 + 4 * c + i;
6668 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6669 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6670 return -1;
6671 }
6672 }
6673 } else {
6674 uint8_t * const nnz= &h->non_zero_count_cache[0];
6675 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6676 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6677 }
6678 } else {
6679 uint8_t * const nnz= &h->non_zero_count_cache[0];
6680 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6681 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6682 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6683 h->last_qscale_diff = 0;
6684 }
6685
6686 s->current_picture.qscale_table[mb_xy]= s->qscale;
6687 write_back_non_zero_count(h);
6688
6689 if(MB_MBAFF){
6690 h->ref_count[0] >>= 1;
6691 h->ref_count[1] >>= 1;
6692 }
6693
6694 return 0;
6695}
6696
6697
6698static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6699 int i, d;
6700 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6701 const int alpha = alpha_table[index_a];
6702 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6703
6704 if( bS[0] < 4 ) {
6705 int8_t tc[4];
6706 for(i=0; i<4; i++)
6707 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6708 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6709 } else {
6710 /* 16px edge length, because bS=4 is triggered by being at
6711 * the edge of an intra MB, so all 4 bS are the same */
6712 for( d = 0; d < 16; d++ ) {
6713 const int p0 = pix[-1];
6714 const int p1 = pix[-2];
6715 const int p2 = pix[-3];
6716
6717 const int q0 = pix[0];
6718 const int q1 = pix[1];
6719 const int q2 = pix[2];
6720
6721 if( ABS( p0 - q0 ) < alpha &&
6722 ABS( p1 - p0 ) < beta &&
6723 ABS( q1 - q0 ) < beta ) {
6724
6725 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6726 if( ABS( p2 - p0 ) < beta)
6727 {
6728 const int p3 = pix[-4];
6729 /* p0', p1', p2' */
6730 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6731 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6732 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6733 } else {
6734 /* p0' */
6735 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6736 }
6737 if( ABS( q2 - q0 ) < beta)
6738 {
6739 const int q3 = pix[3];
6740 /* q0', q1', q2' */
6741 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6742 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6743 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6744 } else {
6745 /* q0' */
6746 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6747 }
6748 }else{
6749 /* p0', q0' */
6750 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6751 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6752 }
6753 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
6754 }
6755 pix += stride;
6756 }
6757 }
6758}
6759static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6760 int i;
6761 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6762 const int alpha = alpha_table[index_a];
6763 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6764
6765 if( bS[0] < 4 ) {
6766 int8_t tc[4];
6767 for(i=0; i<4; i++)
6768 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6769 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6770 } else {
6771 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6772 }
6773}
6774
6775static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6776 int i;
6777 for( i = 0; i < 16; i++, pix += stride) {
6778 int index_a;
6779 int alpha;
6780 int beta;
6781
6782 int qp_index;
6783 int bS_index = (i >> 1);
6784 if (!MB_FIELD) {
6785 bS_index &= ~1;
6786 bS_index |= (i & 1);
6787 }
6788
6789 if( bS[bS_index] == 0 ) {
6790 continue;
6791 }
6792
6793 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6794 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6795 alpha = alpha_table[index_a];
6796 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6797
6798 if( bS[bS_index] < 4 ) {
6799 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6800 const int p0 = pix[-1];
6801 const int p1 = pix[-2];
6802 const int p2 = pix[-3];
6803 const int q0 = pix[0];
6804 const int q1 = pix[1];
6805 const int q2 = pix[2];
6806
6807 if( ABS( p0 - q0 ) < alpha &&
6808 ABS( p1 - p0 ) < beta &&
6809 ABS( q1 - q0 ) < beta ) {
6810 int tc = tc0;
6811 int i_delta;
6812
6813 if( ABS( p2 - p0 ) < beta ) {
6814 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6815 tc++;
6816 }
6817 if( ABS( q2 - q0 ) < beta ) {
6818 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6819 tc++;
6820 }
6821
6822 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6823 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6824 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6825 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6826 }
6827 }else{
6828 const int p0 = pix[-1];
6829 const int p1 = pix[-2];
6830 const int p2 = pix[-3];
6831
6832 const int q0 = pix[0];
6833 const int q1 = pix[1];
6834 const int q2 = pix[2];
6835
6836 if( ABS( p0 - q0 ) < alpha &&
6837 ABS( p1 - p0 ) < beta &&
6838 ABS( q1 - q0 ) < beta ) {
6839
6840 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6841 if( ABS( p2 - p0 ) < beta)
6842 {
6843 const int p3 = pix[-4];
6844 /* p0', p1', p2' */
6845 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6846 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6847 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6848 } else {
6849 /* p0' */
6850 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6851 }
6852 if( ABS( q2 - q0 ) < beta)
6853 {
6854 const int q3 = pix[3];
6855 /* q0', q1', q2' */
6856 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6857 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6858 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6859 } else {
6860 /* q0' */
6861 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6862 }
6863 }else{
6864 /* p0', q0' */
6865 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6866 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6867 }
6868 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6869 }
6870 }
6871 }
6872}
6873static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6874 int i;
6875 for( i = 0; i < 8; i++, pix += stride) {
6876 int index_a;
6877 int alpha;
6878 int beta;
6879
6880 int qp_index;
6881 int bS_index = i;
6882
6883 if( bS[bS_index] == 0 ) {
6884 continue;
6885 }
6886
6887 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6888 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6889 alpha = alpha_table[index_a];
6890 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6891
6892 if( bS[bS_index] < 4 ) {
6893 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6894 const int p0 = pix[-1];
6895 const int p1 = pix[-2];
6896 const int q0 = pix[0];
6897 const int q1 = pix[1];
6898
6899 if( ABS( p0 - q0 ) < alpha &&
6900 ABS( p1 - p0 ) < beta &&
6901 ABS( q1 - q0 ) < beta ) {
6902 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6903
6904 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6905 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6906 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6907 }
6908 }else{
6909 const int p0 = pix[-1];
6910 const int p1 = pix[-2];
6911 const int q0 = pix[0];
6912 const int q1 = pix[1];
6913
6914 if( ABS( p0 - q0 ) < alpha &&
6915 ABS( p1 - p0 ) < beta &&
6916 ABS( q1 - q0 ) < beta ) {
6917
6918 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6919 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6920 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6921 }
6922 }
6923 }
6924}
6925
6926static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6927 int i, d;
6928 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6929 const int alpha = alpha_table[index_a];
6930 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6931 const int pix_next = stride;
6932
6933 if( bS[0] < 4 ) {
6934 int8_t tc[4];
6935 for(i=0; i<4; i++)
6936 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6937 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6938 } else {
6939 /* 16px edge length, see filter_mb_edgev */
6940 for( d = 0; d < 16; d++ ) {
6941 const int p0 = pix[-1*pix_next];
6942 const int p1 = pix[-2*pix_next];
6943 const int p2 = pix[-3*pix_next];
6944 const int q0 = pix[0];
6945 const int q1 = pix[1*pix_next];
6946 const int q2 = pix[2*pix_next];
6947
6948 if( ABS( p0 - q0 ) < alpha &&
6949 ABS( p1 - p0 ) < beta &&
6950 ABS( q1 - q0 ) < beta ) {
6951
6952 const int p3 = pix[-4*pix_next];
6953 const int q3 = pix[ 3*pix_next];
6954
6955 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6956 if( ABS( p2 - p0 ) < beta) {
6957 /* p0', p1', p2' */
6958 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6959 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6960 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6961 } else {
6962 /* p0' */
6963 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6964 }
6965 if( ABS( q2 - q0 ) < beta) {
6966 /* q0', q1', q2' */
6967 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6968 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6969 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6970 } else {
6971 /* q0' */
6972 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6973 }
6974 }else{
6975 /* p0', q0' */
6976 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6977 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6978 }
6979 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6980 }
6981 pix++;
6982 }
6983 }
6984}
6985
6986static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6987 int i;
6988 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6989 const int alpha = alpha_table[index_a];
6990 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6991
6992 if( bS[0] < 4 ) {
6993 int8_t tc[4];
6994 for(i=0; i<4; i++)
6995 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6996 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6997 } else {
6998 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6999 }
7000}
7001
7002static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7003 MpegEncContext * const s = &h->s;
7004 const int mb_xy= mb_x + mb_y*s->mb_stride;
7005 const int mb_type = s->current_picture.mb_type[mb_xy];
7006 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7007 int first_vertical_edge_done = 0;
7008 int dir;
7009 /* FIXME: A given frame may occupy more than one position in
7010 * the reference list. So ref2frm should be populated with
7011 * frame numbers, not indices. */
7012 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7013 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7014
7015 //for sufficiently low qp, filtering wouldn't do anything
7016 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7017 if(!FRAME_MBAFF){
7018 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7019 int qp = s->current_picture.qscale_table[mb_xy];
7020 if(qp <= qp_thresh
7021 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7022 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
7023 return;
7024 }
7025 }
7026
7027 if (FRAME_MBAFF
7028 // left mb is in picture
7029 && h->slice_table[mb_xy-1] != 255
7030 // and current and left pair do not have the same interlaced type
7031 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7032 // and left mb is in the same slice if deblocking_filter == 2
7033 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7034 /* First vertical edge is different in MBAFF frames
7035 * There are 8 different bS to compute and 2 different Qp
7036 */
7037 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7038 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7039 int bS[8];
7040 int qp[2];
7041 int chroma_qp[2];
7042 int mb_qp, mbn0_qp, mbn1_qp;
7043 int i;
7044 first_vertical_edge_done = 1;
7045
7046 if( IS_INTRA(mb_type) )
7047 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7048 else {
7049 for( i = 0; i < 8; i++ ) {
7050 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7051
7052 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7053 bS[i] = 4;
7054 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7055 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7056 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
7057 bS[i] = 2;
7058 else
7059 bS[i] = 1;
7060 }
7061 }
7062
7063 mb_qp = s->current_picture.qscale_table[mb_xy];
7064 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7065 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7066 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7067 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7068 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7069 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7070 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7071 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7072
7073 /* Filter edge */
7074 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7075 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7076 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7077 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7078 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7079 }
7080 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7081 for( dir = 0; dir < 2; dir++ )
7082 {
7083 int edge;
7084 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7085 const int mbm_type = s->current_picture.mb_type[mbm_xy];
7086 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7087
7088 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7089 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7090 // how often to recheck mv-based bS when iterating between edges
7091 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7092 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7093 // how often to recheck mv-based bS when iterating along each edge
7094 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
7095
7096 if (first_vertical_edge_done) {
7097 start = 1;
7098 first_vertical_edge_done = 0;
7099 }
7100
7101 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7102 start = 1;
7103
7104 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7105 && !IS_INTERLACED(mb_type)
7106 && IS_INTERLACED(mbm_type)
7107 ) {
7108 // This is a special case in the norm where the filtering must
7109 // be done twice (one each of the field) even if we are in a
7110 // frame macroblock.
7111 //
7112 static const int nnz_idx[4] = {4,5,6,3};
7113 unsigned int tmp_linesize = 2 * linesize;
7114 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7115 int mbn_xy = mb_xy - 2 * s->mb_stride;
7116 int qp, chroma_qp;
7117 int i, j;
7118 int bS[4];
7119
7120 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7121 if( IS_INTRA(mb_type) ||
7122 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7123 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7124 } else {
7125 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7126 for( i = 0; i < 4; i++ ) {
7127 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7128 mbn_nnz[nnz_idx[i]] != 0 )
7129 bS[i] = 2;
7130 else
7131 bS[i] = 1;
7132 }
7133 }
7134 // Do not use s->qscale as luma quantizer because it has not the same
7135 // value in IPCM macroblocks.
7136 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7137 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7138 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7139 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7140 chroma_qp = ( h->chroma_qp +
7141 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7142 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7143 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7144 }
7145
7146 start = 1;
7147 }
7148
7149 /* Calculate bS */
7150 for( edge = start; edge < edges; edge++ ) {
7151 /* mbn_xy: neighbor macroblock */
7152 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7153 const int mbn_type = s->current_picture.mb_type[mbn_xy];
7154 int bS[4];
7155 int qp;
7156
7157 if( (edge&1) && IS_8x8DCT(mb_type) )
7158 continue;
7159
7160 if( IS_INTRA(mb_type) ||
7161 IS_INTRA(mbn_type) ) {
7162 int value;
7163 if (edge == 0) {
7164 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7165 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7166 ) {
7167 value = 4;
7168 } else {
7169 value = 3;
7170 }
7171 } else {
7172 value = 3;
7173 }
7174 bS[0] = bS[1] = bS[2] = bS[3] = value;
7175 } else {
7176 int i, l;
7177 int mv_done;
7178
7179 if( edge & mask_edge ) {
7180 bS[0] = bS[1] = bS[2] = bS[3] = 0;
7181 mv_done = 1;
7182 }
7183 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7184 bS[0] = bS[1] = bS[2] = bS[3] = 1;
7185 mv_done = 1;
7186 }
7187 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7188 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7189 int bn_idx= b_idx - (dir ? 8:1);
7190 int v = 0;
7191 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7192 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7193 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7194 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7195 }
7196 bS[0] = bS[1] = bS[2] = bS[3] = v;
7197 mv_done = 1;
7198 }
7199 else
7200 mv_done = 0;
7201
7202 for( i = 0; i < 4; i++ ) {
7203 int x = dir == 0 ? edge : i;
7204 int y = dir == 0 ? i : edge;
7205 int b_idx= 8 + 4 + x + 8*y;
7206 int bn_idx= b_idx - (dir ? 8:1);
7207
7208 if( h->non_zero_count_cache[b_idx] != 0 ||
7209 h->non_zero_count_cache[bn_idx] != 0 ) {
7210 bS[i] = 2;
7211 }
7212 else if(!mv_done)
7213 {
7214 bS[i] = 0;
7215 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7216 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7217 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7218 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7219 bS[i] = 1;
7220 break;
7221 }
7222 }
7223 }
7224 }
7225
7226 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7227 continue;
7228 }
7229
7230 /* Filter edge */
7231 // Do not use s->qscale as luma quantizer because it has not the same
7232 // value in IPCM macroblocks.
7233 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7234 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7235 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7236 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7237 if( dir == 0 ) {
7238 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7239 if( (edge&1) == 0 ) {
7240 int chroma_qp = ( h->chroma_qp +
7241 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7242 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7243 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7244 }
7245 } else {
7246 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7247 if( (edge&1) == 0 ) {
7248 int chroma_qp = ( h->chroma_qp +
7249 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7250 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7251 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7252 }
7253 }
7254 }
7255 }
7256}
7257
7258static int decode_slice(H264Context *h){
7259 MpegEncContext * const s = &h->s;
7260 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7261
7262 s->mb_skip_run= -1;
7263
7264 if( h->pps.cabac ) {
7265 int i;
7266
7267 /* realign */
7268 align_get_bits( &s->gb );
7269
7270 /* init cabac */
7271 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
7272 ff_init_cabac_decoder( &h->cabac,
7273 s->gb.buffer + get_bits_count(&s->gb)/8,
7274 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7275 /* calculate pre-state */
7276 for( i= 0; i < 460; i++ ) {
7277 int pre;
7278 if( h->slice_type == I_TYPE )
7279 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7280 else
7281 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7282
7283 if( pre <= 63 )
7284 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7285 else
7286 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7287 }
7288
7289 for(;;){
7290 int ret = decode_mb_cabac(h);
7291 int eos;
7292
7293 if(ret>=0) hl_decode_mb(h);
7294
7295 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7296 s->mb_y++;
7297
7298 if(ret>=0) ret = decode_mb_cabac(h);
7299
7300 if(ret>=0) hl_decode_mb(h);
7301 s->mb_y--;
7302 }
7303 eos = get_cabac_terminate( &h->cabac );
7304
7305 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
7306 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7307 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7308 return -1;
7309 }
7310
7311 if( ++s->mb_x >= s->mb_width ) {
7312 s->mb_x = 0;
7313 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7314 ++s->mb_y;
7315 if(FRAME_MBAFF) {
7316 ++s->mb_y;
7317 }
7318 }
7319
7320 if( eos || s->mb_y >= s->mb_height ) {
7321 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7322 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7323 return 0;
7324 }
7325 }
7326
7327 } else {
7328 for(;;){
7329 int ret = decode_mb_cavlc(h);
7330
7331 if(ret>=0) hl_decode_mb(h);
7332
7333 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7334 s->mb_y++;
7335 ret = decode_mb_cavlc(h);
7336
7337 if(ret>=0) hl_decode_mb(h);
7338 s->mb_y--;
7339 }
7340
7341 if(ret<0){
7342 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7343 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7344
7345 return -1;
7346 }
7347
7348 if(++s->mb_x >= s->mb_width){
7349 s->mb_x=0;
7350 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7351 ++s->mb_y;
7352 if(FRAME_MBAFF) {
7353 ++s->mb_y;
7354 }
7355 if(s->mb_y >= s->mb_height){
7356 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7357
7358 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7359 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7360
7361 return 0;
7362 }else{
7363 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7364
7365 return -1;
7366 }
7367 }
7368 }
7369
7370 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7371 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7372 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7373 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7374
7375 return 0;
7376 }else{
7377 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7378
7379 return -1;
7380 }
7381 }
7382 }
7383 }
7384
7385#if 0
7386 for(;s->mb_y < s->mb_height; s->mb_y++){
7387 for(;s->mb_x < s->mb_width; s->mb_x++){
7388 int ret= decode_mb(h);
7389
7390 hl_decode_mb(h);
7391
7392 if(ret<0){
7393 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7394 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7395
7396 return -1;
7397 }
7398
7399 if(++s->mb_x >= s->mb_width){
7400 s->mb_x=0;
7401 if(++s->mb_y >= s->mb_height){
7402 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7403 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7404
7405 return 0;
7406 }else{
7407 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7408
7409 return -1;
7410 }
7411 }
7412 }
7413
7414 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
7415 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7416 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7417
7418 return 0;
7419 }else{
7420 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7421
7422 return -1;
7423 }
7424 }
7425 }
7426 s->mb_x=0;
7427 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7428 }
7429#endif
7430 return -1; //not reached
7431}
7432
7433static int decode_unregistered_user_data(H264Context *h, int size){
7434 MpegEncContext * const s = &h->s;
7435 uint8_t user_data[16+256];
7436 int e, build, i;
7437
7438 if(size<16)
7439 return -1;
7440
7441 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7442 user_data[i]= get_bits(&s->gb, 8);
7443 }
7444
7445 user_data[i]= 0;
7446 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7447 if(e==1 && build>=0)
7448 h->x264_build= build;
7449
7450 if(s->avctx->debug & FF_DEBUG_BUGS)
7451 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
7452
7453 for(; i<size; i++)
7454 skip_bits(&s->gb, 8);
7455
7456 return 0;
7457}
7458
7459static int decode_sei(H264Context *h){
7460 MpegEncContext * const s = &h->s;
7461
7462 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
7463 int size, type;
7464
7465 type=0;
7466 do{
7467 type+= show_bits(&s->gb, 8);
7468 }while(get_bits(&s->gb, 8) == 255);
7469
7470 size=0;
7471 do{
7472 size+= show_bits(&s->gb, 8);
7473 }while(get_bits(&s->gb, 8) == 255);
7474
7475 switch(type){
7476 case 5:
7477 if(decode_unregistered_user_data(h, size) < 0)
7478 return -1;
7479 break;
7480 default:
7481 skip_bits(&s->gb, 8*size);
7482 }
7483
7484 //FIXME check bits here
7485 align_get_bits(&s->gb);
7486 }
7487
7488 return 0;
7489}
7490
7491static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7492 MpegEncContext * const s = &h->s;
7493 int cpb_count, i;
7494 cpb_count = get_ue_golomb(&s->gb) + 1;
7495 get_bits(&s->gb, 4); /* bit_rate_scale */
7496 get_bits(&s->gb, 4); /* cpb_size_scale */
7497 for(i=0; i<cpb_count; i++){
7498 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7499 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7500 get_bits1(&s->gb); /* cbr_flag */
7501 }
7502 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7503 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7504 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7505 get_bits(&s->gb, 5); /* time_offset_length */
7506}
7507
7508static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7509 MpegEncContext * const s = &h->s;
7510 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7511 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7512
7513 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7514
7515 if( aspect_ratio_info_present_flag ) {
7516 aspect_ratio_idc= get_bits(&s->gb, 8);
7517 if( aspect_ratio_idc == EXTENDED_SAR ) {
7518 sps->sar.num= get_bits(&s->gb, 16);
7519 sps->sar.den= get_bits(&s->gb, 16);
7520 }else if(aspect_ratio_idc < 14){
7521 sps->sar= pixel_aspect[aspect_ratio_idc];
7522 }else{
7523 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7524 return -1;
7525 }
7526 }else{
7527 sps->sar.num=
7528 sps->sar.den= 0;
7529 }
7530// s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7531
7532 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7533 get_bits1(&s->gb); /* overscan_appropriate_flag */
7534 }
7535
7536 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7537 get_bits(&s->gb, 3); /* video_format */
7538 get_bits1(&s->gb); /* video_full_range_flag */
7539 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7540 get_bits(&s->gb, 8); /* colour_primaries */
7541 get_bits(&s->gb, 8); /* transfer_characteristics */
7542 get_bits(&s->gb, 8); /* matrix_coefficients */
7543 }
7544 }
7545
7546 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7547 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7548 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7549 }
7550
7551 sps->timing_info_present_flag = get_bits1(&s->gb);
7552 if(sps->timing_info_present_flag){
7553 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7554 sps->time_scale = get_bits_long(&s->gb, 32);
7555 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7556 }
7557
7558 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7559 if(nal_hrd_parameters_present_flag)
7560 decode_hrd_parameters(h, sps);
7561 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7562 if(vcl_hrd_parameters_present_flag)
7563 decode_hrd_parameters(h, sps);
7564 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7565 get_bits1(&s->gb); /* low_delay_hrd_flag */
7566 get_bits1(&s->gb); /* pic_struct_present_flag */
7567
7568 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7569 if(sps->bitstream_restriction_flag){
7570 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7571 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7572 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7573 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7574 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7575 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7576 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
7577 }
7578
7579 return 0;
7580}
7581
7582static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7583 const uint8_t *jvt_list, const uint8_t *fallback_list){
7584 MpegEncContext * const s = &h->s;
7585 int i, last = 8, next = 8;
7586 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7587 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7588 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7589 else
7590 for(i=0;i<size;i++){
7591 if(next)
7592 next = (last + get_se_golomb(&s->gb)) & 0xff;
7593 if(!i && !next){ /* matrix not written, we use the preset one */
7594 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7595 break;
7596 }
7597 last = factors[scan[i]] = next ? next : last;
7598 }
7599}
7600
7601static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7602 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7603 MpegEncContext * const s = &h->s;
7604 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7605 const uint8_t *fallback[4] = {
7606 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7607 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7608 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7609 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7610 };
7611 if(get_bits1(&s->gb)){
7612 sps->scaling_matrix_present |= is_sps;
7613 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7614 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7615 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7616 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7617 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7618 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7619 if(is_sps || pps->transform_8x8_mode){
7620 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7621 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7622 }
7623 } else if(fallback_sps) {
7624 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7625 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7626 }
7627}
7628
7629static inline int decode_seq_parameter_set(H264Context *h){
7630 MpegEncContext * const s = &h->s;
7631 int profile_idc, level_idc;
7632 int sps_id, i;
7633 SPS *sps;
7634
7635 profile_idc= get_bits(&s->gb, 8);
7636 get_bits1(&s->gb); //constraint_set0_flag
7637 get_bits1(&s->gb); //constraint_set1_flag
7638 get_bits1(&s->gb); //constraint_set2_flag
7639 get_bits1(&s->gb); //constraint_set3_flag
7640 get_bits(&s->gb, 4); // reserved
7641 level_idc= get_bits(&s->gb, 8);
7642 sps_id= get_ue_golomb(&s->gb);
7643
7644 sps= &h->sps_buffer[ sps_id ];
7645 sps->profile_idc= profile_idc;
7646 sps->level_idc= level_idc;
7647
7648 if(sps->profile_idc >= 100){ //high profile
7649 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7650 get_bits1(&s->gb); //residual_color_transform_flag
7651 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7652 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7653 sps->transform_bypass = get_bits1(&s->gb);
7654 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7655 }else
7656 sps->scaling_matrix_present = 0;
7657
7658 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7659 sps->poc_type= get_ue_golomb(&s->gb);
7660
7661 if(sps->poc_type == 0){ //FIXME #define
7662 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7663 } else if(sps->poc_type == 1){//FIXME #define
7664 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7665 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7666 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7667 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7668
7669 for(i=0; i<sps->poc_cycle_length; i++)
7670 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7671 }
7672 if(sps->poc_type > 2){
7673 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7674 return -1;
7675 }
7676
7677 sps->ref_frame_count= get_ue_golomb(&s->gb);
7678 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7679 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7680 }
7681 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7682 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7683 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7684 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7685 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7686 return -1;
7687
7688 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7689 if(!sps->frame_mbs_only_flag)
7690 sps->mb_aff= get_bits1(&s->gb);
7691 else
7692 sps->mb_aff= 0;
7693
7694 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7695
7696#ifndef ALLOW_INTERLACE
7697 if(sps->mb_aff)
7698 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it compilation time\n");
7699#endif
7700 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7701 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7702
7703 sps->crop= get_bits1(&s->gb);
7704 if(sps->crop){
7705 sps->crop_left = get_ue_golomb(&s->gb);
7706 sps->crop_right = get_ue_golomb(&s->gb);
7707 sps->crop_top = get_ue_golomb(&s->gb);
7708 sps->crop_bottom= get_ue_golomb(&s->gb);
7709 if(sps->crop_left || sps->crop_top){
7710 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7711 }
7712 }else{
7713 sps->crop_left =
7714 sps->crop_right =
7715 sps->crop_top =
7716 sps->crop_bottom= 0;
7717 }
7718
7719 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7720 if( sps->vui_parameters_present_flag )
7721 decode_vui_parameters(h, sps);
7722
7723 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7724 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7725 sps_id, sps->profile_idc, sps->level_idc,
7726 sps->poc_type,
7727 sps->ref_frame_count,
7728 sps->mb_width, sps->mb_height,
7729 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7730 sps->direct_8x8_inference_flag ? "8B8" : "",
7731 sps->crop_left, sps->crop_right,
7732 sps->crop_top, sps->crop_bottom,
7733 sps->vui_parameters_present_flag ? "VUI" : ""
7734 );
7735 }
7736 return 0;
7737}
7738
7739static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7740 MpegEncContext * const s = &h->s;
7741 int pps_id= get_ue_golomb(&s->gb);
7742 PPS *pps= &h->pps_buffer[pps_id];
7743
7744 pps->sps_id= get_ue_golomb(&s->gb);
7745 pps->cabac= get_bits1(&s->gb);
7746 pps->pic_order_present= get_bits1(&s->gb);
7747 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7748 if(pps->slice_group_count > 1 ){
7749 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7750 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7751 switch(pps->mb_slice_group_map_type){
7752 case 0:
7753#if 0
7754| for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7755| run_length[ i ] |1 |ue(v) |
7756#endif
7757 break;
7758 case 2:
7759#if 0
7760| for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7761|{ | | |
7762| top_left_mb[ i ] |1 |ue(v) |
7763| bottom_right_mb[ i ] |1 |ue(v) |
7764| } | | |
7765#endif
7766 break;
7767 case 3:
7768 case 4:
7769 case 5:
7770#if 0
7771| slice_group_change_direction_flag |1 |u(1) |
7772| slice_group_change_rate_minus1 |1 |ue(v) |
7773#endif
7774 break;
7775 case 6:
7776#if 0
7777| slice_group_id_cnt_minus1 |1 |ue(v) |
7778| for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7779|) | | |
7780| slice_group_id[ i ] |1 |u(v) |
7781#endif
7782 break;
7783 }
7784 }
7785 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7786 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7787 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7788 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7789 return -1;
7790 }
7791
7792 pps->weighted_pred= get_bits1(&s->gb);
7793 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7794 pps->init_qp= get_se_golomb(&s->gb) + 26;
7795 pps->init_qs= get_se_golomb(&s->gb) + 26;
7796 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7797 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7798 pps->constrained_intra_pred= get_bits1(&s->gb);
7799 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7800
7801 pps->transform_8x8_mode= 0;
7802 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7803 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7804 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7805
7806 if(get_bits_count(&s->gb) < bit_length){
7807 pps->transform_8x8_mode= get_bits1(&s->gb);
7808 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7809 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7810 }
7811
7812 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7813 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7814 pps_id, pps->sps_id,
7815 pps->cabac ? "CABAC" : "CAVLC",
7816 pps->slice_group_count,
7817 pps->ref_count[0], pps->ref_count[1],
7818 pps->weighted_pred ? "weighted" : "",
7819 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7820 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7821 pps->constrained_intra_pred ? "CONSTR" : "",
7822 pps->redundant_pic_cnt_present ? "REDU" : "",
7823 pps->transform_8x8_mode ? "8x8DCT" : ""
7824 );
7825 }
7826
7827 return 0;
7828}
7829
7830/**
7831 * finds the end of the current frame in the bitstream.
7832 * @return the position of the first byte of the next frame, or -1
7833 */
7834static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7835 int i;
7836 uint32_t state;
7837 ParseContext *pc = &(h->s.parse_context);
7838//printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7839// mb_addr= pc->mb_addr - 1;
7840 state= pc->state;
7841 for(i=0; i<=buf_size; i++){
7842 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7843 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7844 if(pc->frame_start_found){
7845 // If there isn't one more byte in the buffer
7846 // the test on first_mb_in_slice cannot be done yet
7847 // do it at next call.
7848 if (i >= buf_size) break;
7849 if (buf[i] & 0x80) {
7850 // first_mb_in_slice is 0, probably the first nal of a new
7851 // slice
7852 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7853 pc->state=-1;
7854 pc->frame_start_found= 0;
7855 return i-4;
7856 }
7857 }
7858 pc->frame_start_found = 1;
7859 }
7860 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7861 if(pc->frame_start_found){
7862 pc->state=-1;
7863 pc->frame_start_found= 0;
7864 return i-4;
7865 }
7866 }
7867 if (i<buf_size)
7868 state= (state<<8) | buf[i];
7869 }
7870
7871 pc->state= state;
7872 return END_NOT_FOUND;
7873}
7874
7875#ifdef CONFIG_H264_PARSER
7876static int h264_parse(AVCodecParserContext *s,
7877 AVCodecContext *avctx,
7878 uint8_t **poutbuf, int *poutbuf_size,
7879 const uint8_t *buf, int buf_size)
7880{
7881 H264Context *h = s->priv_data;
7882 ParseContext *pc = &h->s.parse_context;
7883 int next;
7884
7885 next= find_frame_end(h, buf, buf_size);
7886
7887 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7888 *poutbuf = NULL;
7889 *poutbuf_size = 0;
7890 return buf_size;
7891 }
7892
7893 *poutbuf = (uint8_t *)buf;
7894 *poutbuf_size = buf_size;
7895 return next;
7896}
7897
7898static int h264_split(AVCodecContext *avctx,
7899 const uint8_t *buf, int buf_size)
7900{
7901 int i;
7902 uint32_t state = -1;
7903 int has_sps= 0;
7904
7905 for(i=0; i<=buf_size; i++){
7906 if((state&0xFFFFFF1F) == 0x107)
7907 has_sps=1;
7908/* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7909 }*/
7910 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
7911 if(has_sps){
7912 while(i>4 && buf[i-5]==0) i--;
7913 return i-4;
7914 }
7915 }
7916 if (i<buf_size)
7917 state= (state<<8) | buf[i];
7918 }
7919 return 0;
7920}
7921#endif /* CONFIG_H264_PARSER */
7922
7923static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7924 MpegEncContext * const s = &h->s;
7925 AVCodecContext * const avctx= s->avctx;
7926 int buf_index=0;
7927#if 0
7928 int i;
7929 for(i=0; i<50; i++){
7930 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7931 }
7932#endif
7933 h->slice_num = 0;
7934 s->current_picture_ptr= NULL;
7935 for(;;){
7936 int consumed;
7937 int dst_length;
7938 int bit_length;
7939 uint8_t *ptr;
7940 int i, nalsize = 0;
7941
7942 if(h->is_avc) {
7943 if(buf_index >= buf_size) break;
7944 nalsize = 0;
7945 for(i = 0; i < h->nal_length_size; i++)
7946 nalsize = (nalsize << 8) | buf[buf_index++];
7947 if(nalsize <= 1){
7948 if(nalsize == 1){
7949 buf_index++;
7950 continue;
7951 }else{
7952 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7953 break;
7954 }
7955 }
7956 } else {
7957 // start code prefix search
7958 for(; buf_index + 3 < buf_size; buf_index++){
7959 // this should allways succeed in the first iteration
7960 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7961 break;
7962 }
7963
7964 if(buf_index+3 >= buf_size) break;
7965
7966 buf_index+=3;
7967 }
7968
7969 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7970 while(ptr[dst_length - 1] == 0 && dst_length > 1)
7971 dst_length--;
7972 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7973
7974 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7975 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7976 }
7977
7978 if (h->is_avc && (nalsize != consumed))
7979 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7980
7981 buf_index += consumed;
7982
7983 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
7984 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7985 continue;
7986
7987 switch(h->nal_unit_type){
7988 case NAL_IDR_SLICE:
7989 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7990 case NAL_SLICE:
7991 init_get_bits(&s->gb, ptr, bit_length);
7992 h->intra_gb_ptr=
7993 h->inter_gb_ptr= &s->gb;
7994 s->data_partitioning = 0;
7995
7996 if(decode_slice_header(h) < 0){
7997 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7998 break;
7999 }
8000 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
8001 if(h->redundant_pic_count==0 && s->hurry_up < 5
8002 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8003 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8004 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8005 && avctx->skip_frame < AVDISCARD_ALL)
8006 decode_slice(h);
8007 break;
8008 case NAL_DPA:
8009 init_get_bits(&s->gb, ptr, bit_length);
8010 h->intra_gb_ptr=
8011 h->inter_gb_ptr= NULL;
8012 s->data_partitioning = 1;
8013
8014 if(decode_slice_header(h) < 0){
8015 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8016 }
8017 break;
8018 case NAL_DPB:
8019 init_get_bits(&h->intra_gb, ptr, bit_length);
8020 h->intra_gb_ptr= &h->intra_gb;
8021 break;
8022 case NAL_DPC:
8023 init_get_bits(&h->inter_gb, ptr, bit_length);
8024 h->inter_gb_ptr= &h->inter_gb;
8025
8026 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8027 && s->hurry_up < 5
8028 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8029 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8030 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8031 && avctx->skip_frame < AVDISCARD_ALL)
8032 decode_slice(h);
8033 break;
8034 case NAL_SEI:
8035 init_get_bits(&s->gb, ptr, bit_length);
8036 decode_sei(h);
8037 break;
8038 case NAL_SPS:
8039 init_get_bits(&s->gb, ptr, bit_length);
8040 decode_seq_parameter_set(h);
8041
8042 if(s->flags& CODEC_FLAG_LOW_DELAY)
8043 s->low_delay=1;
8044
8045 if(avctx->has_b_frames < 2)
8046 avctx->has_b_frames= !s->low_delay;
8047 break;
8048 case NAL_PPS:
8049 init_get_bits(&s->gb, ptr, bit_length);
8050
8051 decode_picture_parameter_set(h, bit_length);
8052
8053 break;
8054 case NAL_AUD:
8055 case NAL_END_SEQUENCE:
8056 case NAL_END_STREAM:
8057 case NAL_FILLER_DATA:
8058 case NAL_SPS_EXT:
8059 case NAL_AUXILIARY_SLICE:
8060 break;
8061 default:
8062 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8063 }
8064 }
8065
8066 if(!s->current_picture_ptr) return buf_index; //no frame
8067
8068 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8069 s->current_picture_ptr->pict_type= s->pict_type;
8070
8071 h->prev_frame_num_offset= h->frame_num_offset;
8072 h->prev_frame_num= h->frame_num;
8073 if(s->current_picture_ptr->reference){
8074 h->prev_poc_msb= h->poc_msb;
8075 h->prev_poc_lsb= h->poc_lsb;
8076 }
8077 if(s->current_picture_ptr->reference)
8078 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8079
8080 ff_er_frame_end(s);
8081
8082 MPV_frame_end(s);
8083
8084 return buf_index;
8085}
8086
8087/**
8088 * returns the number of bytes consumed for building the current frame
8089 */
8090static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
8091 if(s->flags&CODEC_FLAG_TRUNCATED){
8092 pos -= s->parse_context.last_index;
8093 if(pos<0) pos=0; // FIXME remove (unneeded?)
8094
8095 return pos;
8096 }else{
8097 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8098 if(pos+10>buf_size) pos=buf_size; // oops ;)
8099
8100 return pos;
8101 }
8102}
8103
8104static int decode_frame(AVCodecContext *avctx,
8105 void *data, int *data_size,
8106 uint8_t *buf, int buf_size)
8107{
8108 H264Context *h = avctx->priv_data;
8109 MpegEncContext *s = &h->s;
8110 AVFrame *pict = data;
8111 int buf_index;
8112
8113 s->flags= avctx->flags;
8114 s->flags2= avctx->flags2;
8115
8116 /* no supplementary picture */
8117 if (buf_size == 0) {
8118 return 0;
8119 }
8120
8121 if(s->flags&CODEC_FLAG_TRUNCATED){
8122 int next= find_frame_end(h, buf, buf_size);
8123
8124 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8125 return buf_size;
8126//printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
8127 }
8128
8129 if(h->is_avc && !h->got_avcC) {
8130 int i, cnt, nalsize;
8131 unsigned char *p = avctx->extradata;
8132 if(avctx->extradata_size < 7) {
8133 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8134 return -1;
8135 }
8136 if(*p != 1) {
8137 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8138 return -1;
8139 }
8140 /* sps and pps in the avcC always have length coded with 2 bytes,
8141 so put a fake nal_length_size = 2 while parsing them */
8142 h->nal_length_size = 2;
8143 // Decode sps from avcC
8144 cnt = *(p+5) & 0x1f; // Number of sps
8145 p += 6;
8146 for (i = 0; i < cnt; i++) {
8147 nalsize = BE_16(p) + 2;
8148 if(decode_nal_units(h, p, nalsize) < 0) {
8149 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8150 return -1;
8151 }
8152 p += nalsize;
8153 }
8154 // Decode pps from avcC
8155 cnt = *(p++); // Number of pps
8156 for (i = 0; i < cnt; i++) {
8157 nalsize = BE_16(p) + 2;
8158 if(decode_nal_units(h, p, nalsize) != nalsize) {
8159 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8160 return -1;
8161 }
8162 p += nalsize;
8163 }
8164 // Now store right nal length size, that will be use to parse all other nals
8165 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8166 // Do not reparse avcC
8167 h->got_avcC = 1;
8168 }
8169
8170 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8171 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8172 return -1;
8173 }
8174
8175 buf_index=decode_nal_units(h, buf, buf_size);
8176 if(buf_index < 0)
8177 return -1;
8178
8179 //FIXME do something with unavailable reference frames
8180
8181// if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8182 if(!s->current_picture_ptr){
8183 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8184 return -1;
8185 }
8186
8187 {
8188 Picture *out = s->current_picture_ptr;
8189#if 0 //decode order
8190 *data_size = sizeof(AVFrame);
8191#else
8192 /* Sort B-frames into display order */
8193 Picture *cur = s->current_picture_ptr;
8194 Picture *prev = h->delayed_output_pic;
8195 int i, pics, cross_idr, out_of_order, out_idx;
8196
8197 if(h->sps.bitstream_restriction_flag
8198 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8199 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8200 s->low_delay = 0;
8201 }
8202
8203 pics = 0;
8204 while(h->delayed_pic[pics]) pics++;
8205 h->delayed_pic[pics++] = cur;
8206 if(cur->reference == 0)
8207 cur->reference = 1;
8208
8209 cross_idr = 0;
8210 for(i=0; h->delayed_pic[i]; i++)
8211 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
8212 cross_idr = 1;
8213
8214 out = h->delayed_pic[0];
8215 out_idx = 0;
8216 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8217 if(h->delayed_pic[i]->poc < out->poc){
8218 out = h->delayed_pic[i];
8219 out_idx = i;
8220 }
8221
8222 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8223 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8224 { }
8225 else if(prev && pics <= s->avctx->has_b_frames)
8226 out = prev;
8227 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8228 || (s->low_delay &&
8229 ((!cross_idr && prev && out->poc > prev->poc + 2)
8230 || cur->pict_type == B_TYPE)))
8231 {
8232 s->low_delay = 0;
8233 s->avctx->has_b_frames++;
8234 out = prev;
8235 }
8236 else if(out_of_order)
8237 out = prev;
8238
8239 if(out_of_order || pics > s->avctx->has_b_frames){
8240 for(i=out_idx; h->delayed_pic[i]; i++)
8241 h->delayed_pic[i] = h->delayed_pic[i+1];
8242 }
8243
8244 if(prev == out)
8245 *data_size = 0;
8246 else
8247 *data_size = sizeof(AVFrame);
8248 if(prev && prev != out && prev->reference == 1)
8249 prev->reference = 0;
8250 h->delayed_output_pic = out;
8251#endif
8252
8253 if(out)
8254 *pict= *(AVFrame*)out;
8255 else
8256 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8257 }
8258
8259 assert(pict->data[0] || !*data_size);
8260 ff_print_debug_info(s, pict);
8261//printf("out %d\n", (int)pict->data[0]);
8262#if 0 //?
8263
8264 /* Return the Picture timestamp as the frame number */
8265 /* we substract 1 because it is added on utils.c */
8266 avctx->frame_number = s->picture_number - 1;
8267#endif
8268 return get_consumed_bytes(s, buf_index, buf_size);
8269}
8270#if 0
8271static inline void fill_mb_avail(H264Context *h){
8272 MpegEncContext * const s = &h->s;
8273 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8274
8275 if(s->mb_y){
8276 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8277 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8278 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8279 }else{
8280 h->mb_avail[0]=
8281 h->mb_avail[1]=
8282 h->mb_avail[2]= 0;
8283 }
8284 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8285 h->mb_avail[4]= 1; //FIXME move out
8286 h->mb_avail[5]= 0; //FIXME move out
8287}
8288#endif
8289
8290#if 0 //selftest
8291#define COUNT 8000
8292#define SIZE (COUNT*40)
8293int main(){
8294 int i;
8295 uint8_t temp[SIZE];
8296 PutBitContext pb;
8297 GetBitContext gb;
8298// int int_temp[10000];
8299 DSPContext dsp;
8300 AVCodecContext avctx;
8301
8302 dsputil_init(&dsp, &avctx);
8303
8304 init_put_bits(&pb, temp, SIZE);
8305 printf("testing unsigned exp golomb\n");
8306 for(i=0; i<COUNT; i++){
8307 START_TIMER
8308 set_ue_golomb(&pb, i);
8309 STOP_TIMER("set_ue_golomb");
8310 }
8311 flush_put_bits(&pb);
8312
8313 init_get_bits(&gb, temp, 8*SIZE);
8314 for(i=0; i<COUNT; i++){
8315 int j, s;
8316
8317 s= show_bits(&gb, 24);
8318
8319 START_TIMER
8320 j= get_ue_golomb(&gb);
8321 if(j != i){
8322 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8323// return -1;
8324 }
8325 STOP_TIMER("get_ue_golomb");
8326 }
8327
8328
8329 init_put_bits(&pb, temp, SIZE);
8330 printf("testing signed exp golomb\n");
8331 for(i=0; i<COUNT; i++){
8332 START_TIMER
8333 set_se_golomb(&pb, i - COUNT/2);
8334 STOP_TIMER("set_se_golomb");
8335 }
8336 flush_put_bits(&pb);
8337
8338 init_get_bits(&gb, temp, 8*SIZE);
8339 for(i=0; i<COUNT; i++){
8340 int j, s;
8341
8342 s= show_bits(&gb, 24);
8343
8344 START_TIMER
8345 j= get_se_golomb(&gb);
8346 if(j != i - COUNT/2){
8347 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8348// return -1;
8349 }
8350 STOP_TIMER("get_se_golomb");
8351 }
8352
8353 printf("testing 4x4 (I)DCT\n");
8354
8355 DCTELEM block[16];
8356 uint8_t src[16], ref[16];
8357 uint64_t error= 0, max_error=0;
8358
8359 for(i=0; i<COUNT; i++){
8360 int j;
8361// printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8362 for(j=0; j<16; j++){
8363 ref[j]= random()%255;
8364 src[j]= random()%255;
8365 }
8366
8367 h264_diff_dct_c(block, src, ref, 4);
8368
8369 //normalize
8370 for(j=0; j<16; j++){
8371// printf("%d ", block[j]);
8372 block[j]= block[j]*4;
8373 if(j&1) block[j]= (block[j]*4 + 2)/5;
8374 if(j&4) block[j]= (block[j]*4 + 2)/5;
8375 }
8376// printf("\n");
8377
8378 s->dsp.h264_idct_add(ref, block, 4);
8379/* for(j=0; j<16; j++){
8380 printf("%d ", ref[j]);
8381 }
8382 printf("\n");*/
8383
8384 for(j=0; j<16; j++){
8385 int diff= ABS(src[j] - ref[j]);
8386
8387 error+= diff*diff;
8388 max_error= FFMAX(max_error, diff);
8389 }
8390 }
8391 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8392#if 0
8393 printf("testing quantizer\n");
8394 for(qp=0; qp<52; qp++){
8395 for(i=0; i<16; i++)
8396 src1_block[i]= src2_block[i]= random()%255;
8397
8398 }
8399#endif
8400 printf("Testing NAL layer\n");
8401
8402 uint8_t bitstream[COUNT];
8403 uint8_t nal[COUNT*2];
8404 H264Context h;
8405 memset(&h, 0, sizeof(H264Context));
8406
8407 for(i=0; i<COUNT; i++){
8408 int zeros= i;
8409 int nal_length;
8410 int consumed;
8411 int out_length;
8412 uint8_t *out;
8413 int j;
8414
8415 for(j=0; j<COUNT; j++){
8416 bitstream[j]= (random() % 255) + 1;
8417 }
8418
8419 for(j=0; j<zeros; j++){
8420 int pos= random() % COUNT;
8421 while(bitstream[pos] == 0){
8422 pos++;
8423 pos %= COUNT;
8424 }
8425 bitstream[pos]=0;
8426 }
8427
8428 START_TIMER
8429
8430 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8431 if(nal_length<0){
8432 printf("encoding failed\n");
8433 return -1;
8434 }
8435
8436 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8437
8438 STOP_TIMER("NAL")
8439
8440 if(out_length != COUNT){
8441 printf("incorrect length %d %d\n", out_length, COUNT);
8442 return -1;
8443 }
8444
8445 if(consumed != nal_length){
8446 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8447 return -1;
8448 }
8449
8450 if(memcmp(bitstream, out, COUNT)){
8451 printf("missmatch\n");
8452 return -1;
8453 }
8454 }
8455
8456 printf("Testing RBSP\n");
8457
8458
8459 return 0;
8460}
8461#endif
8462
8463
8464static int decode_end(AVCodecContext *avctx)
8465{
8466 H264Context *h = avctx->priv_data;
8467 MpegEncContext *s = &h->s;
8468
8469 av_freep(&h->rbsp_buffer);
8470 free_tables(h); //FIXME cleanup init stuff perhaps
8471 MPV_common_end(s);
8472
8473// memset(h, 0, sizeof(H264Context));
8474
8475 return 0;
8476}
8477
8478
/**
 * Codec table entry describing the H.264 decoder.
 *
 * All members except .flush are initialized positionally, so their meaning
 * depends on the member order of AVCodec, which is declared outside this
 * file. NOTE(review): the per-line labels below are assumptions to be
 * confirmed against that declaration.
 */
AVCodec h264_decoder = {
    "h264",                 // codec name string
    CODEC_TYPE_VIDEO,
    CODEC_ID_H264,
    sizeof(H264Context),    // decode_end() reads avctx->priv_data as an H264Context
    decode_init,
    NULL,                   // presumably the encode callback, unused for a decoder -- TODO confirm
    decode_end,
    decode_frame,
    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
    .flush= flush_dpb,
};
8491
8492#ifdef CONFIG_H264_PARSER
/**
 * Parser table entry for H.264, only built when CONFIG_H264_PARSER is defined.
 *
 * The members are initialized positionally; their meaning depends on the
 * member order of AVCodecParser, which is declared outside this file.
 * NOTE(review): the per-line labels below are assumptions to be confirmed
 * against that declaration.
 */
AVCodecParser h264_parser = {
    { CODEC_ID_H264 },      // list of handled codec ids (only H.264 given here)
    sizeof(H264Context),    // presumably the size of the parser's private data -- TODO confirm
    NULL,                   // presumably the optional init callback, none needed -- TODO confirm
    h264_parse,
    ff_parse_close,
    h264_split,
};
8501#endif
8502
8503#include "svq3.c"
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette