1 | /*
|
---|
2 | * jrevdct.c
|
---|
3 | *
|
---|
4 | * Copyright (C) 1991, 1992, Thomas G. Lane.
|
---|
5 | * This file is part of the Independent JPEG Group's software.
|
---|
6 | * For conditions of distribution and use, see the accompanying README file.
|
---|
7 | *
|
---|
8 | * This file contains the basic inverse-DCT transformation subroutine.
|
---|
9 | *
|
---|
10 | * This implementation is based on an algorithm described in
|
---|
11 | * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
|
---|
12 | * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
|
---|
13 | * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
|
---|
14 | * The primary algorithm described there uses 11 multiplies and 29 adds.
|
---|
15 | * We use their alternate method with 12 multiplies and 32 adds.
|
---|
16 | * The advantage of this method is that no data path contains more than one
|
---|
17 | * multiplication; this allows a very simple and accurate implementation in
|
---|
18 | * scaled fixed-point arithmetic, with a minimal number of shifts.
|
---|
19 | *
|
---|
20 | * I've made lots of modifications to attempt to take advantage of the
|
---|
21 | * sparse nature of the DCT matrices we're getting. Although the logic
|
---|
22 | * is cumbersome, it's straightforward and the resulting code is much
|
---|
23 | * faster.
|
---|
24 | *
|
---|
25 | * A better way to do this would be to pass in the DCT block as a sparse
|
---|
26 | * matrix, perhaps with the different cases encoded.
|
---|
27 | */
|
---|
28 |
|
---|
29 | /**
|
---|
30 | * @file jrevdct.c
|
---|
31 | * Independent JPEG Group's LLM (Loeffler-Ligtenberg-Moschytz) inverse DCT.
|
---|
32 | */
|
---|
33 |
|
---|
34 | #include "common.h"
|
---|
35 | #include "dsputil.h"
|
---|
36 |
|
---|
37 | #define EIGHT_BIT_SAMPLES /* selects PASS1_BITS 2 and enables the optional 16-bit MULTIPLY variants */
|
---|
38 |
|
---|
39 | #define DCTSIZE 8 /* the IDCT works on DCTSIZE x DCTSIZE blocks; only 8 is supported */
|
---|
40 | #define DCTSIZE2 64 /* DCTSIZE * DCTSIZE: number of coefficients in one block */
|
---|
41 |
|
---|
42 | #define GLOBAL /* expands to nothing; not referenced in the visible part of this file */
|
---|
43 |
|
---|
44 | #define RIGHT_SHIFT(x, n) ((x) >> (n)) /* plain >>; DESCALE assumes it rounds toward minus infinity for negative x */
|
---|
45 |
|
---|
46 | typedef DCTELEM DCTBLOCK[DCTSIZE2]; /* one block of DCTSIZE2 coefficients; DCTELEM is not defined here (presumably in dsputil.h - confirm) */
|
---|
47 |
|
---|
48 | #define CONST_BITS 13 /* fractional bits of the fixed-point constants: CONST_SCALE is 1 << CONST_BITS */
|
---|
49 |
|
---|
50 | /*
|
---|
51 | * This routine is specialized to the case DCTSIZE = 8.
|
---|
52 | */
|
---|
53 |
|
---|
54 | #if DCTSIZE != 8
|
---|
55 | Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
|
---|
56 | #endif
|
---|
57 |
|
---|
58 |
|
---|
59 | /*
|
---|
60 | * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
|
---|
61 | * on each column. Direct algorithms are also available, but they are
|
---|
62 | * much more complex and seem not to be any faster when reduced to code.
|
---|
63 | *
|
---|
64 | * The details of the scaling are as follows:
|
---|
65 | *
|
---|
66 | * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
|
---|
67 | * larger than the true IDCT outputs. The final outputs are therefore
|
---|
68 | * a factor of N larger than desired; since N=8 this can be cured by
|
---|
69 | * a simple right shift at the end of the algorithm. The advantage of
|
---|
70 | * this arrangement is that we save two multiplications per 1-D IDCT,
|
---|
71 | * because the y0 and y4 inputs need not be divided by sqrt(N).
|
---|
72 | *
|
---|
73 | * We have to do addition and subtraction of the integer inputs, which
|
---|
74 | * is no problem, and multiplication by fractional constants, which is
|
---|
75 | * a problem to do in integer arithmetic. We multiply all the constants
|
---|
76 | * by CONST_SCALE and convert them to integer constants (thus retaining
|
---|
77 | * CONST_BITS bits of precision in the constants). After doing a
|
---|
78 | * multiplication we have to divide the product by CONST_SCALE, with proper
|
---|
79 | * rounding, to produce the correct output. This division can be done
|
---|
80 | * cheaply as a right shift of CONST_BITS bits. We postpone shifting
|
---|
81 | * as long as possible so that partial sums can be added together with
|
---|
82 | * full fractional precision.
|
---|
83 | *
|
---|
84 | * The outputs of the first pass are scaled up by PASS1_BITS bits so that
|
---|
85 | * they are represented to better-than-integral precision. These outputs
|
---|
86 | * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
|
---|
87 | * with the recommended scaling. (To scale up 12-bit sample data further, an
|
---|
88 | * intermediate int32 array would be needed.)
|
---|
89 | *
|
---|
90 | * To avoid overflow of the 32-bit intermediate results in pass 2, we must
|
---|
91 | * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
|
---|
92 | * shows that the values given below are the most effective.
|
---|
93 | */
|
---|
94 |
|
---|
95 | #ifdef EIGHT_BIT_SAMPLES
|
---|
96 | #define PASS1_BITS 2 /* pass-1 (row) outputs are scaled up by 2**PASS1_BITS */
|
---|
97 | #else
|
---|
98 | #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
|
---|
99 | #endif /* EIGHT_BIT_SAMPLES */
|
---|
100 |
|
---|
101 | #define ONE ((int32_t) 1) /* the constant 1 typed as int32_t, used as the base of the shifts below */
|
---|
102 |
|
---|
103 | #define CONST_SCALE (ONE << CONST_BITS) /* 2**CONST_BITS, i.e. 8192 with CONST_BITS 13 */
|
---|
104 |
|
---|
105 | /* Convert a positive real constant to an integer scaled by CONST_SCALE.
|
---|
106 | * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
|
---|
107 | * you will pay a significant penalty in run time. In that case, figure
|
---|
108 | * the correct integer constant values and insert them by hand.
|
---|
109 | */
|
---|
110 |
|
---|
111 | /* FIX() is no longer used: all constants are precomputed as FIX_* values. Adding 0.5 before the truncating cast rounds a positive x to the nearest integer. */
|
---|
112 | #define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5))
|
---|
113 |
|
---|
114 | /* Descale and correctly round an int32_t value that's scaled by N bits:
|
---|
115 | * add half of the divisor, ONE << (N-1), then shift right by N bits.
|
---|
116 | * We assume RIGHT_SHIFT rounds towards minus infinity (an arithmetic shift; C leaves >> of a negative value implementation-defined), so adding the fudge factor is correct for either sign of X.
|
---|
117 | * N must be at least 1, and it is expanded twice, so it should have no side effects. */
|
---|
118 |
|
---|
119 | #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
|
---|
120 |
|
---|
121 | /* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
|
---|
122 | * For 8-bit samples with the recommended scaling, all the variable
|
---|
123 | * and constant values involved are no more than 16 bits wide, so a
|
---|
124 | * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
|
---|
125 | * this provides a useful speedup on many machines.
|
---|
126 | * There is no way to specify a 16x16->32 multiply in portable C, but
|
---|
127 | * some C compilers will do the right thing if you provide the correct
|
---|
128 | * combination of casts.
|
---|
129 | * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
|
---|
130 | * NOTE: define at most one of SHORTxSHORT_32 and SHORTxLCONST_32; with both defined, MULTIPLY is defined twice with different bodies. Neither is defined in the visible part of this file, so the default definition is used unless one is set elsewhere. */
|
---|
131 |
|
---|
132 | #ifdef EIGHT_BIT_SAMPLES
|
---|
133 | #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */
|
---|
134 | #define MULTIPLY(var,const) (((int16_t) (var)) * ((int16_t) (const)))
|
---|
135 | #endif /* SHORTxSHORT_32 */
|
---|
136 | #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */
|
---|
137 | #define MULTIPLY(var,const) (((int16_t) (var)) * ((int32_t) (const)))
|
---|
138 | #endif /* SHORTxLCONST_32 */
|
---|
139 | #endif /* EIGHT_BIT_SAMPLES */
|
---|
140 |
|
---|
141 | #ifndef MULTIPLY /* default definition: an ordinary multiply with no narrowing casts */
|
---|
142 | #define MULTIPLY(var,const) ((var) * (const))
|
---|
143 | #endif /* !MULTIPLY */
|
---|
144 |
|
---|
145 |
|
---|
146 | /*
|
---|
147 | Unlike our decoder, where the FIX constants are approximated, we need to use exact
|
---|
148 | ones here, or successive P-frames will drift too much with reference frame coding. Each FIX_a_bbbbbbbbb below is the real constant a.bbbbbbbbb multiplied by CONST_SCALE (8192) and rounded to the nearest integer.
|
---|
149 | */
|
---|
150 | #define FIX_0_211164243 1730
|
---|
151 | #define FIX_0_275899380 2260
|
---|
152 | #define FIX_0_298631336 2446
|
---|
153 | #define FIX_0_390180644 3196
|
---|
154 | #define FIX_0_509795579 4176
|
---|
155 | #define FIX_0_541196100 4433
|
---|
156 | #define FIX_0_601344887 4926
|
---|
157 | #define FIX_0_765366865 6270
|
---|
158 | #define FIX_0_785694958 6436
|
---|
159 | #define FIX_0_899976223 7373
|
---|
160 | #define FIX_1_061594337 8697
|
---|
161 | #define FIX_1_111140466 9102
|
---|
162 | #define FIX_1_175875602 9633
|
---|
163 | #define FIX_1_306562965 10703
|
---|
164 | #define FIX_1_387039845 11363
|
---|
165 | #define FIX_1_451774981 11893
|
---|
166 | #define FIX_1_501321110 12299
|
---|
167 | #define FIX_1_662939225 13623
|
---|
168 | #define FIX_1_847759065 15137
|
---|
169 | #define FIX_1_961570560 16069
|
---|
170 | #define FIX_2_053119869 16819
|
---|
171 | #define FIX_2_172734803 17799
|
---|
172 | #define FIX_2_562915447 20995
|
---|
173 | #define FIX_3_072711026 25172
|
---|
174 |
|
---|
175 | /*
|
---|
176 | * Perform the inverse DCT on one block of coefficients.
|
---|
177 | */
|
---|
178 |
|
---|
179 | void j_rev_dct(DCTBLOCK data)
|
---|
180 | {
|
---|
181 | int32_t tmp0, tmp1, tmp2, tmp3;
|
---|
182 | int32_t tmp10, tmp11, tmp12, tmp13;
|
---|
183 | int32_t z1, z2, z3, z4, z5;
|
---|
184 | int32_t d0, d1, d2, d3, d4, d5, d6, d7;
|
---|
185 | register DCTELEM *dataptr;
|
---|
186 | int rowctr;
|
---|
187 |
|
---|
188 | /* Pass 1: process rows. */
|
---|
189 | /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
|
---|
190 | /* furthermore, we scale the results by 2**PASS1_BITS. */
|
---|
191 |
|
---|
192 | dataptr = data;
|
---|
193 |
|
---|
194 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
|
---|
195 | /* Due to quantization, we will usually find that many of the input
|
---|
196 | * coefficients are zero, especially the AC terms. We can exploit this
|
---|
197 | * by short-circuiting the IDCT calculation for any row in which all
|
---|
198 | * the AC terms are zero. In that case each output is equal to the
|
---|
199 | * DC coefficient (with scale factor as needed).
|
---|
200 | * With typical images and quantization tables, half or more of the
|
---|
201 | * row DCT calculations can be simplified this way.
|
---|
202 | */
|
---|
203 |
|
---|
204 | register int *idataptr = (int*)dataptr;
|
---|
205 |
|
---|
206 | /* WARNING: we do the same permutation as MMX idct to simplify the
|
---|
207 | video core */
|
---|
208 | d0 = dataptr[0];
|
---|
209 | d2 = dataptr[1];
|
---|
210 | d4 = dataptr[2];
|
---|
211 | d6 = dataptr[3];
|
---|
212 | d1 = dataptr[4];
|
---|
213 | d3 = dataptr[5];
|
---|
214 | d5 = dataptr[6];
|
---|
215 | d7 = dataptr[7];
|
---|
216 |
|
---|
217 | if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
|
---|
218 | /* AC terms all zero */
|
---|
219 | if (d0) {
|
---|
220 | /* Compute a 32 bit value to assign. */
|
---|
221 | DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
|
---|
222 | register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
|
---|
223 |
|
---|
224 | idataptr[0] = v;
|
---|
225 | idataptr[1] = v;
|
---|
226 | idataptr[2] = v;
|
---|
227 | idataptr[3] = v;
|
---|
228 | }
|
---|
229 |
|
---|
230 | dataptr += DCTSIZE; /* advance pointer to next row */
|
---|
231 | continue;
|
---|
232 | }
|
---|
233 |
|
---|
234 | /* Even part: reverse the even part of the forward DCT. */
|
---|
235 | /* The rotator is sqrt(2)*c(-6). */
|
---|
236 | {
|
---|
237 | if (d6) {
|
---|
238 | if (d2) {
|
---|
239 | /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
|
---|
240 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
---|
241 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
---|
242 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
---|
243 |
|
---|
244 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
245 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
246 |
|
---|
247 | tmp10 = tmp0 + tmp3;
|
---|
248 | tmp13 = tmp0 - tmp3;
|
---|
249 | tmp11 = tmp1 + tmp2;
|
---|
250 | tmp12 = tmp1 - tmp2;
|
---|
251 | } else {
|
---|
252 | /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
|
---|
253 | tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
---|
254 | tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
---|
255 |
|
---|
256 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
257 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
258 |
|
---|
259 | tmp10 = tmp0 + tmp3;
|
---|
260 | tmp13 = tmp0 - tmp3;
|
---|
261 | tmp11 = tmp1 + tmp2;
|
---|
262 | tmp12 = tmp1 - tmp2;
|
---|
263 | }
|
---|
264 | } else {
|
---|
265 | if (d2) {
|
---|
266 | /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
|
---|
267 | tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
---|
268 | tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
---|
269 |
|
---|
270 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
271 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
272 |
|
---|
273 | tmp10 = tmp0 + tmp3;
|
---|
274 | tmp13 = tmp0 - tmp3;
|
---|
275 | tmp11 = tmp1 + tmp2;
|
---|
276 | tmp12 = tmp1 - tmp2;
|
---|
277 | } else {
|
---|
278 | /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
|
---|
279 | tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
|
---|
280 | tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
|
---|
281 | }
|
---|
282 | }
|
---|
283 |
|
---|
284 | /* Odd part per figure 8; the matrix is unitary and hence its
|
---|
285 | * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
|
---|
286 | */
|
---|
287 |
|
---|
288 | if (d7) {
|
---|
289 | if (d5) {
|
---|
290 | if (d3) {
|
---|
291 | if (d1) {
|
---|
292 | /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
|
---|
293 | z1 = d7 + d1;
|
---|
294 | z2 = d5 + d3;
|
---|
295 | z3 = d7 + d3;
|
---|
296 | z4 = d5 + d1;
|
---|
297 | z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
|
---|
298 |
|
---|
299 | tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
---|
300 | tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
---|
301 | tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
---|
302 | tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
---|
303 | z1 = MULTIPLY(-z1, FIX_0_899976223);
|
---|
304 | z2 = MULTIPLY(-z2, FIX_2_562915447);
|
---|
305 | z3 = MULTIPLY(-z3, FIX_1_961570560);
|
---|
306 | z4 = MULTIPLY(-z4, FIX_0_390180644);
|
---|
307 |
|
---|
308 | z3 += z5;
|
---|
309 | z4 += z5;
|
---|
310 |
|
---|
311 | tmp0 += z1 + z3;
|
---|
312 | tmp1 += z2 + z4;
|
---|
313 | tmp2 += z2 + z3;
|
---|
314 | tmp3 += z1 + z4;
|
---|
315 | } else {
|
---|
316 | /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
|
---|
317 | z2 = d5 + d3;
|
---|
318 | z3 = d7 + d3;
|
---|
319 | z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
|
---|
320 |
|
---|
321 | tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
---|
322 | tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
---|
323 | tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
---|
324 | z1 = MULTIPLY(-d7, FIX_0_899976223);
|
---|
325 | z2 = MULTIPLY(-z2, FIX_2_562915447);
|
---|
326 | z3 = MULTIPLY(-z3, FIX_1_961570560);
|
---|
327 | z4 = MULTIPLY(-d5, FIX_0_390180644);
|
---|
328 |
|
---|
329 | z3 += z5;
|
---|
330 | z4 += z5;
|
---|
331 |
|
---|
332 | tmp0 += z1 + z3;
|
---|
333 | tmp1 += z2 + z4;
|
---|
334 | tmp2 += z2 + z3;
|
---|
335 | tmp3 = z1 + z4;
|
---|
336 | }
|
---|
337 | } else {
|
---|
338 | if (d1) {
|
---|
339 | /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
|
---|
340 | z1 = d7 + d1;
|
---|
341 | z4 = d5 + d1;
|
---|
342 | z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
|
---|
343 |
|
---|
344 | tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
---|
345 | tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
---|
346 | tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
---|
347 | z1 = MULTIPLY(-z1, FIX_0_899976223);
|
---|
348 | z2 = MULTIPLY(-d5, FIX_2_562915447);
|
---|
349 | z3 = MULTIPLY(-d7, FIX_1_961570560);
|
---|
350 | z4 = MULTIPLY(-z4, FIX_0_390180644);
|
---|
351 |
|
---|
352 | z3 += z5;
|
---|
353 | z4 += z5;
|
---|
354 |
|
---|
355 | tmp0 += z1 + z3;
|
---|
356 | tmp1 += z2 + z4;
|
---|
357 | tmp2 = z2 + z3;
|
---|
358 | tmp3 += z1 + z4;
|
---|
359 | } else {
|
---|
360 | /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
|
---|
361 | tmp0 = MULTIPLY(-d7, FIX_0_601344887);
|
---|
362 | z1 = MULTIPLY(-d7, FIX_0_899976223);
|
---|
363 | z3 = MULTIPLY(-d7, FIX_1_961570560);
|
---|
364 | tmp1 = MULTIPLY(-d5, FIX_0_509795579);
|
---|
365 | z2 = MULTIPLY(-d5, FIX_2_562915447);
|
---|
366 | z4 = MULTIPLY(-d5, FIX_0_390180644);
|
---|
367 | z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
|
---|
368 |
|
---|
369 | z3 += z5;
|
---|
370 | z4 += z5;
|
---|
371 |
|
---|
372 | tmp0 += z3;
|
---|
373 | tmp1 += z4;
|
---|
374 | tmp2 = z2 + z3;
|
---|
375 | tmp3 = z1 + z4;
|
---|
376 | }
|
---|
377 | }
|
---|
378 | } else {
|
---|
379 | if (d3) {
|
---|
380 | if (d1) {
|
---|
381 | /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
|
---|
382 | z1 = d7 + d1;
|
---|
383 | z3 = d7 + d3;
|
---|
384 | z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
|
---|
385 |
|
---|
386 | tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
---|
387 | tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
---|
388 | tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
---|
389 | z1 = MULTIPLY(-z1, FIX_0_899976223);
|
---|
390 | z2 = MULTIPLY(-d3, FIX_2_562915447);
|
---|
391 | z3 = MULTIPLY(-z3, FIX_1_961570560);
|
---|
392 | z4 = MULTIPLY(-d1, FIX_0_390180644);
|
---|
393 |
|
---|
394 | z3 += z5;
|
---|
395 | z4 += z5;
|
---|
396 |
|
---|
397 | tmp0 += z1 + z3;
|
---|
398 | tmp1 = z2 + z4;
|
---|
399 | tmp2 += z2 + z3;
|
---|
400 | tmp3 += z1 + z4;
|
---|
401 | } else {
|
---|
402 | /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
|
---|
403 | z3 = d7 + d3;
|
---|
404 |
|
---|
405 | tmp0 = MULTIPLY(-d7, FIX_0_601344887);
|
---|
406 | z1 = MULTIPLY(-d7, FIX_0_899976223);
|
---|
407 | tmp2 = MULTIPLY(d3, FIX_0_509795579);
|
---|
408 | z2 = MULTIPLY(-d3, FIX_2_562915447);
|
---|
409 | z5 = MULTIPLY(z3, FIX_1_175875602);
|
---|
410 | z3 = MULTIPLY(-z3, FIX_0_785694958);
|
---|
411 |
|
---|
412 | tmp0 += z3;
|
---|
413 | tmp1 = z2 + z5;
|
---|
414 | tmp2 += z3;
|
---|
415 | tmp3 = z1 + z5;
|
---|
416 | }
|
---|
417 | } else {
|
---|
418 | if (d1) {
|
---|
419 | /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
|
---|
420 | z1 = d7 + d1;
|
---|
421 | z5 = MULTIPLY(z1, FIX_1_175875602);
|
---|
422 |
|
---|
423 | z1 = MULTIPLY(z1, FIX_0_275899380);
|
---|
424 | z3 = MULTIPLY(-d7, FIX_1_961570560);
|
---|
425 | tmp0 = MULTIPLY(-d7, FIX_1_662939225);
|
---|
426 | z4 = MULTIPLY(-d1, FIX_0_390180644);
|
---|
427 | tmp3 = MULTIPLY(d1, FIX_1_111140466);
|
---|
428 |
|
---|
429 | tmp0 += z1;
|
---|
430 | tmp1 = z4 + z5;
|
---|
431 | tmp2 = z3 + z5;
|
---|
432 | tmp3 += z1;
|
---|
433 | } else {
|
---|
434 | /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
|
---|
435 | tmp0 = MULTIPLY(-d7, FIX_1_387039845);
|
---|
436 | tmp1 = MULTIPLY(d7, FIX_1_175875602);
|
---|
437 | tmp2 = MULTIPLY(-d7, FIX_0_785694958);
|
---|
438 | tmp3 = MULTIPLY(d7, FIX_0_275899380);
|
---|
439 | }
|
---|
440 | }
|
---|
441 | }
|
---|
442 | } else {
|
---|
443 | if (d5) {
|
---|
444 | if (d3) {
|
---|
445 | if (d1) {
|
---|
446 | /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
|
---|
447 | z2 = d5 + d3;
|
---|
448 | z4 = d5 + d1;
|
---|
449 | z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
|
---|
450 |
|
---|
451 | tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
---|
452 | tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
---|
453 | tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
---|
454 | z1 = MULTIPLY(-d1, FIX_0_899976223);
|
---|
455 | z2 = MULTIPLY(-z2, FIX_2_562915447);
|
---|
456 | z3 = MULTIPLY(-d3, FIX_1_961570560);
|
---|
457 | z4 = MULTIPLY(-z4, FIX_0_390180644);
|
---|
458 |
|
---|
459 | z3 += z5;
|
---|
460 | z4 += z5;
|
---|
461 |
|
---|
462 | tmp0 = z1 + z3;
|
---|
463 | tmp1 += z2 + z4;
|
---|
464 | tmp2 += z2 + z3;
|
---|
465 | tmp3 += z1 + z4;
|
---|
466 | } else {
|
---|
467 | /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
|
---|
468 | z2 = d5 + d3;
|
---|
469 |
|
---|
470 | z5 = MULTIPLY(z2, FIX_1_175875602);
|
---|
471 | tmp1 = MULTIPLY(d5, FIX_1_662939225);
|
---|
472 | z4 = MULTIPLY(-d5, FIX_0_390180644);
|
---|
473 | z2 = MULTIPLY(-z2, FIX_1_387039845);
|
---|
474 | tmp2 = MULTIPLY(d3, FIX_1_111140466);
|
---|
475 | z3 = MULTIPLY(-d3, FIX_1_961570560);
|
---|
476 |
|
---|
477 | tmp0 = z3 + z5;
|
---|
478 | tmp1 += z2;
|
---|
479 | tmp2 += z2;
|
---|
480 | tmp3 = z4 + z5;
|
---|
481 | }
|
---|
482 | } else {
|
---|
483 | if (d1) {
|
---|
484 | /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
|
---|
485 | z4 = d5 + d1;
|
---|
486 |
|
---|
487 | z5 = MULTIPLY(z4, FIX_1_175875602);
|
---|
488 | z1 = MULTIPLY(-d1, FIX_0_899976223);
|
---|
489 | tmp3 = MULTIPLY(d1, FIX_0_601344887);
|
---|
490 | tmp1 = MULTIPLY(-d5, FIX_0_509795579);
|
---|
491 | z2 = MULTIPLY(-d5, FIX_2_562915447);
|
---|
492 | z4 = MULTIPLY(z4, FIX_0_785694958);
|
---|
493 |
|
---|
494 | tmp0 = z1 + z5;
|
---|
495 | tmp1 += z4;
|
---|
496 | tmp2 = z2 + z5;
|
---|
497 | tmp3 += z4;
|
---|
498 | } else {
|
---|
499 | /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
|
---|
500 | tmp0 = MULTIPLY(d5, FIX_1_175875602);
|
---|
501 | tmp1 = MULTIPLY(d5, FIX_0_275899380);
|
---|
502 | tmp2 = MULTIPLY(-d5, FIX_1_387039845);
|
---|
503 | tmp3 = MULTIPLY(d5, FIX_0_785694958);
|
---|
504 | }
|
---|
505 | }
|
---|
506 | } else {
|
---|
507 | if (d3) {
|
---|
508 | if (d1) {
|
---|
509 | /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
|
---|
510 | z5 = d1 + d3;
|
---|
511 | tmp3 = MULTIPLY(d1, FIX_0_211164243);
|
---|
512 | tmp2 = MULTIPLY(-d3, FIX_1_451774981);
|
---|
513 | z1 = MULTIPLY(d1, FIX_1_061594337);
|
---|
514 | z2 = MULTIPLY(-d3, FIX_2_172734803);
|
---|
515 | z4 = MULTIPLY(z5, FIX_0_785694958);
|
---|
516 | z5 = MULTIPLY(z5, FIX_1_175875602);
|
---|
517 |
|
---|
518 | tmp0 = z1 - z4;
|
---|
519 | tmp1 = z2 + z4;
|
---|
520 | tmp2 += z5;
|
---|
521 | tmp3 += z5;
|
---|
522 | } else {
|
---|
523 | /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
|
---|
524 | tmp0 = MULTIPLY(-d3, FIX_0_785694958);
|
---|
525 | tmp1 = MULTIPLY(-d3, FIX_1_387039845);
|
---|
526 | tmp2 = MULTIPLY(-d3, FIX_0_275899380);
|
---|
527 | tmp3 = MULTIPLY(d3, FIX_1_175875602);
|
---|
528 | }
|
---|
529 | } else {
|
---|
530 | if (d1) {
|
---|
531 | /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
|
---|
532 | tmp0 = MULTIPLY(d1, FIX_0_275899380);
|
---|
533 | tmp1 = MULTIPLY(d1, FIX_0_785694958);
|
---|
534 | tmp2 = MULTIPLY(d1, FIX_1_175875602);
|
---|
535 | tmp3 = MULTIPLY(d1, FIX_1_387039845);
|
---|
536 | } else {
|
---|
537 | /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
|
---|
538 | tmp0 = tmp1 = tmp2 = tmp3 = 0;
|
---|
539 | }
|
---|
540 | }
|
---|
541 | }
|
---|
542 | }
|
---|
543 | }
|
---|
544 | /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
|
---|
545 |
|
---|
546 | dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
|
---|
547 | dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
|
---|
548 | dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
|
---|
549 | dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
|
---|
550 | dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
|
---|
551 | dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
|
---|
552 | dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
|
---|
553 | dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
|
---|
554 |
|
---|
555 | dataptr += DCTSIZE; /* advance pointer to next row */
|
---|
556 | }
|
---|
557 |
|
---|
558 | /* Pass 2: process columns. */
|
---|
559 | /* Note that we must descale the results by a factor of 8 == 2**3, */
|
---|
560 | /* and also undo the PASS1_BITS scaling. */
|
---|
561 |
|
---|
562 | dataptr = data;
|
---|
563 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
|
---|
564 | /* Columns of zeroes can be exploited in the same way as we did with rows.
|
---|
565 | * However, the row calculation has created many nonzero AC terms, so the
|
---|
566 | * simplification applies less often (typically 5% to 10% of the time).
|
---|
567 | * On machines with very fast multiplication, it's possible that the
|
---|
568 | * test takes more time than it's worth. In that case this section
|
---|
569 | * may be commented out.
|
---|
570 | */
|
---|
571 |
|
---|
572 | d0 = dataptr[DCTSIZE*0];
|
---|
573 | d1 = dataptr[DCTSIZE*1];
|
---|
574 | d2 = dataptr[DCTSIZE*2];
|
---|
575 | d3 = dataptr[DCTSIZE*3];
|
---|
576 | d4 = dataptr[DCTSIZE*4];
|
---|
577 | d5 = dataptr[DCTSIZE*5];
|
---|
578 | d6 = dataptr[DCTSIZE*6];
|
---|
579 | d7 = dataptr[DCTSIZE*7];
|
---|
580 |
|
---|
581 | /* Even part: reverse the even part of the forward DCT. */
|
---|
582 | /* The rotator is sqrt(2)*c(-6). */
|
---|
583 | if (d6) {
|
---|
584 | if (d2) {
|
---|
585 | /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
|
---|
586 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
---|
587 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
---|
588 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
---|
589 |
|
---|
590 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
591 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
592 |
|
---|
593 | tmp10 = tmp0 + tmp3;
|
---|
594 | tmp13 = tmp0 - tmp3;
|
---|
595 | tmp11 = tmp1 + tmp2;
|
---|
596 | tmp12 = tmp1 - tmp2;
|
---|
597 | } else {
|
---|
598 | /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
|
---|
599 | tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
---|
600 | tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
---|
601 |
|
---|
602 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
603 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
604 |
|
---|
605 | tmp10 = tmp0 + tmp3;
|
---|
606 | tmp13 = tmp0 - tmp3;
|
---|
607 | tmp11 = tmp1 + tmp2;
|
---|
608 | tmp12 = tmp1 - tmp2;
|
---|
609 | }
|
---|
610 | } else {
|
---|
611 | if (d2) {
|
---|
612 | /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
|
---|
613 | tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
---|
614 | tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
---|
615 |
|
---|
616 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
617 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
618 |
|
---|
619 | tmp10 = tmp0 + tmp3;
|
---|
620 | tmp13 = tmp0 - tmp3;
|
---|
621 | tmp11 = tmp1 + tmp2;
|
---|
622 | tmp12 = tmp1 - tmp2;
|
---|
623 | } else {
|
---|
624 | /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
|
---|
625 | tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
|
---|
626 | tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
|
---|
627 | }
|
---|
628 | }
|
---|
629 |
|
---|
630 | /* Odd part per figure 8; the matrix is unitary and hence its
|
---|
631 | * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
|
---|
632 | */
|
---|
633 | if (d7) {
|
---|
634 | if (d5) {
|
---|
635 | if (d3) {
|
---|
636 | if (d1) {
|
---|
637 | /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
|
---|
638 | z1 = d7 + d1;
|
---|
639 | z2 = d5 + d3;
|
---|
640 | z3 = d7 + d3;
|
---|
641 | z4 = d5 + d1;
|
---|
642 | z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
|
---|
643 |
|
---|
644 | tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
---|
645 | tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
---|
646 | tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
---|
647 | tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
---|
648 | z1 = MULTIPLY(-z1, FIX_0_899976223);
|
---|
649 | z2 = MULTIPLY(-z2, FIX_2_562915447);
|
---|
650 | z3 = MULTIPLY(-z3, FIX_1_961570560);
|
---|
651 | z4 = MULTIPLY(-z4, FIX_0_390180644);
|
---|
652 |
|
---|
653 | z3 += z5;
|
---|
654 | z4 += z5;
|
---|
655 |
|
---|
656 | tmp0 += z1 + z3;
|
---|
657 | tmp1 += z2 + z4;
|
---|
658 | tmp2 += z2 + z3;
|
---|
659 | tmp3 += z1 + z4;
|
---|
660 | } else {
|
---|
661 | /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
|
---|
662 | z1 = d7;
|
---|
663 | z2 = d5 + d3;
|
---|
664 | z3 = d7 + d3;
|
---|
665 | z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
|
---|
666 |
|
---|
667 | tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
---|
668 | tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
---|
669 | tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
---|
670 | z1 = MULTIPLY(-d7, FIX_0_899976223);
|
---|
671 | z2 = MULTIPLY(-z2, FIX_2_562915447);
|
---|
672 | z3 = MULTIPLY(-z3, FIX_1_961570560);
|
---|
673 | z4 = MULTIPLY(-d5, FIX_0_390180644);
|
---|
674 |
|
---|
675 | z3 += z5;
|
---|
676 | z4 += z5;
|
---|
677 |
|
---|
678 | tmp0 += z1 + z3;
|
---|
679 | tmp1 += z2 + z4;
|
---|
680 | tmp2 += z2 + z3;
|
---|
681 | tmp3 = z1 + z4;
|
---|
682 | }
|
---|
683 | } else {
|
---|
684 | if (d1) {
|
---|
685 | /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
|
---|
686 | z1 = d7 + d1;
|
---|
687 | z2 = d5;
|
---|
688 | z3 = d7;
|
---|
689 | z4 = d5 + d1;
|
---|
690 | z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
|
---|
691 |
|
---|
692 | tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
---|
693 | tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
---|
694 | tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
---|
695 | z1 = MULTIPLY(-z1, FIX_0_899976223);
|
---|
696 | z2 = MULTIPLY(-d5, FIX_2_562915447);
|
---|
697 | z3 = MULTIPLY(-d7, FIX_1_961570560);
|
---|
698 | z4 = MULTIPLY(-z4, FIX_0_390180644);
|
---|
699 |
|
---|
700 | z3 += z5;
|
---|
701 | z4 += z5;
|
---|
702 |
|
---|
703 | tmp0 += z1 + z3;
|
---|
704 | tmp1 += z2 + z4;
|
---|
705 | tmp2 = z2 + z3;
|
---|
706 | tmp3 += z1 + z4;
|
---|
707 | } else {
|
---|
708 | /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
|
---|
709 | tmp0 = MULTIPLY(-d7, FIX_0_601344887);
|
---|
710 | z1 = MULTIPLY(-d7, FIX_0_899976223);
|
---|
711 | z3 = MULTIPLY(-d7, FIX_1_961570560);
|
---|
712 | tmp1 = MULTIPLY(-d5, FIX_0_509795579);
|
---|
713 | z2 = MULTIPLY(-d5, FIX_2_562915447);
|
---|
714 | z4 = MULTIPLY(-d5, FIX_0_390180644);
|
---|
715 | z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
|
---|
716 |
|
---|
717 | z3 += z5;
|
---|
718 | z4 += z5;
|
---|
719 |
|
---|
720 | tmp0 += z3;
|
---|
721 | tmp1 += z4;
|
---|
722 | tmp2 = z2 + z3;
|
---|
723 | tmp3 = z1 + z4;
|
---|
724 | }
|
---|
725 | }
|
---|
726 | } else {
|
---|
727 | if (d3) {
|
---|
728 | if (d1) {
|
---|
729 | /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
|
---|
730 | z1 = d7 + d1;
|
---|
731 | z3 = d7 + d3;
|
---|
732 | z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
|
---|
733 |
|
---|
734 | tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
---|
735 | tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
---|
736 | tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
---|
737 | z1 = MULTIPLY(-z1, FIX_0_899976223);
|
---|
738 | z2 = MULTIPLY(-d3, FIX_2_562915447);
|
---|
739 | z3 = MULTIPLY(-z3, FIX_1_961570560);
|
---|
740 | z4 = MULTIPLY(-d1, FIX_0_390180644);
|
---|
741 |
|
---|
742 | z3 += z5;
|
---|
743 | z4 += z5;
|
---|
744 |
|
---|
745 | tmp0 += z1 + z3;
|
---|
746 | tmp1 = z2 + z4;
|
---|
747 | tmp2 += z2 + z3;
|
---|
748 | tmp3 += z1 + z4;
|
---|
749 | } else {
|
---|
750 | /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
|
---|
751 | z3 = d7 + d3;
|
---|
752 |
|
---|
753 | tmp0 = MULTIPLY(-d7, FIX_0_601344887);
|
---|
754 | z1 = MULTIPLY(-d7, FIX_0_899976223);
|
---|
755 | tmp2 = MULTIPLY(d3, FIX_0_509795579);
|
---|
756 | z2 = MULTIPLY(-d3, FIX_2_562915447);
|
---|
757 | z5 = MULTIPLY(z3, FIX_1_175875602);
|
---|
758 | z3 = MULTIPLY(-z3, FIX_0_785694958);
|
---|
759 |
|
---|
760 | tmp0 += z3;
|
---|
761 | tmp1 = z2 + z5;
|
---|
762 | tmp2 += z3;
|
---|
763 | tmp3 = z1 + z5;
|
---|
764 | }
|
---|
765 | } else {
|
---|
766 | if (d1) {
|
---|
767 | /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
|
---|
768 | z1 = d7 + d1;
|
---|
769 | z5 = MULTIPLY(z1, FIX_1_175875602);
|
---|
770 |
|
---|
771 | z1 = MULTIPLY(z1, FIX_0_275899380);
|
---|
772 | z3 = MULTIPLY(-d7, FIX_1_961570560);
|
---|
773 | tmp0 = MULTIPLY(-d7, FIX_1_662939225);
|
---|
774 | z4 = MULTIPLY(-d1, FIX_0_390180644);
|
---|
775 | tmp3 = MULTIPLY(d1, FIX_1_111140466);
|
---|
776 |
|
---|
777 | tmp0 += z1;
|
---|
778 | tmp1 = z4 + z5;
|
---|
779 | tmp2 = z3 + z5;
|
---|
780 | tmp3 += z1;
|
---|
781 | } else {
|
---|
782 | /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
|
---|
783 | tmp0 = MULTIPLY(-d7, FIX_1_387039845);
|
---|
784 | tmp1 = MULTIPLY(d7, FIX_1_175875602);
|
---|
785 | tmp2 = MULTIPLY(-d7, FIX_0_785694958);
|
---|
786 | tmp3 = MULTIPLY(d7, FIX_0_275899380);
|
---|
787 | }
|
---|
788 | }
|
---|
789 | }
|
---|
790 | } else {
|
---|
791 | if (d5) {
|
---|
792 | if (d3) {
|
---|
793 | if (d1) {
|
---|
794 | /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
|
---|
795 | z2 = d5 + d3;
|
---|
796 | z4 = d5 + d1;
|
---|
797 | z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
|
---|
798 |
|
---|
799 | tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
---|
800 | tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
---|
801 | tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
---|
802 | z1 = MULTIPLY(-d1, FIX_0_899976223);
|
---|
803 | z2 = MULTIPLY(-z2, FIX_2_562915447);
|
---|
804 | z3 = MULTIPLY(-d3, FIX_1_961570560);
|
---|
805 | z4 = MULTIPLY(-z4, FIX_0_390180644);
|
---|
806 |
|
---|
807 | z3 += z5;
|
---|
808 | z4 += z5;
|
---|
809 |
|
---|
810 | tmp0 = z1 + z3;
|
---|
811 | tmp1 += z2 + z4;
|
---|
812 | tmp2 += z2 + z3;
|
---|
813 | tmp3 += z1 + z4;
|
---|
814 | } else {
|
---|
815 | /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
|
---|
816 | z2 = d5 + d3;
|
---|
817 |
|
---|
818 | z5 = MULTIPLY(z2, FIX_1_175875602);
|
---|
819 | tmp1 = MULTIPLY(d5, FIX_1_662939225);
|
---|
820 | z4 = MULTIPLY(-d5, FIX_0_390180644);
|
---|
821 | z2 = MULTIPLY(-z2, FIX_1_387039845);
|
---|
822 | tmp2 = MULTIPLY(d3, FIX_1_111140466);
|
---|
823 | z3 = MULTIPLY(-d3, FIX_1_961570560);
|
---|
824 |
|
---|
825 | tmp0 = z3 + z5;
|
---|
826 | tmp1 += z2;
|
---|
827 | tmp2 += z2;
|
---|
828 | tmp3 = z4 + z5;
|
---|
829 | }
|
---|
830 | } else {
|
---|
831 | if (d1) {
|
---|
832 | /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
|
---|
833 | z4 = d5 + d1;
|
---|
834 |
|
---|
835 | z5 = MULTIPLY(z4, FIX_1_175875602);
|
---|
836 | z1 = MULTIPLY(-d1, FIX_0_899976223);
|
---|
837 | tmp3 = MULTIPLY(d1, FIX_0_601344887);
|
---|
838 | tmp1 = MULTIPLY(-d5, FIX_0_509795579);
|
---|
839 | z2 = MULTIPLY(-d5, FIX_2_562915447);
|
---|
840 | z4 = MULTIPLY(z4, FIX_0_785694958);
|
---|
841 |
|
---|
842 | tmp0 = z1 + z5;
|
---|
843 | tmp1 += z4;
|
---|
844 | tmp2 = z2 + z5;
|
---|
845 | tmp3 += z4;
|
---|
846 | } else {
|
---|
847 | /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
|
---|
848 | tmp0 = MULTIPLY(d5, FIX_1_175875602);
|
---|
849 | tmp1 = MULTIPLY(d5, FIX_0_275899380);
|
---|
850 | tmp2 = MULTIPLY(-d5, FIX_1_387039845);
|
---|
851 | tmp3 = MULTIPLY(d5, FIX_0_785694958);
|
---|
852 | }
|
---|
853 | }
|
---|
854 | } else {
|
---|
855 | if (d3) {
|
---|
856 | if (d1) {
|
---|
857 | /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
|
---|
858 | z5 = d1 + d3;
|
---|
859 | tmp3 = MULTIPLY(d1, FIX_0_211164243);
|
---|
860 | tmp2 = MULTIPLY(-d3, FIX_1_451774981);
|
---|
861 | z1 = MULTIPLY(d1, FIX_1_061594337);
|
---|
862 | z2 = MULTIPLY(-d3, FIX_2_172734803);
|
---|
863 | z4 = MULTIPLY(z5, FIX_0_785694958);
|
---|
864 | z5 = MULTIPLY(z5, FIX_1_175875602);
|
---|
865 |
|
---|
866 | tmp0 = z1 - z4;
|
---|
867 | tmp1 = z2 + z4;
|
---|
868 | tmp2 += z5;
|
---|
869 | tmp3 += z5;
|
---|
870 | } else {
|
---|
871 | /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
|
---|
872 | tmp0 = MULTIPLY(-d3, FIX_0_785694958);
|
---|
873 | tmp1 = MULTIPLY(-d3, FIX_1_387039845);
|
---|
874 | tmp2 = MULTIPLY(-d3, FIX_0_275899380);
|
---|
875 | tmp3 = MULTIPLY(d3, FIX_1_175875602);
|
---|
876 | }
|
---|
877 | } else {
|
---|
878 | if (d1) {
|
---|
879 | /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
|
---|
880 | tmp0 = MULTIPLY(d1, FIX_0_275899380);
|
---|
881 | tmp1 = MULTIPLY(d1, FIX_0_785694958);
|
---|
882 | tmp2 = MULTIPLY(d1, FIX_1_175875602);
|
---|
883 | tmp3 = MULTIPLY(d1, FIX_1_387039845);
|
---|
884 | } else {
|
---|
885 | /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
|
---|
886 | tmp0 = tmp1 = tmp2 = tmp3 = 0;
|
---|
887 | }
|
---|
888 | }
|
---|
889 | }
|
---|
890 | }
|
---|
891 |
|
---|
892 | /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
|
---|
893 |
|
---|
894 | dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
|
---|
895 | CONST_BITS+PASS1_BITS+3);
|
---|
896 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
|
---|
897 | CONST_BITS+PASS1_BITS+3);
|
---|
898 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
|
---|
899 | CONST_BITS+PASS1_BITS+3);
|
---|
900 | dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
|
---|
901 | CONST_BITS+PASS1_BITS+3);
|
---|
902 | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
|
---|
903 | CONST_BITS+PASS1_BITS+3);
|
---|
904 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
|
---|
905 | CONST_BITS+PASS1_BITS+3);
|
---|
906 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
|
---|
907 | CONST_BITS+PASS1_BITS+3);
|
---|
908 | dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
|
---|
909 | CONST_BITS+PASS1_BITS+3);
|
---|
910 |
|
---|
911 | dataptr++; /* advance pointer to next column */
|
---|
912 | }
|
---|
913 | }
|
---|
914 |
|
---|
915 | #undef DCTSIZE /* drop the value 8 defined near the top of the file */
|
---|
916 | #define DCTSIZE 4 /* row/column count looped over by j_rev_dct4() */
|
---|
917 | #define DCTSTRIDE 8 /* element step used to move from one row of the block to the next */
|
---|
918 |
|
---|
919 | void j_rev_dct4(DCTBLOCK data)
|
---|
920 | {
|
---|
921 | int32_t tmp0, tmp1, tmp2, tmp3;
|
---|
922 | int32_t tmp10, tmp11, tmp12, tmp13;
|
---|
923 | int32_t z1;
|
---|
924 | int32_t d0, d2, d4, d6;
|
---|
925 | register DCTELEM *dataptr;
|
---|
926 | int rowctr;
|
---|
927 |
|
---|
928 | /* Pass 1: process rows. */
|
---|
929 | /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
|
---|
930 | /* furthermore, we scale the results by 2**PASS1_BITS. */
|
---|
931 |
|
---|
932 | data[0] += 4;
|
---|
933 |
|
---|
934 | dataptr = data;
|
---|
935 |
|
---|
936 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
|
---|
937 | /* Due to quantization, we will usually find that many of the input
|
---|
938 | * coefficients are zero, especially the AC terms. We can exploit this
|
---|
939 | * by short-circuiting the IDCT calculation for any row in which all
|
---|
940 | * the AC terms are zero. In that case each output is equal to the
|
---|
941 | * DC coefficient (with scale factor as needed).
|
---|
942 | * With typical images and quantization tables, half or more of the
|
---|
943 | * row DCT calculations can be simplified this way.
|
---|
944 | */
|
---|
945 |
|
---|
946 | register int *idataptr = (int*)dataptr;
|
---|
947 |
|
---|
948 | d0 = dataptr[0];
|
---|
949 | d2 = dataptr[1];
|
---|
950 | d4 = dataptr[2];
|
---|
951 | d6 = dataptr[3];
|
---|
952 |
|
---|
953 | if ((d2 | d4 | d6) == 0) {
|
---|
954 | /* AC terms all zero */
|
---|
955 | if (d0) {
|
---|
956 | /* Compute a 32 bit value to assign. */
|
---|
957 | DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
|
---|
958 | register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
|
---|
959 |
|
---|
960 | idataptr[0] = v;
|
---|
961 | idataptr[1] = v;
|
---|
962 | }
|
---|
963 |
|
---|
964 | dataptr += DCTSTRIDE; /* advance pointer to next row */
|
---|
965 | continue;
|
---|
966 | }
|
---|
967 |
|
---|
968 | /* Even part: reverse the even part of the forward DCT. */
|
---|
969 | /* The rotator is sqrt(2)*c(-6). */
|
---|
970 | if (d6) {
|
---|
971 | if (d2) {
|
---|
972 | /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
|
---|
973 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
---|
974 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
---|
975 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
---|
976 |
|
---|
977 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
978 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
979 |
|
---|
980 | tmp10 = tmp0 + tmp3;
|
---|
981 | tmp13 = tmp0 - tmp3;
|
---|
982 | tmp11 = tmp1 + tmp2;
|
---|
983 | tmp12 = tmp1 - tmp2;
|
---|
984 | } else {
|
---|
985 | /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
|
---|
986 | tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
---|
987 | tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
---|
988 |
|
---|
989 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
990 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
991 |
|
---|
992 | tmp10 = tmp0 + tmp3;
|
---|
993 | tmp13 = tmp0 - tmp3;
|
---|
994 | tmp11 = tmp1 + tmp2;
|
---|
995 | tmp12 = tmp1 - tmp2;
|
---|
996 | }
|
---|
997 | } else {
|
---|
998 | if (d2) {
|
---|
999 | /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
|
---|
1000 | tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
---|
1001 | tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
---|
1002 |
|
---|
1003 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
1004 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
1005 |
|
---|
1006 | tmp10 = tmp0 + tmp3;
|
---|
1007 | tmp13 = tmp0 - tmp3;
|
---|
1008 | tmp11 = tmp1 + tmp2;
|
---|
1009 | tmp12 = tmp1 - tmp2;
|
---|
1010 | } else {
|
---|
1011 | /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
|
---|
1012 | tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
|
---|
1013 | tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
|
---|
1014 | }
|
---|
1015 | }
|
---|
1016 |
|
---|
1017 | /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
|
---|
1018 |
|
---|
1019 | dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
|
---|
1020 | dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
|
---|
1021 | dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
|
---|
1022 | dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
|
---|
1023 |
|
---|
1024 | dataptr += DCTSTRIDE; /* advance pointer to next row */
|
---|
1025 | }
|
---|
1026 |
|
---|
1027 | /* Pass 2: process columns. */
|
---|
1028 | /* Note that we must descale the results by a factor of 8 == 2**3, */
|
---|
1029 | /* and also undo the PASS1_BITS scaling. */
|
---|
1030 |
|
---|
1031 | dataptr = data;
|
---|
1032 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
|
---|
1033 | /* Columns of zeroes can be exploited in the same way as we did with rows.
|
---|
1034 | * However, the row calculation has created many nonzero AC terms, so the
|
---|
1035 | * simplification applies less often (typically 5% to 10% of the time).
|
---|
1036 | * On machines with very fast multiplication, it's possible that the
|
---|
1037 | * test takes more time than it's worth. In that case this section
|
---|
1038 | * may be commented out.
|
---|
1039 | */
|
---|
1040 |
|
---|
1041 | d0 = dataptr[DCTSTRIDE*0];
|
---|
1042 | d2 = dataptr[DCTSTRIDE*1];
|
---|
1043 | d4 = dataptr[DCTSTRIDE*2];
|
---|
1044 | d6 = dataptr[DCTSTRIDE*3];
|
---|
1045 |
|
---|
1046 | /* Even part: reverse the even part of the forward DCT. */
|
---|
1047 | /* The rotator is sqrt(2)*c(-6). */
|
---|
1048 | if (d6) {
|
---|
1049 | if (d2) {
|
---|
1050 | /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
|
---|
1051 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
---|
1052 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
---|
1053 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
---|
1054 |
|
---|
1055 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
1056 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
1057 |
|
---|
1058 | tmp10 = tmp0 + tmp3;
|
---|
1059 | tmp13 = tmp0 - tmp3;
|
---|
1060 | tmp11 = tmp1 + tmp2;
|
---|
1061 | tmp12 = tmp1 - tmp2;
|
---|
1062 | } else {
|
---|
1063 | /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
|
---|
1064 | tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
---|
1065 | tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
---|
1066 |
|
---|
1067 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
1068 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
1069 |
|
---|
1070 | tmp10 = tmp0 + tmp3;
|
---|
1071 | tmp13 = tmp0 - tmp3;
|
---|
1072 | tmp11 = tmp1 + tmp2;
|
---|
1073 | tmp12 = tmp1 - tmp2;
|
---|
1074 | }
|
---|
1075 | } else {
|
---|
1076 | if (d2) {
|
---|
1077 | /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
|
---|
1078 | tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
---|
1079 | tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
---|
1080 |
|
---|
1081 | tmp0 = (d0 + d4) << CONST_BITS;
|
---|
1082 | tmp1 = (d0 - d4) << CONST_BITS;
|
---|
1083 |
|
---|
1084 | tmp10 = tmp0 + tmp3;
|
---|
1085 | tmp13 = tmp0 - tmp3;
|
---|
1086 | tmp11 = tmp1 + tmp2;
|
---|
1087 | tmp12 = tmp1 - tmp2;
|
---|
1088 | } else {
|
---|
1089 | /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
|
---|
1090 | tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
|
---|
1091 | tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
|
---|
1092 | }
|
---|
1093 | }
|
---|
1094 |
|
---|
1095 | /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
|
---|
1096 |
|
---|
1097 | dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3);
|
---|
1098 | dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
|
---|
1099 | dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
|
---|
1100 | dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
|
---|
1101 |
|
---|
1102 | dataptr++; /* advance pointer to next column */
|
---|
1103 | }
|
---|
1104 | }
|
---|
1105 |
|
---|
1106 | void j_rev_dct2(DCTBLOCK data){
|
---|
1107 | int d00, d01, d10, d11;
|
---|
1108 |
|
---|
1109 | data[0] += 4;
|
---|
1110 | d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE];
|
---|
1111 | d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE];
|
---|
1112 | d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE];
|
---|
1113 | d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE];
|
---|
1114 |
|
---|
1115 | data[0+0*DCTSTRIDE]= (d00 + d10)>>3;
|
---|
1116 | data[1+0*DCTSTRIDE]= (d01 + d11)>>3;
|
---|
1117 | data[0+1*DCTSTRIDE]= (d00 - d10)>>3;
|
---|
1118 | data[1+1*DCTSTRIDE]= (d01 - d11)>>3;
|
---|
1119 | }
|
---|
1120 |
|
---|
1121 | void j_rev_dct1(DCTBLOCK data){
|
---|
1122 | data[0] = (data[0] + 4)>>3;
|
---|
1123 | }
|
---|
1124 |
|
---|
1125 | #undef FIX
|
---|
1126 | #undef CONST_BITS
|
---|