1 |
/* |
2 |
* jrevdct.c |
3 |
* |
4 |
* Copyright (C) 1991, 1992, Thomas G. Lane. |
5 |
* This file is part of the Independent JPEG Group's software. |
6 |
* For conditions of distribution and use, see the accompanying README file. |
7 |
* |
8 |
* This file contains the basic inverse-DCT transformation subroutine. |
9 |
* |
10 |
* This implementation is based on an algorithm described in |
11 |
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT |
12 |
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, |
13 |
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. |
14 |
* The primary algorithm described there uses 11 multiplies and 29 adds. |
15 |
* We use their alternate method with 12 multiplies and 32 adds. |
16 |
* The advantage of this method is that no data path contains more than one |
17 |
* multiplication; this allows a very simple and accurate implementation in |
18 |
* scaled fixed-point arithmetic, with a minimal number of shifts. |
19 |
* |
20 |
* I've made lots of modifications to attempt to take advantage of the |
21 |
* sparse nature of the DCT matrices we're getting. Although the logic |
22 |
* is cumbersome, it's straightforward and the resulting code is much |
23 |
* faster. |
24 |
* |
25 |
* A better way to do this would be to pass in the DCT block as a sparse |
26 |
* matrix, perhaps with the different cases encoded. |
27 |
*/ |
28 |
|
29 |
#include <memory.h> |
30 |
#include "all.h" |
31 |
#include "ansi.h" |
32 |
#include "dct.h" |
33 |
|
34 |
|
35 |
#define CONST_BITS 13 |
36 |
|
37 |
/* |
38 |
* This routine is specialized to the case DCTSIZE = 8. |
39 |
*/ |
40 |
|
41 |
#if DCTSIZE != 8 |
42 |
Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ |
43 |
#endif |
44 |
|
45 |
|
46 |
/* |
47 |
* A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT |
48 |
* on each column. Direct algorithms are also available, but they are |
49 |
* much more complex and seem not to be any faster when reduced to code. |
50 |
* |
51 |
* The poop on this scaling stuff is as follows: |
52 |
* |
53 |
* Each 1-D IDCT step produces outputs which are a factor of sqrt(N) |
54 |
* larger than the true IDCT outputs. The final outputs are therefore |
55 |
* a factor of N larger than desired; since N=8 this can be cured by |
56 |
* a simple right shift at the end of the algorithm. The advantage of |
57 |
* this arrangement is that we save two multiplications per 1-D IDCT, |
58 |
* because the y0 and y4 inputs need not be divided by sqrt(N). |
59 |
* |
60 |
* We have to do addition and subtraction of the integer inputs, which |
61 |
* is no problem, and multiplication by fractional constants, which is |
62 |
* a problem to do in integer arithmetic. We multiply all the constants |
63 |
* by CONST_SCALE and convert them to integer constants (thus retaining |
64 |
* CONST_BITS bits of precision in the constants). After doing a |
65 |
* multiplication we have to divide the product by CONST_SCALE, with proper |
66 |
* rounding, to produce the correct output. This division can be done |
67 |
* cheaply as a right shift of CONST_BITS bits. We postpone shifting |
68 |
* as long as possible so that partial sums can be added together with |
69 |
* full fractional precision. |
70 |
* |
71 |
* The outputs of the first pass are scaled up by PASS1_BITS bits so that |
72 |
* they are represented to better-than-integral precision. These outputs |
73 |
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word |
74 |
* with the recommended scaling. (To scale up 12-bit sample data further, an |
75 |
* intermediate int32 array would be needed.) |
76 |
* |
77 |
* To avoid overflow of the 32-bit intermediate results in pass 2, we must |
78 |
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis |
79 |
* shows that the values given below are the most effective. |
80 |
*/ |
81 |
|
82 |
#ifdef EIGHT_BIT_SAMPLES |
83 |
#define PASS1_BITS 2 |
84 |
#else |
85 |
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ |
86 |
#endif |
87 |
|
88 |
#define ONE ((int32) 1) |
89 |
|
90 |
#define CONST_SCALE (ONE << CONST_BITS) |
91 |
|
92 |
/* Convert a positive real constant to an integer scaled by CONST_SCALE. |
93 |
* IMPORTANT: if your compiler doesn't do this arithmetic at compile time, |
94 |
* you will pay a significant penalty in run time. In that case, figure |
95 |
* the correct integer constant values and insert them by hand. |
96 |
*/ |
97 |
|
98 |
/* FIX is no longer used: all of the scaled constants below are precomputed. */ |
99 |
#define FIX(x) ((int32) ((x) * CONST_SCALE + 0.5)) |
100 |
|
101 |
/* Descale and correctly round an int32 value that's scaled by N bits. |
102 |
* We assume RIGHT_SHIFT rounds towards minus infinity, so adding |
103 |
* the fudge factor is correct for either sign of X. |
104 |
*/ |
105 |
|
106 |
#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) |
107 |
|
108 |
/* Multiply an int32 variable by an int32 constant to yield an int32 result. |
109 |
* For 8-bit samples with the recommended scaling, all the variable |
110 |
* and constant values involved are no more than 16 bits wide, so a |
111 |
* 16x16->32 bit multiply can be used instead of a full 32x32 multiply; |
112 |
* this provides a useful speedup on many machines. |
113 |
* There is no way to specify a 16x16->32 multiply in portable C, but |
114 |
* some C compilers will do the right thing if you provide the correct |
115 |
* combination of casts. |
116 |
* NB: for 12-bit samples, a full 32-bit multiplication will be needed. |
117 |
*/ |
118 |
|
119 |
#ifdef EIGHT_BIT_SAMPLES |
120 |
#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ |
121 |
#define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const))) |
122 |
#endif |
123 |
#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ |
124 |
#define MULTIPLY(var,const) (((INT16) (var)) * ((int32) (const))) |
125 |
#endif |
126 |
#endif |
127 |
|
128 |
#ifndef MULTIPLY /* default definition */ |
129 |
#define MULTIPLY(var,const) ((var) * (const)) |
130 |
#endif |
131 |
|
132 |
|
133 |
/* |
134 |
Unlike our decoder where we approximate the FIXes, we need to use exact |
135 |
ones here or successive P-frames will drift too much with Reference frame coding |
136 |
*/ |
137 |
#define FIX_0_211164243 1730 |
138 |
#define FIX_0_275899380 2260 |
139 |
#define FIX_0_298631336 2446 |
140 |
#define FIX_0_390180644 3196 |
141 |
#define FIX_0_509795579 4176 |
142 |
#define FIX_0_541196100 4433 |
143 |
#define FIX_0_601344887 4926 |
144 |
#define FIX_0_765366865 6270 |
145 |
#define FIX_0_785694958 6436 |
146 |
#define FIX_0_899976223 7373 |
147 |
#define FIX_1_061594337 8697 |
148 |
#define FIX_1_111140466 9102 |
149 |
#define FIX_1_175875602 9633 |
150 |
#define FIX_1_306562965 10703 |
151 |
#define FIX_1_387039845 11363 |
152 |
#define FIX_1_451774981 11893 |
153 |
#define FIX_1_501321110 12299 |
154 |
#define FIX_1_662939225 13623 |
155 |
#define FIX_1_847759065 15137 |
156 |
#define FIX_1_961570560 16069 |
157 |
#define FIX_2_053119869 16819 |
158 |
#define FIX_2_172734803 17799 |
159 |
#define FIX_2_562915447 20995 |
160 |
#define FIX_3_072711026 25172 |
161 |
|
162 |
/* |
163 |
Switch on reverse_dct choices |
164 |
*/ |
165 |
void reference_rev_dct _ANSI_ARGS_((int16 *block)); |
166 |
void mpeg_jrevdct_quick _ANSI_ARGS_((int16 *block)); |
167 |
void init_idctref _ANSI_ARGS_((void)); |
168 |
|
169 |
extern boolean pureDCT; |
170 |
|
171 |
void |
172 |
mpeg_jrevdct(data) |
173 |
DCTBLOCK data; |
174 |
{ |
175 |
if (pureDCT) reference_rev_dct(data); |
176 |
else mpeg_jrevdct_quick(data); |
177 |
} |
178 |
|
179 |
/* |
180 |
* Perform the inverse DCT on one block of coefficients. |
181 |
*/ |
182 |
|
183 |
void |
184 |
mpeg_jrevdct_quick(data) |
185 |
DCTBLOCK data; |
186 |
{ |
187 |
int32 tmp0, tmp1, tmp2, tmp3; |
188 |
int32 tmp10, tmp11, tmp12, tmp13; |
189 |
int32 z1, z2, z3, z4, z5; |
190 |
int32 d0, d1, d2, d3, d4, d5, d6, d7; |
191 |
register DCTELEM *dataptr; |
192 |
int rowctr; |
193 |
SHIFT_TEMPS |
194 |
|
195 |
/* Pass 1: process rows. */ |
196 |
/* Note results are scaled up by sqrt(8) compared to a true IDCT; */ |
197 |
/* furthermore, we scale the results by 2**PASS1_BITS. */ |
198 |
|
199 |
dataptr = data; |
200 |
|
201 |
for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { |
202 |
/* Due to quantization, we will usually find that many of the input |
203 |
* coefficients are zero, especially the AC terms. We can exploit this |
204 |
* by short-circuiting the IDCT calculation for any row in which all |
205 |
* the AC terms are zero. In that case each output is equal to the |
206 |
* DC coefficient (with scale factor as needed). |
207 |
* With typical images and quantization tables, half or more of the |
208 |
* row DCT calculations can be simplified this way. |
209 |
*/ |
210 |
|
211 |
register int *idataptr = (int*)dataptr; |
212 |
d0 = dataptr[0]; |
213 |
d1 = dataptr[1]; |
214 |
if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) { |
215 |
/* AC terms all zero */ |
216 |
if (d0) { |
217 |
/* Compute a 32 bit value to assign. */ |
218 |
DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); |
219 |
register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); |
220 |
|
221 |
idataptr[0] = v; |
222 |
idataptr[1] = v; |
223 |
idataptr[2] = v; |
224 |
idataptr[3] = v; |
225 |
} |
226 |
|
227 |
dataptr += DCTSIZE; /* advance pointer to next row */ |
228 |
continue; |
229 |
} |
230 |
d2 = dataptr[2]; |
231 |
d3 = dataptr[3]; |
232 |
d4 = dataptr[4]; |
233 |
d5 = dataptr[5]; |
234 |
d6 = dataptr[6]; |
235 |
d7 = dataptr[7]; |
236 |
|
237 |
/* Even part: reverse the even part of the forward DCT. */ |
238 |
/* The rotator is sqrt(2)*c(-6). */ |
239 |
{ |
240 |
if (d6) { |
241 |
if (d4) { |
242 |
if (d2) { |
243 |
if (d0) { |
244 |
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ |
245 |
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
246 |
tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
247 |
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
248 |
|
249 |
tmp0 = (d0 + d4) << CONST_BITS; |
250 |
tmp1 = (d0 - d4) << CONST_BITS; |
251 |
|
252 |
tmp10 = tmp0 + tmp3; |
253 |
tmp13 = tmp0 - tmp3; |
254 |
tmp11 = tmp1 + tmp2; |
255 |
tmp12 = tmp1 - tmp2; |
256 |
} else { |
257 |
/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ |
258 |
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
259 |
tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
260 |
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
261 |
|
262 |
tmp0 = d4 << CONST_BITS; |
263 |
|
264 |
tmp10 = tmp0 + tmp3; |
265 |
tmp13 = tmp0 - tmp3; |
266 |
tmp11 = tmp2 - tmp0; |
267 |
tmp12 = -(tmp0 + tmp2); |
268 |
} |
269 |
} else { |
270 |
if (d0) { |
271 |
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ |
272 |
tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
273 |
tmp3 = MULTIPLY(d6, FIX_0_541196100); |
274 |
|
275 |
tmp0 = (d0 + d4) << CONST_BITS; |
276 |
tmp1 = (d0 - d4) << CONST_BITS; |
277 |
|
278 |
tmp10 = tmp0 + tmp3; |
279 |
tmp13 = tmp0 - tmp3; |
280 |
tmp11 = tmp1 + tmp2; |
281 |
tmp12 = tmp1 - tmp2; |
282 |
} else { |
283 |
/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ |
284 |
tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
285 |
tmp3 = MULTIPLY(d6, FIX_0_541196100); |
286 |
|
287 |
tmp0 = d4 << CONST_BITS; |
288 |
|
289 |
tmp10 = tmp0 + tmp3; |
290 |
tmp13 = tmp0 - tmp3; |
291 |
tmp11 = tmp2 - tmp0; |
292 |
tmp12 = -(tmp0 + tmp2); |
293 |
} |
294 |
} |
295 |
} else { |
296 |
if (d2) { |
297 |
if (d0) { |
298 |
/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ |
299 |
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
300 |
tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
301 |
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
302 |
|
303 |
tmp0 = d0 << CONST_BITS; |
304 |
|
305 |
tmp10 = tmp0 + tmp3; |
306 |
tmp13 = tmp0 - tmp3; |
307 |
tmp11 = tmp0 + tmp2; |
308 |
tmp12 = tmp0 - tmp2; |
309 |
} else { |
310 |
/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ |
311 |
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
312 |
tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
313 |
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
314 |
|
315 |
tmp10 = tmp3; |
316 |
tmp13 = -tmp3; |
317 |
tmp11 = tmp2; |
318 |
tmp12 = -tmp2; |
319 |
} |
320 |
} else { |
321 |
if (d0) { |
322 |
/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ |
323 |
tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
324 |
tmp3 = MULTIPLY(d6, FIX_0_541196100); |
325 |
|
326 |
tmp0 = d0 << CONST_BITS; |
327 |
|
328 |
tmp10 = tmp0 + tmp3; |
329 |
tmp13 = tmp0 - tmp3; |
330 |
tmp11 = tmp0 + tmp2; |
331 |
tmp12 = tmp0 - tmp2; |
332 |
} else { |
333 |
/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ |
334 |
tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
335 |
tmp3 = MULTIPLY(d6, FIX_0_541196100); |
336 |
|
337 |
tmp10 = tmp3; |
338 |
tmp13 = -tmp3; |
339 |
tmp11 = tmp2; |
340 |
tmp12 = -tmp2; |
341 |
} |
342 |
} |
343 |
} |
344 |
} else { |
345 |
if (d4) { |
346 |
if (d2) { |
347 |
if (d0) { |
348 |
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ |
349 |
tmp2 = MULTIPLY(d2, FIX_0_541196100); |
350 |
tmp3 = MULTIPLY(d2, FIX_1_306562965); |
351 |
|
352 |
tmp0 = (d0 + d4) << CONST_BITS; |
353 |
tmp1 = (d0 - d4) << CONST_BITS; |
354 |
|
355 |
tmp10 = tmp0 + tmp3; |
356 |
tmp13 = tmp0 - tmp3; |
357 |
tmp11 = tmp1 + tmp2; |
358 |
tmp12 = tmp1 - tmp2; |
359 |
} else { |
360 |
/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ |
361 |
tmp2 = MULTIPLY(d2, FIX_0_541196100); |
362 |
tmp3 = MULTIPLY(d2, FIX_1_306562965); |
363 |
|
364 |
tmp0 = d4 << CONST_BITS; |
365 |
|
366 |
tmp10 = tmp0 + tmp3; |
367 |
tmp13 = tmp0 - tmp3; |
368 |
tmp11 = tmp2 - tmp0; |
369 |
tmp12 = -(tmp0 + tmp2); |
370 |
} |
371 |
} else { |
372 |
if (d0) { |
373 |
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ |
374 |
tmp10 = tmp13 = (d0 + d4) << CONST_BITS; |
375 |
tmp11 = tmp12 = (d0 - d4) << CONST_BITS; |
376 |
} else { |
377 |
/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ |
378 |
tmp10 = tmp13 = d4 << CONST_BITS; |
379 |
tmp11 = tmp12 = -tmp10; |
380 |
} |
381 |
} |
382 |
} else { |
383 |
if (d2) { |
384 |
if (d0) { |
385 |
/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ |
386 |
tmp2 = MULTIPLY(d2, FIX_0_541196100); |
387 |
tmp3 = MULTIPLY(d2, FIX_1_306562965); |
388 |
|
389 |
tmp0 = d0 << CONST_BITS; |
390 |
|
391 |
tmp10 = tmp0 + tmp3; |
392 |
tmp13 = tmp0 - tmp3; |
393 |
tmp11 = tmp0 + tmp2; |
394 |
tmp12 = tmp0 - tmp2; |
395 |
} else { |
396 |
/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ |
397 |
tmp2 = MULTIPLY(d2, FIX_0_541196100); |
398 |
tmp3 = MULTIPLY(d2, FIX_1_306562965); |
399 |
|
400 |
tmp10 = tmp3; |
401 |
tmp13 = -tmp3; |
402 |
tmp11 = tmp2; |
403 |
tmp12 = -tmp2; |
404 |
} |
405 |
} else { |
406 |
if (d0) { |
407 |
/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ |
408 |
tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; |
409 |
} else { |
410 |
/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ |
411 |
tmp10 = tmp13 = tmp11 = tmp12 = 0; |
412 |
} |
413 |
} |
414 |
} |
415 |
} |
416 |
|
417 |
/* Odd part per figure 8; the matrix is unitary and hence its |
418 |
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
419 |
*/ |
420 |
|
421 |
if (d7) { |
422 |
if (d5) { |
423 |
if (d3) { |
424 |
if (d1) { |
425 |
/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ |
426 |
z1 = d7 + d1; |
427 |
z2 = d5 + d3; |
428 |
z3 = d7 + d3; |
429 |
z4 = d5 + d1; |
430 |
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); |
431 |
|
432 |
tmp0 = MULTIPLY(d7, FIX_0_298631336); |
433 |
tmp1 = MULTIPLY(d5, FIX_2_053119869); |
434 |
tmp2 = MULTIPLY(d3, FIX_3_072711026); |
435 |
tmp3 = MULTIPLY(d1, FIX_1_501321110); |
436 |
z1 = MULTIPLY(-z1, FIX_0_899976223); |
437 |
z2 = MULTIPLY(-z2, FIX_2_562915447); |
438 |
z3 = MULTIPLY(-z3, FIX_1_961570560); |
439 |
z4 = MULTIPLY(-z4, FIX_0_390180644); |
440 |
|
441 |
z3 += z5; |
442 |
z4 += z5; |
443 |
|
444 |
tmp0 += z1 + z3; |
445 |
tmp1 += z2 + z4; |
446 |
tmp2 += z2 + z3; |
447 |
tmp3 += z1 + z4; |
448 |
} else { |
449 |
/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ |
450 |
z2 = d5 + d3; |
451 |
z3 = d7 + d3; |
452 |
z5 = MULTIPLY(z3 + d5, FIX_1_175875602); |
453 |
|
454 |
tmp0 = MULTIPLY(d7, FIX_0_298631336); |
455 |
tmp1 = MULTIPLY(d5, FIX_2_053119869); |
456 |
tmp2 = MULTIPLY(d3, FIX_3_072711026); |
457 |
z1 = MULTIPLY(-d7, FIX_0_899976223); |
458 |
z2 = MULTIPLY(-z2, FIX_2_562915447); |
459 |
z3 = MULTIPLY(-z3, FIX_1_961570560); |
460 |
z4 = MULTIPLY(-d5, FIX_0_390180644); |
461 |
|
462 |
z3 += z5; |
463 |
z4 += z5; |
464 |
|
465 |
tmp0 += z1 + z3; |
466 |
tmp1 += z2 + z4; |
467 |
tmp2 += z2 + z3; |
468 |
tmp3 = z1 + z4; |
469 |
} |
470 |
} else { |
471 |
if (d1) { |
472 |
/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ |
473 |
z1 = d7 + d1; |
474 |
z4 = d5 + d1; |
475 |
z5 = MULTIPLY(d7 + z4, FIX_1_175875602); |
476 |
|
477 |
tmp0 = MULTIPLY(d7, FIX_0_298631336); |
478 |
tmp1 = MULTIPLY(d5, FIX_2_053119869); |
479 |
tmp3 = MULTIPLY(d1, FIX_1_501321110); |
480 |
z1 = MULTIPLY(-z1, FIX_0_899976223); |
481 |
z2 = MULTIPLY(-d5, FIX_2_562915447); |
482 |
z3 = MULTIPLY(-d7, FIX_1_961570560); |
483 |
z4 = MULTIPLY(-z4, FIX_0_390180644); |
484 |
|
485 |
z3 += z5; |
486 |
z4 += z5; |
487 |
|
488 |
tmp0 += z1 + z3; |
489 |
tmp1 += z2 + z4; |
490 |
tmp2 = z2 + z3; |
491 |
tmp3 += z1 + z4; |
492 |
} else { |
493 |
/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ |
494 |
tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
495 |
z1 = MULTIPLY(-d7, FIX_0_899976223); |
496 |
z3 = MULTIPLY(-d7, FIX_1_961570560); |
497 |
tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
498 |
z2 = MULTIPLY(-d5, FIX_2_562915447); |
499 |
z4 = MULTIPLY(-d5, FIX_0_390180644); |
500 |
z5 = MULTIPLY(d5 + d7, FIX_1_175875602); |
501 |
|
502 |
z3 += z5; |
503 |
z4 += z5; |
504 |
|
505 |
tmp0 += z3; |
506 |
tmp1 += z4; |
507 |
tmp2 = z2 + z3; |
508 |
tmp3 = z1 + z4; |
509 |
} |
510 |
} |
511 |
} else { |
512 |
if (d3) { |
513 |
if (d1) { |
514 |
/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ |
515 |
z1 = d7 + d1; |
516 |
z3 = d7 + d3; |
517 |
z5 = MULTIPLY(z3 + d1, FIX_1_175875602); |
518 |
|
519 |
tmp0 = MULTIPLY(d7, FIX_0_298631336); |
520 |
tmp2 = MULTIPLY(d3, FIX_3_072711026); |
521 |
tmp3 = MULTIPLY(d1, FIX_1_501321110); |
522 |
z1 = MULTIPLY(-z1, FIX_0_899976223); |
523 |
z2 = MULTIPLY(-d3, FIX_2_562915447); |
524 |
z3 = MULTIPLY(-z3, FIX_1_961570560); |
525 |
z4 = MULTIPLY(-d1, FIX_0_390180644); |
526 |
|
527 |
z3 += z5; |
528 |
z4 += z5; |
529 |
|
530 |
tmp0 += z1 + z3; |
531 |
tmp1 = z2 + z4; |
532 |
tmp2 += z2 + z3; |
533 |
tmp3 += z1 + z4; |
534 |
} else { |
535 |
/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ |
536 |
z3 = d7 + d3; |
537 |
|
538 |
tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
539 |
z1 = MULTIPLY(-d7, FIX_0_899976223); |
540 |
tmp2 = MULTIPLY(d3, FIX_0_509795579); |
541 |
z2 = MULTIPLY(-d3, FIX_2_562915447); |
542 |
z5 = MULTIPLY(z3, FIX_1_175875602); |
543 |
z3 = MULTIPLY(-z3, FIX_0_785694958); |
544 |
|
545 |
tmp0 += z3; |
546 |
tmp1 = z2 + z5; |
547 |
tmp2 += z3; |
548 |
tmp3 = z1 + z5; |
549 |
} |
550 |
} else { |
551 |
if (d1) { |
552 |
/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ |
553 |
z1 = d7 + d1; |
554 |
z5 = MULTIPLY(z1, FIX_1_175875602); |
555 |
|
556 |
z1 = MULTIPLY(z1, FIX_0_275899380); |
557 |
z3 = MULTIPLY(-d7, FIX_1_961570560); |
558 |
tmp0 = MULTIPLY(-d7, FIX_1_662939225); |
559 |
z4 = MULTIPLY(-d1, FIX_0_390180644); |
560 |
tmp3 = MULTIPLY(d1, FIX_1_111140466); |
561 |
|
562 |
tmp0 += z1; |
563 |
tmp1 = z4 + z5; |
564 |
tmp2 = z3 + z5; |
565 |
tmp3 += z1; |
566 |
} else { |
567 |
/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ |
568 |
tmp0 = MULTIPLY(-d7, FIX_1_387039845); |
569 |
tmp1 = MULTIPLY(d7, FIX_1_175875602); |
570 |
tmp2 = MULTIPLY(-d7, FIX_0_785694958); |
571 |
tmp3 = MULTIPLY(d7, FIX_0_275899380); |
572 |
} |
573 |
} |
574 |
} |
575 |
} else { |
576 |
if (d5) { |
577 |
if (d3) { |
578 |
if (d1) { |
579 |
/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ |
580 |
z2 = d5 + d3; |
581 |
z4 = d5 + d1; |
582 |
z5 = MULTIPLY(d3 + z4, FIX_1_175875602); |
583 |
|
584 |
tmp1 = MULTIPLY(d5, FIX_2_053119869); |
585 |
tmp2 = MULTIPLY(d3, FIX_3_072711026); |
586 |
tmp3 = MULTIPLY(d1, FIX_1_501321110); |
587 |
z1 = MULTIPLY(-d1, FIX_0_899976223); |
588 |
z2 = MULTIPLY(-z2, FIX_2_562915447); |
589 |
z3 = MULTIPLY(-d3, FIX_1_961570560); |
590 |
z4 = MULTIPLY(-z4, FIX_0_390180644); |
591 |
|
592 |
z3 += z5; |
593 |
z4 += z5; |
594 |
|
595 |
tmp0 = z1 + z3; |
596 |
tmp1 += z2 + z4; |
597 |
tmp2 += z2 + z3; |
598 |
tmp3 += z1 + z4; |
599 |
} else { |
600 |
/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ |
601 |
z2 = d5 + d3; |
602 |
|
603 |
z5 = MULTIPLY(z2, FIX_1_175875602); |
604 |
tmp1 = MULTIPLY(d5, FIX_1_662939225); |
605 |
z4 = MULTIPLY(-d5, FIX_0_390180644); |
606 |
z2 = MULTIPLY(-z2, FIX_1_387039845); |
607 |
tmp2 = MULTIPLY(d3, FIX_1_111140466); |
608 |
z3 = MULTIPLY(-d3, FIX_1_961570560); |
609 |
|
610 |
tmp0 = z3 + z5; |
611 |
tmp1 += z2; |
612 |
tmp2 += z2; |
613 |
tmp3 = z4 + z5; |
614 |
} |
615 |
} else { |
616 |
if (d1) { |
617 |
/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ |
618 |
z4 = d5 + d1; |
619 |
|
620 |
z5 = MULTIPLY(z4, FIX_1_175875602); |
621 |
z1 = MULTIPLY(-d1, FIX_0_899976223); |
622 |
tmp3 = MULTIPLY(d1, FIX_0_601344887); |
623 |
tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
624 |
z2 = MULTIPLY(-d5, FIX_2_562915447); |
625 |
z4 = MULTIPLY(z4, FIX_0_785694958); |
626 |
|
627 |
tmp0 = z1 + z5; |
628 |
tmp1 += z4; |
629 |
tmp2 = z2 + z5; |
630 |
tmp3 += z4; |
631 |
} else { |
632 |
/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ |
633 |
tmp0 = MULTIPLY(d5, FIX_1_175875602); |
634 |
tmp1 = MULTIPLY(d5, FIX_0_275899380); |
635 |
tmp2 = MULTIPLY(-d5, FIX_1_387039845); |
636 |
tmp3 = MULTIPLY(d5, FIX_0_785694958); |
637 |
} |
638 |
} |
639 |
} else { |
640 |
if (d3) { |
641 |
if (d1) { |
642 |
/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ |
643 |
z5 = d1 + d3; |
644 |
tmp3 = MULTIPLY(d1, FIX_0_211164243); |
645 |
tmp2 = MULTIPLY(-d3, FIX_1_451774981); |
646 |
z1 = MULTIPLY(d1, FIX_1_061594337); |
647 |
z2 = MULTIPLY(-d3, FIX_2_172734803); |
648 |
z4 = MULTIPLY(z5, FIX_0_785694958); |
649 |
z5 = MULTIPLY(z5, FIX_1_175875602); |
650 |
|
651 |
tmp0 = z1 - z4; |
652 |
tmp1 = z2 + z4; |
653 |
tmp2 += z5; |
654 |
tmp3 += z5; |
655 |
} else { |
656 |
/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ |
657 |
tmp0 = MULTIPLY(-d3, FIX_0_785694958); |
658 |
tmp1 = MULTIPLY(-d3, FIX_1_387039845); |
659 |
tmp2 = MULTIPLY(-d3, FIX_0_275899380); |
660 |
tmp3 = MULTIPLY(d3, FIX_1_175875602); |
661 |
} |
662 |
} else { |
663 |
if (d1) { |
664 |
/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ |
665 |
tmp0 = MULTIPLY(d1, FIX_0_275899380); |
666 |
tmp1 = MULTIPLY(d1, FIX_0_785694958); |
667 |
tmp2 = MULTIPLY(d1, FIX_1_175875602); |
668 |
tmp3 = MULTIPLY(d1, FIX_1_387039845); |
669 |
} else { |
670 |
/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ |
671 |
tmp0 = tmp1 = tmp2 = tmp3 = 0; |
672 |
} |
673 |
} |
674 |
} |
675 |
} |
676 |
} |
677 |
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
678 |
|
679 |
dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); |
680 |
dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); |
681 |
dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); |
682 |
dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); |
683 |
dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); |
684 |
dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); |
685 |
dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); |
686 |
dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); |
687 |
|
688 |
dataptr += DCTSIZE; /* advance pointer to next row */ |
689 |
} |
690 |
|
691 |
/* Pass 2: process columns. */ |
692 |
/* Note that we must descale the results by a factor of 8 == 2**3, */ |
693 |
/* and also undo the PASS1_BITS scaling. */ |
694 |
|
695 |
dataptr = data; |
696 |
for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { |
697 |
/* Columns of zeroes can be exploited in the same way as we did with rows. |
698 |
* However, the row calculation has created many nonzero AC terms, so the |
699 |
* simplification applies less often (typically 5% to 10% of the time). |
700 |
* On machines with very fast multiplication, it's possible that the |
701 |
* test takes more time than it's worth. In that case this section |
702 |
* may be commented out. |
703 |
*/ |
704 |
|
705 |
d0 = dataptr[DCTSIZE*0]; |
706 |
d1 = dataptr[DCTSIZE*1]; |
707 |
d2 = dataptr[DCTSIZE*2]; |
708 |
d3 = dataptr[DCTSIZE*3]; |
709 |
d4 = dataptr[DCTSIZE*4]; |
710 |
d5 = dataptr[DCTSIZE*5]; |
711 |
d6 = dataptr[DCTSIZE*6]; |
712 |
d7 = dataptr[DCTSIZE*7]; |
713 |
|
714 |
/* Even part: reverse the even part of the forward DCT. */ |
715 |
/* The rotator is sqrt(2)*c(-6). */ |
716 |
if (d6) { |
717 |
if (d4) { |
718 |
if (d2) { |
719 |
if (d0) { |
720 |
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ |
721 |
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
722 |
tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
723 |
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
724 |
|
725 |
tmp0 = (d0 + d4) << CONST_BITS; |
726 |
tmp1 = (d0 - d4) << CONST_BITS; |
727 |
|
728 |
tmp10 = tmp0 + tmp3; |
729 |
tmp13 = tmp0 - tmp3; |
730 |
tmp11 = tmp1 + tmp2; |
731 |
tmp12 = tmp1 - tmp2; |
732 |
} else { |
733 |
/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ |
734 |
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
735 |
tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
736 |
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
737 |
|
738 |
tmp0 = d4 << CONST_BITS; |
739 |
|
740 |
tmp10 = tmp0 + tmp3; |
741 |
tmp13 = tmp0 - tmp3; |
742 |
tmp11 = tmp2 - tmp0; |
743 |
tmp12 = -(tmp0 + tmp2); |
744 |
} |
745 |
} else { |
746 |
if (d0) { |
747 |
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ |
748 |
tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
749 |
tmp3 = MULTIPLY(d6, FIX_0_541196100); |
750 |
|
751 |
tmp0 = (d0 + d4) << CONST_BITS; |
752 |
tmp1 = (d0 - d4) << CONST_BITS; |
753 |
|
754 |
tmp10 = tmp0 + tmp3; |
755 |
tmp13 = tmp0 - tmp3; |
756 |
tmp11 = tmp1 + tmp2; |
757 |
tmp12 = tmp1 - tmp2; |
758 |
} else { |
759 |
/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ |
760 |
tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
761 |
tmp3 = MULTIPLY(d6, FIX_0_541196100); |
762 |
|
763 |
tmp0 = d4 << CONST_BITS; |
764 |
|
765 |
tmp10 = tmp0 + tmp3; |
766 |
tmp13 = tmp0 - tmp3; |
767 |
tmp11 = tmp2 - tmp0; |
768 |
tmp12 = -(tmp0 + tmp2); |
769 |
} |
770 |
} |
771 |
} else { |
772 |
if (d2) { |
773 |
if (d0) { |
774 |
/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ |
775 |
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
776 |
tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
777 |
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
778 |
|
779 |
tmp0 = d0 << CONST_BITS; |
780 |
|
781 |
tmp10 = tmp0 + tmp3; |
782 |
tmp13 = tmp0 - tmp3; |
783 |
tmp11 = tmp0 + tmp2; |
784 |
tmp12 = tmp0 - tmp2; |
785 |
} else { |
786 |
/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ |
787 |
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
788 |
tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
789 |
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
790 |
|
791 |
tmp10 = tmp3; |
792 |
tmp13 = -tmp3; |
793 |
tmp11 = tmp2; |
794 |
tmp12 = -tmp2; |
795 |
} |
796 |
} else { |
797 |
if (d0) { |
798 |
/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ |
799 |
tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
800 |
tmp3 = MULTIPLY(d6, FIX_0_541196100); |
801 |
|
802 |
tmp0 = d0 << CONST_BITS; |
803 |
|
804 |
tmp10 = tmp0 + tmp3; |
805 |
tmp13 = tmp0 - tmp3; |
806 |
tmp11 = tmp0 + tmp2; |
807 |
tmp12 = tmp0 - tmp2; |
808 |
} else { |
809 |
/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ |
810 |
tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
811 |
tmp3 = MULTIPLY(d6, FIX_0_541196100); |
812 |
|
813 |
tmp10 = tmp3; |
814 |
tmp13 = -tmp3; |
815 |
tmp11 = tmp2; |
816 |
tmp12 = -tmp2; |
817 |
} |
818 |
} |
819 |
} |
820 |
} else { |
821 |
if (d4) { |
822 |
if (d2) { |
823 |
if (d0) { |
824 |
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ |
825 |
tmp2 = MULTIPLY(d2, FIX_0_541196100); |
826 |
tmp3 = MULTIPLY(d2, FIX_1_306562965); |
827 |
|
828 |
tmp0 = (d0 + d4) << CONST_BITS; |
829 |
tmp1 = (d0 - d4) << CONST_BITS; |
830 |
|
831 |
tmp10 = tmp0 + tmp3; |
832 |
tmp13 = tmp0 - tmp3; |
833 |
tmp11 = tmp1 + tmp2; |
834 |
tmp12 = tmp1 - tmp2; |
835 |
} else { |
836 |
/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ |
837 |
tmp2 = MULTIPLY(d2, FIX_0_541196100); |
838 |
tmp3 = MULTIPLY(d2, FIX_1_306562965); |
839 |
|
840 |
tmp0 = d4 << CONST_BITS; |
841 |
|
842 |
tmp10 = tmp0 + tmp3; |
843 |
tmp13 = tmp0 - tmp3; |
844 |
tmp11 = tmp2 - tmp0; |
845 |
tmp12 = -(tmp0 + tmp2); |
846 |
} |
847 |
} else { |
848 |
if (d0) { |
849 |
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ |
850 |
tmp10 = tmp13 = (d0 + d4) << CONST_BITS; |
851 |
tmp11 = tmp12 = (d0 - d4) << CONST_BITS; |
852 |
} else { |
853 |
/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ |
854 |
tmp10 = tmp13 = d4 << CONST_BITS; |
855 |
tmp11 = tmp12 = -tmp10; |
856 |
} |
857 |
} |
858 |
} else { |
859 |
if (d2) { |
860 |
if (d0) { |
861 |
/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ |
862 |
tmp2 = MULTIPLY(d2, FIX_0_541196100); |
863 |
tmp3 = MULTIPLY(d2, FIX_1_306562965); |
864 |
|
865 |
tmp0 = d0 << CONST_BITS; |
866 |
|
867 |
tmp10 = tmp0 + tmp3; |
868 |
tmp13 = tmp0 - tmp3; |
869 |
tmp11 = tmp0 + tmp2; |
870 |
tmp12 = tmp0 - tmp2; |
871 |
} else { |
872 |
/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ |
873 |
tmp2 = MULTIPLY(d2, FIX_0_541196100); |
874 |
tmp3 = MULTIPLY(d2, FIX_1_306562965); |
875 |
|
876 |
tmp10 = tmp3; |
877 |
tmp13 = -tmp3; |
878 |
tmp11 = tmp2; |
879 |
tmp12 = -tmp2; |
880 |
} |
881 |
} else { |
882 |
if (d0) { |
883 |
/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ |
884 |
tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; |
885 |
} else { |
886 |
/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ |
887 |
tmp10 = tmp13 = tmp11 = tmp12 = 0; |
888 |
} |
889 |
} |
890 |
} |
891 |
} |
892 |
|
893 |
/* Odd part per figure 8; the matrix is unitary and hence its |
894 |
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
895 |
*/ |
896 |
if (d7) { |
897 |
if (d5) { |
898 |
if (d3) { |
899 |
if (d1) { |
900 |
/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ |
901 |
z1 = d7 + d1; |
902 |
z2 = d5 + d3; |
903 |
z3 = d7 + d3; |
904 |
z4 = d5 + d1; |
905 |
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); |
906 |
|
907 |
tmp0 = MULTIPLY(d7, FIX_0_298631336); |
908 |
tmp1 = MULTIPLY(d5, FIX_2_053119869); |
909 |
tmp2 = MULTIPLY(d3, FIX_3_072711026); |
910 |
tmp3 = MULTIPLY(d1, FIX_1_501321110); |
911 |
z1 = MULTIPLY(-z1, FIX_0_899976223); |
912 |
z2 = MULTIPLY(-z2, FIX_2_562915447); |
913 |
z3 = MULTIPLY(-z3, FIX_1_961570560); |
914 |
z4 = MULTIPLY(-z4, FIX_0_390180644); |
915 |
|
916 |
z3 += z5; |
917 |
z4 += z5; |
918 |
|
919 |
tmp0 += z1 + z3; |
920 |
tmp1 += z2 + z4; |
921 |
tmp2 += z2 + z3; |
922 |
tmp3 += z1 + z4; |
923 |
} else { |
924 |
/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ |
925 |
z1 = d7; |
926 |
z2 = d5 + d3; |
927 |
z3 = d7 + d3; |
928 |
z5 = MULTIPLY(z3 + d5, FIX_1_175875602); |
929 |
|
930 |
tmp0 = MULTIPLY(d7, FIX_0_298631336); |
931 |
tmp1 = MULTIPLY(d5, FIX_2_053119869); |
932 |
tmp2 = MULTIPLY(d3, FIX_3_072711026); |
933 |
z1 = MULTIPLY(-d7, FIX_0_899976223); |
934 |
z2 = MULTIPLY(-z2, FIX_2_562915447); |
935 |
z3 = MULTIPLY(-z3, FIX_1_961570560); |
936 |
z4 = MULTIPLY(-d5, FIX_0_390180644); |
937 |
|
938 |
z3 += z5; |
939 |
z4 += z5; |
940 |
|
941 |
tmp0 += z1 + z3; |
942 |
tmp1 += z2 + z4; |
943 |
tmp2 += z2 + z3; |
944 |
tmp3 = z1 + z4; |
945 |
} |
946 |
} else { |
947 |
if (d1) { |
948 |
/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ |
949 |
z1 = d7 + d1; |
950 |
z2 = d5; |
951 |
z3 = d7; |
952 |
z4 = d5 + d1; |
953 |
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); |
954 |
|
955 |
tmp0 = MULTIPLY(d7, FIX_0_298631336); |
956 |
tmp1 = MULTIPLY(d5, FIX_2_053119869); |
957 |
tmp3 = MULTIPLY(d1, FIX_1_501321110); |
958 |
z1 = MULTIPLY(-z1, FIX_0_899976223); |
959 |
z2 = MULTIPLY(-d5, FIX_2_562915447); |
960 |
z3 = MULTIPLY(-d7, FIX_1_961570560); |
961 |
z4 = MULTIPLY(-z4, FIX_0_390180644); |
962 |
|
963 |
z3 += z5; |
964 |
z4 += z5; |
965 |
|
966 |
tmp0 += z1 + z3; |
967 |
tmp1 += z2 + z4; |
968 |
tmp2 = z2 + z3; |
969 |
tmp3 += z1 + z4; |
970 |
} else { |
971 |
/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ |
972 |
tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
973 |
z1 = MULTIPLY(-d7, FIX_0_899976223); |
974 |
z3 = MULTIPLY(-d7, FIX_1_961570560); |
975 |
tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
976 |
z2 = MULTIPLY(-d5, FIX_2_562915447); |
977 |
z4 = MULTIPLY(-d5, FIX_0_390180644); |
978 |
z5 = MULTIPLY(d5 + d7, FIX_1_175875602); |
979 |
|
980 |
z3 += z5; |
981 |
z4 += z5; |
982 |
|
983 |
tmp0 += z3; |
984 |
tmp1 += z4; |
985 |
tmp2 = z2 + z3; |
986 |
tmp3 = z1 + z4; |
987 |
} |
988 |
} |
989 |
} else { |
990 |
if (d3) { |
991 |
if (d1) { |
992 |
/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ |
993 |
z1 = d7 + d1; |
994 |
z3 = d7 + d3; |
995 |
z5 = MULTIPLY(z3 + d1, FIX_1_175875602); |
996 |
|
997 |
tmp0 = MULTIPLY(d7, FIX_0_298631336); |
998 |
tmp2 = MULTIPLY(d3, FIX_3_072711026); |
999 |
tmp3 = MULTIPLY(d1, FIX_1_501321110); |
1000 |
z1 = MULTIPLY(-z1, FIX_0_899976223); |
1001 |
z2 = MULTIPLY(-d3, FIX_2_562915447); |
1002 |
z3 = MULTIPLY(-z3, FIX_1_961570560); |
1003 |
z4 = MULTIPLY(-d1, FIX_0_390180644); |
1004 |
|
1005 |
z3 += z5; |
1006 |
z4 += z5; |
1007 |
|
1008 |
tmp0 += z1 + z3; |
1009 |
tmp1 = z2 + z4; |
1010 |
tmp2 += z2 + z3; |
1011 |
tmp3 += z1 + z4; |
1012 |
} else { |
1013 |
/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ |
1014 |
z3 = d7 + d3; |
1015 |
|
1016 |
tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
1017 |
z1 = MULTIPLY(-d7, FIX_0_899976223); |
1018 |
tmp2 = MULTIPLY(d3, FIX_0_509795579); |
1019 |
z2 = MULTIPLY(-d3, FIX_2_562915447); |
1020 |
z5 = MULTIPLY(z3, FIX_1_175875602); |
1021 |
z3 = MULTIPLY(-z3, FIX_0_785694958); |
1022 |
|
1023 |
tmp0 += z3; |
1024 |
tmp1 = z2 + z5; |
1025 |
tmp2 += z3; |
1026 |
tmp3 = z1 + z5; |
1027 |
} |
1028 |
} else { |
1029 |
if (d1) { |
1030 |
/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ |
1031 |
z1 = d7 + d1; |
1032 |
z5 = MULTIPLY(z1, FIX_1_175875602); |
1033 |
|
1034 |
z1 = MULTIPLY(z1, FIX_0_275899380); |
1035 |
z3 = MULTIPLY(-d7, FIX_1_961570560); |
1036 |
tmp0 = MULTIPLY(-d7, FIX_1_662939225); |
1037 |
z4 = MULTIPLY(-d1, FIX_0_390180644); |
1038 |
tmp3 = MULTIPLY(d1, FIX_1_111140466); |
1039 |
|
1040 |
tmp0 += z1; |
1041 |
tmp1 = z4 + z5; |
1042 |
tmp2 = z3 + z5; |
1043 |
tmp3 += z1; |
1044 |
} else { |
1045 |
/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ |
1046 |
tmp0 = MULTIPLY(-d7, FIX_1_387039845); |
1047 |
tmp1 = MULTIPLY(d7, FIX_1_175875602); |
1048 |
tmp2 = MULTIPLY(-d7, FIX_0_785694958); |
1049 |
tmp3 = MULTIPLY(d7, FIX_0_275899380); |
1050 |
} |
1051 |
} |
1052 |
} |
1053 |
} else { |
1054 |
if (d5) { |
1055 |
if (d3) { |
1056 |
if (d1) { |
1057 |
/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ |
1058 |
z2 = d5 + d3; |
1059 |
z4 = d5 + d1; |
1060 |
z5 = MULTIPLY(d3 + z4, FIX_1_175875602); |
1061 |
|
1062 |
tmp1 = MULTIPLY(d5, FIX_2_053119869); |
1063 |
tmp2 = MULTIPLY(d3, FIX_3_072711026); |
1064 |
tmp3 = MULTIPLY(d1, FIX_1_501321110); |
1065 |
z1 = MULTIPLY(-d1, FIX_0_899976223); |
1066 |
z2 = MULTIPLY(-z2, FIX_2_562915447); |
1067 |
z3 = MULTIPLY(-d3, FIX_1_961570560); |
1068 |
z4 = MULTIPLY(-z4, FIX_0_390180644); |
1069 |
|
1070 |
z3 += z5; |
1071 |
z4 += z5; |
1072 |
|
1073 |
tmp0 = z1 + z3; |
1074 |
tmp1 += z2 + z4; |
1075 |
tmp2 += z2 + z3; |
1076 |
tmp3 += z1 + z4; |
1077 |
} else { |
1078 |
/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ |
1079 |
z2 = d5 + d3; |
1080 |
|
1081 |
z5 = MULTIPLY(z2, FIX_1_175875602); |
1082 |
tmp1 = MULTIPLY(d5, FIX_1_662939225); |
1083 |
z4 = MULTIPLY(-d5, FIX_0_390180644); |
1084 |
z2 = MULTIPLY(-z2, FIX_1_387039845); |
1085 |
tmp2 = MULTIPLY(d3, FIX_1_111140466); |
1086 |
z3 = MULTIPLY(-d3, FIX_1_961570560); |
1087 |
|
1088 |
tmp0 = z3 + z5; |
1089 |
tmp1 += z2; |
1090 |
tmp2 += z2; |
1091 |
tmp3 = z4 + z5; |
1092 |
} |
1093 |
} else { |
1094 |
if (d1) { |
1095 |
/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ |
1096 |
z4 = d5 + d1; |
1097 |
|
1098 |
z5 = MULTIPLY(z4, FIX_1_175875602); |
1099 |
z1 = MULTIPLY(-d1, FIX_0_899976223); |
1100 |
tmp3 = MULTIPLY(d1, FIX_0_601344887); |
1101 |
tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
1102 |
z2 = MULTIPLY(-d5, FIX_2_562915447); |
1103 |
z4 = MULTIPLY(z4, FIX_0_785694958); |
1104 |
|
1105 |
tmp0 = z1 + z5; |
1106 |
tmp1 += z4; |
1107 |
tmp2 = z2 + z5; |
1108 |
tmp3 += z4; |
1109 |
} else { |
1110 |
/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ |
1111 |
tmp0 = MULTIPLY(d5, FIX_1_175875602); |
1112 |
tmp1 = MULTIPLY(d5, FIX_0_275899380); |
1113 |
tmp2 = MULTIPLY(-d5, FIX_1_387039845); |
1114 |
tmp3 = MULTIPLY(d5, FIX_0_785694958); |
1115 |
} |
1116 |
} |
1117 |
} else { |
1118 |
if (d3) { |
1119 |
if (d1) { |
1120 |
/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ |
1121 |
z5 = d1 + d3; |
1122 |
tmp3 = MULTIPLY(d1, FIX_0_211164243); |
1123 |
tmp2 = MULTIPLY(-d3, FIX_1_451774981); |
1124 |
z1 = MULTIPLY(d1, FIX_1_061594337); |
1125 |
z2 = MULTIPLY(-d3, FIX_2_172734803); |
1126 |
z4 = MULTIPLY(z5, FIX_0_785694958); |
1127 |
z5 = MULTIPLY(z5, FIX_1_175875602); |
1128 |
|
1129 |
tmp0 = z1 - z4; |
1130 |
tmp1 = z2 + z4; |
1131 |
tmp2 += z5; |
1132 |
tmp3 += z5; |
1133 |
} else { |
1134 |
/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ |
1135 |
tmp0 = MULTIPLY(-d3, FIX_0_785694958); |
1136 |
tmp1 = MULTIPLY(-d3, FIX_1_387039845); |
1137 |
tmp2 = MULTIPLY(-d3, FIX_0_275899380); |
1138 |
tmp3 = MULTIPLY(d3, FIX_1_175875602); |
1139 |
} |
1140 |
} else { |
1141 |
if (d1) { |
1142 |
/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ |
1143 |
tmp0 = MULTIPLY(d1, FIX_0_275899380); |
1144 |
tmp1 = MULTIPLY(d1, FIX_0_785694958); |
1145 |
tmp2 = MULTIPLY(d1, FIX_1_175875602); |
1146 |
tmp3 = MULTIPLY(d1, FIX_1_387039845); |
1147 |
} else { |
1148 |
/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ |
1149 |
tmp0 = tmp1 = tmp2 = tmp3 = 0; |
1150 |
} |
1151 |
} |
1152 |
} |
1153 |
} |
1154 |
|
1155 |
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
1156 |
|
1157 |
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, |
1158 |
CONST_BITS+PASS1_BITS+3); |
1159 |
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, |
1160 |
CONST_BITS+PASS1_BITS+3); |
1161 |
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, |
1162 |
CONST_BITS+PASS1_BITS+3); |
1163 |
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, |
1164 |
CONST_BITS+PASS1_BITS+3); |
1165 |
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, |
1166 |
CONST_BITS+PASS1_BITS+3); |
1167 |
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, |
1168 |
CONST_BITS+PASS1_BITS+3); |
1169 |
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, |
1170 |
CONST_BITS+PASS1_BITS+3); |
1171 |
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, |
1172 |
CONST_BITS+PASS1_BITS+3); |
1173 |
|
1174 |
dataptr++; /* advance pointer to next column */ |
1175 |
} |
1176 |
} |
1177 |
|
1178 |
|
1179 |
/* here is the reference one, in case of problems with the normal one */ |
1180 |
|
1181 |
/* idctref.c, Inverse Discrete Cosine Transform, double precision */
1182 |
|
1183 |
/* Copyright (C) 1994, MPEG Software Simulation Group. All Rights Reserved. */ |
1184 |
|
1185 |
/* |
1186 |
* Disclaimer of Warranty |
1187 |
* |
1188 |
* These software programs are available to the user without any license fee or |
1189 |
* royalty on an "as is" basis. The MPEG Software Simulation Group disclaims |
1190 |
* any and all warranties, whether express, implied, or statuary, including any |
1191 |
* implied warranties or merchantability or of fitness for a particular |
1192 |
* purpose. In no event shall the copyright-holder be liable for any |
1193 |
* incidental, punitive, or consequential damages of any kind whatsoever |
1194 |
* arising from the use of these programs. |
1195 |
* |
1196 |
* This disclaimer of warranty extends to the user of these programs and user's |
1197 |
* customers, employees, agents, transferees, successors, and assigns. |
1198 |
* |
1199 |
* The MPEG Software Simulation Group does not represent or warrant that the |
1200 |
* programs furnished hereunder are free of infringement of any third-party |
1201 |
* patents. |
1202 |
* |
1203 |
* Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, |
1204 |
* are subject to royalty fees to patent holders. Many of these patents are |
1205 |
* general enough such that they are unavoidable regardless of implementation |
1206 |
* design. |
1207 |
* |
1208 |
*/ |
1209 |
|
1210 |
/* Perform IEEE 1180 reference (64-bit floating point, separable 8x1 |
1211 |
* direct matrix multiply) Inverse Discrete Cosine Transform |
1212 |
*/ |
1213 |
|
1214 |
|
1215 |
/* Here we use math.h to generate constants. Compiler results may |
1216 |
vary a little */ |
1217 |
|
1218 |
/*
 * PI: keep any definition that is already in effect; otherwise use the
 * C library's M_PI when <math.h> has provided it, and fall back to a
 * literal when it has not.
 */
#ifndef PI
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
#endif

/*
 * Cosine transform matrix for the 8x1 IDCT, indexed [frequency][sample].
 * It holds its static zero initialisation until init_idctref() fills it.
 */
static double itrans_coef[8][8];
1228 |
|
1229 |
/* initialize DCT coefficient matrix */ |
1230 |
|
1231 |
void init_idctref() |
1232 |
{ |
1233 |
int freq, time; |
1234 |
double scale; |
1235 |
|
1236 |
for (freq=0; freq < 8; freq++) |
1237 |
{ |
1238 |
scale = (freq == 0) ? sqrt(0.125) : 0.5; |
1239 |
for (time=0; time<8; time++) |
1240 |
itrans_coef[freq][time] = scale*cos((PI/8.0)*freq*(time + 0.5)); |
1241 |
} |
1242 |
} |
1243 |
|
1244 |
/* perform IDCT matrix multiply for 8x8 coefficient block */ |
1245 |
|
1246 |
void reference_rev_dct(block) |
1247 |
int16 *block; |
1248 |
{ |
1249 |
int i, j, k, v; |
1250 |
double partial_product; |
1251 |
double tmp[64]; |
1252 |
|
1253 |
for (i=0; i<8; i++) |
1254 |
for (j=0; j<8; j++) |
1255 |
{ |
1256 |
partial_product = 0.0; |
1257 |
|
1258 |
for (k=0; k<8; k++) |
1259 |
partial_product+= itrans_coef[k][j]*block[8*i+k]; |
1260 |
|
1261 |
tmp[8*i+j] = partial_product; |
1262 |
} |
1263 |
|
1264 |
/* Transpose operation is integrated into address mapping by switching |
1265 |
loop order of i and j */ |
1266 |
|
1267 |
for (j=0; j<8; j++) |
1268 |
for (i=0; i<8; i++) |
1269 |
{ |
1270 |
partial_product = 0.0; |
1271 |
|
1272 |
for (k=0; k<8; k++) |
1273 |
partial_product+= itrans_coef[k][i]*tmp[8*k+j]; |
1274 |
|
1275 |
v = floor(partial_product+0.5); |
1276 |
block[8*i+j] = (v<-256) ? -256 : ((v>255) ? 255 : v); |
1277 |
} |
1278 |
} |