ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/freemol/trunk/src/mpeg_encode/src/mfwddct.c
Revision: 22
Committed: Mon Jul 7 22:16:37 2008 UTC (11 years, 1 month ago) by wdelano
File size: 11781 byte(s)
Log Message:
initial checkin of mpeg_encode source
Line User Rev File contents
1 wdelano 22
2     /*
3     * mfwddct.c (derived from jfwddct.c, which carries the following info)
4     *
5     * Copyright (C) 1991, 1992, Thomas G. Lane. This file is part of the
6     * Independent JPEG Group's software. For conditions of distribution and use,
7     * see the accompanying README file.
8     *
9     * This file contains the basic DCT (Discrete Cosine Transform) transformation
10     * subroutine.
11     *
12     * This implementation is based on Appendix A.2 of the book "Discrete Cosine
13     * Transform---Algorithms, Advantages, Applications" by K.R. Rao and P. Yip
14     * (Academic Press, Inc, London, 1990). It uses scaled fixed-point arithmetic
15     * instead of floating point.
16     */
17    
18     #include "all.h"
19    
20     #include "dct.h"
21     #include "mtypes.h"
22     #include "opts.h"
23    
24     /*
25     * The poop on this scaling stuff is as follows:
26     *
27     * We have to do addition and subtraction of the integer inputs, which is no
28     * problem, and multiplication by fractional constants, which is a problem to
29     * do in integer arithmetic. We multiply all the constants by DCT_SCALE and
30     * convert them to integer constants (thus retaining LG2_DCT_SCALE bits of
31     * precision in the constants). After doing a multiplication we have to
32     * divide the product by DCT_SCALE, with proper rounding, to produce the
33     * correct output. The division can be implemented cheaply as a right shift
34     * of LG2_DCT_SCALE bits. The DCT equations also specify an additional
35     * division by 2 on the final outputs; this can be folded into the
36     * right-shift by shifting one more bit (see UNFIXH).
37     *
38     * If you are planning to recode this in assembler, you might want to set
39     * LG2_DCT_SCALE to 15. This loses a bit of precision, but then all the
40     * multiplications are between 16-bit quantities (given 8-bit JSAMPLEs!) so
41     * you could use a signed 16x16=>32 bit multiply instruction instead of full
42     * 32x32 multiply. Unfortunately there's no way to describe such a multiply
43     * portably in C, so we've gone for the extra bit of accuracy here.
44     */
45    
46     #define EIGHT_BIT_SAMPLES
47     #ifdef EIGHT_BIT_SAMPLES
48     #define LG2_DCT_SCALE 16
49     #else
50     #define LG2_DCT_SCALE 15 /* lose a little precision to avoid overflow */
51     #endif
52    
53     #define ONE ((int32) 1)
54    
55     #define DCT_SCALE (ONE << LG2_DCT_SCALE)
56    
57     /* In some places we shift the inputs left by a couple more bits, */
58     /* so that they can be added to fractional results without too much */
59     /* loss of precision. */
60     #define LG2_OVERSCALE 2
61     #define OVERSCALE (ONE << LG2_OVERSCALE)
62     #define OVERSHIFT(x) ((x) <<= LG2_OVERSCALE)
63    
64     /* Scale a fractional constant by DCT_SCALE */
65     #define FIX(x) ((int32) ((x) * DCT_SCALE + 0.5))
66    
67     /* Scale a fractional constant by DCT_SCALE/OVERSCALE */
68     /* Such a constant can be multiplied with an overscaled input */
69     /* to produce something that's scaled by DCT_SCALE */
70     #define FIXO(x) ((int32) ((x) * DCT_SCALE / OVERSCALE + 0.5))
71    
72     /* Descale and correctly round a value that's scaled by DCT_SCALE */
73     #define UNFIX(x) RIGHT_SHIFT((x) + (ONE << (LG2_DCT_SCALE-1)), LG2_DCT_SCALE)
74    
75     /* Same with an additional division by 2, ie, correctly rounded UNFIX(x/2) */
76     #define UNFIXH(x) RIGHT_SHIFT((x) + (ONE << LG2_DCT_SCALE), LG2_DCT_SCALE+1)
77    
78     /* Take a value scaled by DCT_SCALE and round to integer scaled by OVERSCALE */
79     #define UNFIXO(x) RIGHT_SHIFT((x) + (ONE << (LG2_DCT_SCALE-1-LG2_OVERSCALE)),\
80     LG2_DCT_SCALE-LG2_OVERSCALE)
81    
82     /* Here are the constants we need */
83     /* SIN_i_j is sine of i*pi/j, scaled by DCT_SCALE */
84     /* COS_i_j is cosine of i*pi/j, scaled by DCT_SCALE */
85    
86     #define SIN_1_4 FIX(0.707106781)
87     #define COS_1_4 SIN_1_4
88    
89     #define SIN_1_8 FIX(0.382683432)
90     #define COS_1_8 FIX(0.923879533)
91     #define SIN_3_8 COS_1_8
92     #define COS_3_8 SIN_1_8
93    
94     #define SIN_1_16 FIX(0.195090322)
95     #define COS_1_16 FIX(0.980785280)
96     #define SIN_7_16 COS_1_16
97     #define COS_7_16 SIN_1_16
98    
99     #define SIN_3_16 FIX(0.555570233)
100     #define COS_3_16 FIX(0.831469612)
101     #define SIN_5_16 COS_3_16
102     #define COS_5_16 SIN_3_16
103    
104     /* OSIN_i_j is sine of i*pi/j, scaled by DCT_SCALE/OVERSCALE */
105     /* OCOS_i_j is cosine of i*pi/j, scaled by DCT_SCALE/OVERSCALE */
106    
107     #define OSIN_1_4 FIXO(0.707106781)
108     #define OCOS_1_4 OSIN_1_4
109    
110     #define OSIN_1_8 FIXO(0.382683432)
111     #define OCOS_1_8 FIXO(0.923879533)
112     #define OSIN_3_8 OCOS_1_8
113     #define OCOS_3_8 OSIN_1_8
114    
115     #define OSIN_1_16 FIXO(0.195090322)
116     #define OCOS_1_16 FIXO(0.980785280)
117     #define OSIN_7_16 OCOS_1_16
118     #define OCOS_7_16 OSIN_1_16
119    
120     #define OSIN_3_16 FIXO(0.555570233)
121     #define OCOS_3_16 FIXO(0.831469612)
122     #define OSIN_5_16 OCOS_3_16
123     #define OCOS_5_16 OSIN_3_16
124    
125     /* Prototypes */
126     void reference_fwd_dct _ANSI_ARGS_((Block block, Block dest));
127     void mp_fwd_dct_fast _ANSI_ARGS_((Block data2d, Block dest2d));
128     void init_fdct _ANSI_ARGS_((void));
129    
130     /*
131     * --------------------------------------------------------------
132     *
133     * mp_fwd_dct_block2 --
134     *
135     * Select the appropriate mp_fwd_dct routine
136     *
137     * Results: None
138     *
139     * Side effects: None
140     *
141     * --------------------------------------------------------------
142     */
143     extern boolean pureDCT;
144     void
145     mp_fwd_dct_block2(data, dest)
146     Block data, dest;
147     {
148     if (pureDCT) reference_fwd_dct(data, dest);
149     else mp_fwd_dct_fast(data, dest);
150     }
151    
152     /*
153     * --------------------------------------------------------------
154     *
155     * mp_fwd_dct_fast --
156     *
157     * Perform the forward DCT on one block of samples.
158     *
159     * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT on each
160     * column.
161     *
162     * Results: None
163     *
164     * Side effects: Overwrites the input data
165     *
166     * --------------------------------------------------------------
167     */
168    
169     void
170     mp_fwd_dct_fast(data2d, dest2d)
171     Block data2d, dest2d;
172     {
173     int16 *data = (int16 *) data2d; /* this algorithm wants
174     * a 1-d array */
175     int16 *dest = (int16 *) dest2d;
176     int pass, rowctr;
177     register int16 *inptr, *outptr;
178     int16 workspace[DCTSIZE_SQ];
179     SHIFT_TEMPS
180    
181     #ifdef ndef
182     {
183     int y;
184    
185     printf("fwd_dct (beforehand):\n");
186     for (y = 0; y < 8; y++)
187     printf("%4d %4d %4d %4d %4d %4d %4d %4d\n",
188     data2d[y][0], data2d[y][1],
189     data2d[y][2], data2d[y][3],
190     data2d[y][4], data2d[y][5],
191     data2d[y][6], data2d[y][7]);
192     }
193     #endif
194    
195     /*
196     * Each iteration of the inner loop performs one 8-point 1-D DCT. It
197     * reads from a *row* of the input matrix and stores into a *column*
198     * of the output matrix. In the first pass, we read from the data[]
199     * array and store into the local workspace[]. In the second pass,
200     * we read from the workspace[] array and store into data[], thus
201     * performing the equivalent of a columnar DCT pass with no variable
202     * array indexing.
203     */
204    
205     inptr = data; /* initialize pointers for first pass */
206     outptr = workspace;
207     for (pass = 1; pass >= 0; pass--) {
208     for (rowctr = DCTSIZE - 1; rowctr >= 0; rowctr--) {
209     /*
210     * many tmps have nonoverlapping lifetime -- flashy
211     * register colourers should be able to do this lot
212     * very well
213     */
214     int32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
215     int32 tmp10, tmp11, tmp12, tmp13;
216     int32 tmp14, tmp15, tmp16, tmp17;
217     int32 tmp25, tmp26;
218     /* SHIFT_TEMPS */
219    
220     /* temp0 through tmp7: -512 to +512 */
221     /* if I-block, then -256 to +256 */
222     tmp0 = inptr[7] + inptr[0];
223     tmp1 = inptr[6] + inptr[1];
224     tmp2 = inptr[5] + inptr[2];
225     tmp3 = inptr[4] + inptr[3];
226     tmp4 = inptr[3] - inptr[4];
227     tmp5 = inptr[2] - inptr[5];
228     tmp6 = inptr[1] - inptr[6];
229     tmp7 = inptr[0] - inptr[7];
230    
231     /* tmp10 through tmp13: -1024 to +1024 */
232     /* if I-block, then -512 to +512 */
233     tmp10 = tmp3 + tmp0;
234     tmp11 = tmp2 + tmp1;
235     tmp12 = tmp1 - tmp2;
236     tmp13 = tmp0 - tmp3;
237    
238     outptr[0] = (int16) UNFIXH((tmp10 + tmp11) * SIN_1_4);
239     outptr[DCTSIZE * 4] = (int16) UNFIXH((tmp10 - tmp11) * COS_1_4);
240    
241     outptr[DCTSIZE * 2] = (int16) UNFIXH(tmp13 * COS_1_8 + tmp12 * SIN_1_8);
242     outptr[DCTSIZE * 6] = (int16) UNFIXH(tmp13 * SIN_1_8 - tmp12 * COS_1_8);
243    
244     tmp16 = UNFIXO((tmp6 + tmp5) * SIN_1_4);
245     tmp15 = UNFIXO((tmp6 - tmp5) * COS_1_4);
246    
247     OVERSHIFT(tmp4);
248     OVERSHIFT(tmp7);
249    
250     /*
251     * tmp4, tmp7, tmp15, tmp16 are overscaled by
252     * OVERSCALE
253     */
254    
255     tmp14 = tmp4 + tmp15;
256     tmp25 = tmp4 - tmp15;
257     tmp26 = tmp7 - tmp16;
258     tmp17 = tmp7 + tmp16;
259    
260     outptr[DCTSIZE] = (int16) UNFIXH(tmp17 * OCOS_1_16 + tmp14 * OSIN_1_16);
261     outptr[DCTSIZE * 7] = (int16) UNFIXH(tmp17 * OCOS_7_16 - tmp14 * OSIN_7_16);
262     outptr[DCTSIZE * 5] = (int16) UNFIXH(tmp26 * OCOS_5_16 + tmp25 * OSIN_5_16);
263     outptr[DCTSIZE * 3] = (int16) UNFIXH(tmp26 * OCOS_3_16 - tmp25 * OSIN_3_16);
264    
265     inptr += DCTSIZE; /* advance inptr to next row */
266     outptr++; /* advance outptr to next column */
267     }
268     /* end of pass; in case it was pass 1, set up for pass 2 */
269     inptr = workspace;
270     outptr = dest;
271     }
272     #ifdef ndef
273     {
274     int y;
275    
276     printf("fwd_dct (afterward):\n");
277     for (y = 0; y < 8; y++)
278     printf("%4d %4d %4d %4d %4d %4d %4d %4d\n",
279     dest2d[y][0], dest2d[y][1],
280     dest2d[y][2], dest2d[y][3],
281     dest2d[y][4], dest2d[y][5],
282     dest2d[y][6], dest2d[y][7]);
283     }
284     #endif
285     }
286    
287    
288     /* Modifies from the MPEG2 verification coder */
289     /* fdctref.c, forward discrete cosine transform, double precision */
290    
291     /* Copyright (C) 1994, MPEG Software Simulation Group. All Rights Reserved. */
292    
293     /*
294     * Disclaimer of Warranty
295     *
296     * These software programs are available to the user without any license fee or
297     * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
298     * any and all warranties, whether express, implied, or statuary, including any
299     * implied warranties or merchantability or of fitness for a particular
300     * purpose. In no event shall the copyright-holder be liable for any
301     * incidental, punitive, or consequential damages of any kind whatsoever
302     * arising from the use of these programs.
303     *
304     * This disclaimer of warranty extends to the user of these programs and user's
305     * customers, employees, agents, transferees, successors, and assigns.
306     *
307     * The MPEG Software Simulation Group does not represent or warrant that the
308     * programs furnished hereunder are free of infringement of any third-party
309     * patents.
310     *
311     * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
312     * are subject to royalty fees to patent holders. Many of these patents are
313     * general enough such that they are unavoidable regardless of implementation
314     * design.
315     *
316     */
317    
318     #ifndef PI
319     #ifdef M_PI
320     #define PI M_PI
321     #else
322     #define PI 3.14159265358979323846
323     #endif
324     #endif
325    
326     /* private data */
327     static double trans_coef[8][8]; /* transform coefficients */
328    
329     void init_fdct()
330     {
331     int i, j;
332     double s;
333    
334     for (i=0; i<8; i++)
335     {
336     s = (i==0) ? sqrt(0.125) : 0.5;
337    
338     for (j=0; j<8; j++)
339     trans_coef[i][j] = s * cos((PI/8.0)*i*(j+0.5));
340     }
341     }
342    
343     void reference_fwd_dct(block, dest)
344     Block block, dest;
345     {
346     int i, j, k;
347     double s;
348     double tmp[64];
349    
350     if (DoLaplace) {
351     LaplaceNum++;
352     }
353    
354     for (i=0; i<8; i++)
355     for (j=0; j<8; j++)
356     {
357     s = 0.0;
358    
359     for (k=0; k<8; k++)
360     s += trans_coef[j][k] * block[i][k];
361    
362     tmp[8*i+j] = s;
363     }
364    
365     for (i=0; i<8; i++)
366     for (j=0; j<8; j++)
367     {
368     s = 0.0;
369    
370     for (k=0; k<8; k++)
371     s += trans_coef[i][k] * tmp[8*k+j];
372    
373     if (collect_quant) {
374     fprintf(collect_quant_fp, "%d %lf\n", 8*i+j, s);
375     }
376     if (DoLaplace) {
377     L1[LaplaceCnum][i*8+j] += s*s;
378     L2[LaplaceCnum][i*8+j] += s;
379     }
380    
381    
382     dest[i][j] = (int)floor(s+0.499999);
383     /*
384     * reason for adding 0.499999 instead of 0.5:
385     * s is quite often x.5 (at least for i and/or j = 0 or 4)
386     * and setting the rounding threshold exactly to 0.5 leads to an
387     * extremely high arithmetic implementation dependency of the result;
388     * s being between x.5 and x.500001 (which is now incorrectly rounded
389     * downwards instead of upwards) is assumed to occur less often
390     * (if at all)
391     */
392     }
393     }