1 | /*
|
---|
2 | * ARMv4L optimized DSP utils
|
---|
3 | * Copyright (c) 2001 Lionel Ulmer.
|
---|
4 | *
|
---|
5 | * This library is free software; you can redistribute it and/or
|
---|
6 | * modify it under the terms of the GNU Lesser General Public
|
---|
7 | * License as published by the Free Software Foundation; either
|
---|
8 | * version 2 of the License, or (at your option) any later version.
|
---|
9 | *
|
---|
10 | * This library is distributed in the hope that it will be useful,
|
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
13 | * Lesser General Public License for more details.
|
---|
14 | *
|
---|
15 | * You should have received a copy of the GNU Lesser General Public
|
---|
16 | * License along with this library; if not, write to the Free Software
|
---|
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
---|
18 | */
|
---|
19 |
|
---|
20 | #include "../dsputil.h"
|
---|
21 | #ifdef HAVE_IPP
|
---|
22 | #include "ipp.h"
|
---|
23 | #endif
|
---|
24 |
|
---|
25 | extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
|
---|
26 |
|
---|
27 | extern void j_rev_dct_ARM(DCTELEM *data);
|
---|
28 | extern void simple_idct_ARM(DCTELEM *data);
|
---|
29 |
|
---|
30 | /* XXX: local hack */
|
---|
31 | static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
|
---|
32 | static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
|
---|
33 |
|
---|
34 | void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
|
---|
35 | void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
|
---|
36 | void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
|
---|
37 | void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
|
---|
38 |
|
---|
39 | void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
|
---|
40 | void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
|
---|
41 | void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
|
---|
42 |
|
---|
43 | void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
|
---|
44 |
|
---|
45 | CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8)
|
---|
46 | CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8)
|
---|
47 | CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8)
|
---|
48 | CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
|
---|
49 | CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
|
---|
50 | CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
|
---|
51 |
|
---|
/*
 * Add an 8x8 block of signed 16-bit coefficients to an 8x8 block of bytes
 * in place, saturating each result to a byte.
 *
 * block     - 64 coefficients, read as signed halfwords; 16 bytes (one row
 *             of 8) are consumed per loop iteration. Never written.
 * dest      - pixel rows; each row is read as two 32-bit words, updated and
 *             written back.
 * line_size - byte distance added to dest after every row.
 *
 * Per pixel: sum = coefficient + pixel. If bit 8 of the sum is set, the
 * sum is replaced by (~coefficient) >> 24 (logical shift), which is 0x00
 * for a negative coefficient and 0xFF for a non-negative one.
 *
 * NOTE(review): only bit 8 is tested, so a sum outside [-256, 511] is not
 * clamped and its high bits spill into the neighbouring byte lanes.
 * NOTE(review): byte lane 0 of each loaded word is paired with the lowest
 * coefficient index, which matches memory order only for little-endian
 * word loads -- confirm for the target configuration.
 * NOTE(review): dest is accessed with word loads/stores; presumably it
 * must be 4-byte aligned -- confirm against callers.
 *
 * The [A]..[F] markers show where an instruction originally stood before
 * it was moved earlier ("moved from [X]") to interleave loads with
 * arithmetic; the instruction order is deliberate.
 */
52 | static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size)
|
---|
53 | {
|
---|
54 | asm volatile (
|
---|
55 | "mov r10, #8 \n\t" /* r10 = rows left to process (8) */
|
---|
56 |
|
---|
57 | "1: \n\t" /* start of the per-row loop */
|
---|
58 |
|
---|
59 | /* load dest: first four pixels of the row into r4 */
|
---|
60 | "ldr r4, [%1] \n\t"
|
---|
61 | /* block[0] and block[1] */
|
---|
62 | "ldrsh r5, [%0] \n\t"
|
---|
63 | "ldrsh r7, [%0, #2] \n\t"
|
---|
64 | "and r6, r4, #0xFF \n\t" /* isolate pixel in byte lane 0 */
|
---|
65 | "and r8, r4, #0xFF00 \n\t" /* isolate pixel in byte lane 1 */
|
---|
66 | "add r6, r5, r6 \n\t" /* sum = coefficient + pixel */
|
---|
67 | "add r8, r7, r8, lsr #8 \n\t"
|
---|
68 | "mvn r5, r5 \n\t" /* ~coefficient: source of the saturated value */
|
---|
69 | "mvn r7, r7 \n\t"
|
---|
70 | "tst r6, #0x100 \n\t" /* bit 8 set => sum left the 0..255 range */
|
---|
71 | "movne r6, r5, lsr #24 \n\t" /* 0x00 if coefficient < 0, else 0xFF */
|
---|
72 | "tst r8, #0x100 \n\t"
|
---|
73 | "movne r8, r7, lsr #24 \n\t"
|
---|
74 | "mov r9, r6 \n\t" /* r9 accumulates the four output bytes */
|
---|
75 | "ldrsh r5, [%0, #4] \n\t" /* moved from [A] */
|
---|
76 | "orr r9, r9, r8, lsl #8 \n\t"
|
---|
77 | /* block[2] and block[3] */
|
---|
78 | /* [A] */
|
---|
79 | "ldrsh r7, [%0, #6] \n\t"
|
---|
80 | "and r6, r4, #0xFF0000 \n\t"
|
---|
81 | "and r8, r4, #0xFF000000 \n\t"
|
---|
82 | "add r6, r5, r6, lsr #16 \n\t"
|
---|
83 | "add r8, r7, r8, lsr #24 \n\t"
|
---|
84 | "mvn r5, r5 \n\t"
|
---|
85 | "mvn r7, r7 \n\t"
|
---|
86 | "tst r6, #0x100 \n\t"
|
---|
87 | "movne r6, r5, lsr #24 \n\t"
|
---|
88 | "tst r8, #0x100 \n\t"
|
---|
89 | "movne r8, r7, lsr #24 \n\t"
|
---|
90 | "orr r9, r9, r6, lsl #16 \n\t"
|
---|
91 | "ldr r4, [%1, #4] \n\t" /* moved from [B] */
|
---|
92 | "orr r9, r9, r8, lsl #24 \n\t"
|
---|
93 | /* store dest */
|
---|
94 | "ldrsh r5, [%0, #8] \n\t" /* moved from [C] */
|
---|
95 | "str r9, [%1] \n\t" /* write back pixels 0..3 of the row */
|
---|
96 |
|
---|
97 | /* load dest: second four pixels, already fetched into r4 above */
|
---|
98 | /* [B] */
|
---|
99 | /* block[4] and block[5] */
|
---|
100 | /* [C] */
|
---|
101 | "ldrsh r7, [%0, #10] \n\t"
|
---|
102 | "and r6, r4, #0xFF \n\t"
|
---|
103 | "and r8, r4, #0xFF00 \n\t"
|
---|
104 | "add r6, r5, r6 \n\t"
|
---|
105 | "add r8, r7, r8, lsr #8 \n\t"
|
---|
106 | "mvn r5, r5 \n\t"
|
---|
107 | "mvn r7, r7 \n\t"
|
---|
108 | "tst r6, #0x100 \n\t"
|
---|
109 | "movne r6, r5, lsr #24 \n\t"
|
---|
110 | "tst r8, #0x100 \n\t"
|
---|
111 | "movne r8, r7, lsr #24 \n\t"
|
---|
112 | "mov r9, r6 \n\t"
|
---|
113 | "ldrsh r5, [%0, #12] \n\t" /* moved from [D] */
|
---|
114 | "orr r9, r9, r8, lsl #8 \n\t"
|
---|
115 | /* block[6] and block[7] */
|
---|
116 | /* [D] */
|
---|
117 | "ldrsh r7, [%0, #14] \n\t"
|
---|
118 | "and r6, r4, #0xFF0000 \n\t"
|
---|
119 | "and r8, r4, #0xFF000000 \n\t"
|
---|
120 | "add r6, r5, r6, lsr #16 \n\t"
|
---|
121 | "add r8, r7, r8, lsr #24 \n\t"
|
---|
122 | "mvn r5, r5 \n\t"
|
---|
123 | "mvn r7, r7 \n\t"
|
---|
124 | "tst r6, #0x100 \n\t"
|
---|
125 | "movne r6, r5, lsr #24 \n\t"
|
---|
126 | "tst r8, #0x100 \n\t"
|
---|
127 | "movne r8, r7, lsr #24 \n\t"
|
---|
128 | "orr r9, r9, r6, lsl #16 \n\t"
|
---|
129 | "add %0, %0, #16 \n\t" /* moved from [E]: advance block by one row (8 halfwords) */
|
---|
130 | "orr r9, r9, r8, lsl #24 \n\t"
|
---|
131 | "subs r10, r10, #1 \n\t" /* moved from [F]: sets the flags tested by bne; the str/add in between do not alter them */
|
---|
132 | /* store dest */
|
---|
133 | "str r9, [%1, #4] \n\t" /* write back pixels 4..7 of the row */
|
---|
134 |
|
---|
135 | /* [E] */
|
---|
136 | /* [F] */
|
---|
137 | "add %1, %1, %2 \n\t" /* advance dest by line_size */
|
---|
138 | "bne 1b \n\t" /* loop until r10 reaches zero */
|
---|
139 | : "+r"(block), /* %0: read and advanced by the asm */
|
---|
140 | "+r"(dest) /* %1: read and advanced by the asm */
|
---|
141 | : "r"(line_size) /* %2: input only */
|
---|
142 | : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" ); /* scratch registers, flags, and the stores to dest */
|
---|
143 | }
|
---|
144 |
|
---|
145 | /* XXX: those functions should be suppressed ASAP when all IDCTs are
|
---|
146 | converted */
|
---|
147 | static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
148 | {
|
---|
149 | j_rev_dct_ARM (block);
|
---|
150 | ff_put_pixels_clamped(block, dest, line_size);
|
---|
151 | }
|
---|
152 | static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
153 | {
|
---|
154 | j_rev_dct_ARM (block);
|
---|
155 | ff_add_pixels_clamped(block, dest, line_size);
|
---|
156 | }
|
---|
157 | static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
158 | {
|
---|
159 | simple_idct_ARM (block);
|
---|
160 | ff_put_pixels_clamped(block, dest, line_size);
|
---|
161 | }
|
---|
162 | static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
163 | {
|
---|
164 | simple_idct_ARM (block);
|
---|
165 | ff_add_pixels_clamped(block, dest, line_size);
|
---|
166 | }
|
---|
167 | static void simple_idct_ipp(DCTELEM *block)
|
---|
168 | {
|
---|
169 | #ifdef HAVE_IPP
|
---|
170 | ippiDCT8x8Inv_Video_16s_C1I(block);
|
---|
171 | #endif
|
---|
172 | }
|
---|
173 | static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
174 | {
|
---|
175 | #ifdef HAVE_IPP
|
---|
176 | ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size);
|
---|
177 | #endif
|
---|
178 | }
|
---|
179 |
|
---|
180 | void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size);
|
---|
181 |
|
---|
182 | static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
183 | {
|
---|
184 | #ifdef HAVE_IPP
|
---|
185 | ippiDCT8x8Inv_Video_16s_C1I(block);
|
---|
186 | #ifdef HAVE_IWMMXT
|
---|
187 | add_pixels_clamped_iwmmxt(block, dest, line_size);
|
---|
188 | #else
|
---|
189 | add_pixels_clamped_ARM(block, dest, line_size);
|
---|
190 | #endif
|
---|
191 | #endif
|
---|
192 | }
|
---|
193 |
|
---|
194 | void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
|
---|
195 | {
|
---|
196 | const int idct_algo= avctx->idct_algo;
|
---|
197 |
|
---|
198 | ff_put_pixels_clamped = c->put_pixels_clamped;
|
---|
199 | ff_add_pixels_clamped = c->add_pixels_clamped;
|
---|
200 |
|
---|
201 | #ifdef HAVE_IPP
|
---|
202 | if(idct_algo==FF_IDCT_ARM){
|
---|
203 | #else
|
---|
204 | if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_ARM){
|
---|
205 | #endif
|
---|
206 | c->idct_put= j_rev_dct_ARM_put;
|
---|
207 | c->idct_add= j_rev_dct_ARM_add;
|
---|
208 | c->idct = j_rev_dct_ARM;
|
---|
209 | c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
|
---|
210 | } else if (idct_algo==FF_IDCT_SIMPLEARM){
|
---|
211 | c->idct_put= simple_idct_ARM_put;
|
---|
212 | c->idct_add= simple_idct_ARM_add;
|
---|
213 | c->idct = simple_idct_ARM;
|
---|
214 | c->idct_permutation_type= FF_NO_IDCT_PERM;
|
---|
215 | #ifdef HAVE_IPP
|
---|
216 | } else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){
|
---|
217 | #else
|
---|
218 | } else if (idct_algo==FF_IDCT_IPP){
|
---|
219 | #endif
|
---|
220 | c->idct_put= simple_idct_ipp_put;
|
---|
221 | c->idct_add= simple_idct_ipp_add;
|
---|
222 | c->idct = simple_idct_ipp;
|
---|
223 | c->idct_permutation_type= FF_NO_IDCT_PERM;
|
---|
224 | }
|
---|
225 |
|
---|
226 | /* c->put_pixels_tab[0][0] = put_pixels16_arm; */ // NG!
|
---|
227 | c->put_pixels_tab[0][1] = put_pixels16_x2_arm; //OK!
|
---|
228 | c->put_pixels_tab[0][2] = put_pixels16_y2_arm; //OK!
|
---|
229 | /* c->put_pixels_tab[0][3] = put_pixels16_xy2_arm; /\* NG *\/ */
|
---|
230 | /* c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm; // ?(»È¤ï¤ì¤Ê¤¤) */
|
---|
231 | c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm; // OK
|
---|
232 | c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm; //OK
|
---|
233 | /* c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm; //NG */
|
---|
234 | c->put_pixels_tab[1][0] = put_pixels8_arm; //OK
|
---|
235 | c->put_pixels_tab[1][1] = put_pixels8_x2_arm; //OK
|
---|
236 | /* c->put_pixels_tab[1][2] = put_pixels8_y2_arm; //NG */
|
---|
237 | /* c->put_pixels_tab[1][3] = put_pixels8_xy2_arm; //NG */
|
---|
238 | c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;//OK
|
---|
239 | c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm; //OK
|
---|
240 | c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; //OK
|
---|
241 | /* c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;//NG */
|
---|
242 |
|
---|
243 | #ifdef HAVE_IWMMXT
|
---|
244 | dsputil_init_iwmmxt(c, avctx);
|
---|
245 | #endif
|
---|
246 | }
|
---|