1 | /*
|
---|
2 | C-like prototype :
|
---|
3 | void j_rev_dct_ARM(DCTBLOCK data)
|
---|
4 |
|
---|
5 | With DCTBLOCK being a pointer to an array of 64 'signed shorts'
|
---|
6 |
|
---|
7 | Copyright (c) 2001 Lionel Ulmer ([email protected] / [email protected])
|
---|
8 |
|
---|
9 | Permission is hereby granted, free of charge, to any person obtaining a copy
|
---|
10 | of this software and associated documentation files (the "Software"), to deal
|
---|
11 | in the Software without restriction, including without limitation the rights
|
---|
12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
---|
13 | copies of the Software, and to permit persons to whom the Software is
|
---|
14 | furnished to do so, subject to the following conditions:
|
---|
15 |
|
---|
16 | The above copyright notice and this permission notice shall be included in
|
---|
17 | all copies or substantial portions of the Software.
|
---|
18 |
|
---|
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
---|
20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
---|
21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
---|
22 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
---|
23 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
---|
24 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
---|
25 |
|
---|
26 | */
|
---|
/*
 * Fixed-point multipliers used by both transform passes.
 * Each name spells out the real-valued constant (FIX_M_ marks a negative
 * one); the value is that constant multiplied by 2^13 and rounded.
 * FIX_0xFFFF is not a multiplier: it is the low-halfword mask used by the
 * DC-only row shortcut.
 */
#define FIX_0_298631336         2446
#define FIX_0_541196100         4433
#define FIX_0_765366865         6270
#define FIX_1_175875602         9633
#define FIX_1_501321110         12299
#define FIX_2_053119869         16819
#define FIX_3_072711026         25172
#define FIX_M_0_390180644       (-3196)
#define FIX_M_0_899976223       (-7373)
#define FIX_M_1_847759065       (-15137)
#define FIX_M_1_961570560       (-16069)
#define FIX_M_2_562915447       (-20995)
#define FIX_0xFFFF              0xFFFF

/*
 * Byte offsets of the values above inside const_array.
 * Entries are 32-bit words, so offset = word index * 4; the order here must
 * match the order of the .word entries in const_array.
 */
#define FIX_0_298631336_ID      ( 0*4)
#define FIX_0_541196100_ID      ( 1*4)
#define FIX_0_765366865_ID      ( 2*4)
#define FIX_1_175875602_ID      ( 3*4)
#define FIX_1_501321110_ID      ( 4*4)
#define FIX_2_053119869_ID      ( 5*4)
#define FIX_3_072711026_ID      ( 6*4)
#define FIX_M_0_390180644_ID    ( 7*4)
#define FIX_M_0_899976223_ID    ( 8*4)
#define FIX_M_1_847759065_ID    ( 9*4)
#define FIX_M_1_961570560_ID    (10*4)
#define FIX_M_2_562915447_ID    (11*4)
#define FIX_0xFFFF_ID           (12*4)
@-----------------------------------------------------------------------
@ void j_rev_dct_ARM(DCTBLOCK data)
@
@ In-place two-pass transform of an 8x8 block of signed 16-bit values.
@
@ In:     r0 = pointer to the 64 halfwords (8 rows of 16 bytes each)
@ Out:    the block is overwritten with the result; no return value
@ Saved:  r4-r12 and lr are pushed on entry and restored on exit
@ Clobb:  r0-r3, flags
@ Stack:  40 bytes of saved registers, 4 bytes for the block pointer and
@         16 bytes of temporaries while an odd part is being computed
@
@ Register roles inside both loops:
@   lr  = pointer to the current row (pass 1) or column (pass 2)
@   r12 = remaining iterations (counts down from 8)
@   r11 = base address of const_array
@
@ Pass 1 walks the rows and stores results descaled by 11 bits.
@ Pass 2 walks the columns and stores results descaled by 18 bits.
@ Inside a row the inputs are read in the order d0, d2, d4, d6, d1, d3,
@ d5, d7 (byte offsets 0, 2, 4, 6, 8, 10, 12, 14).
@
@ NOTE: the number of instructions between the "add r11, pc, ..." below
@ and const_array determines the immediate of that instruction, which
@ must stay encodable; keep this in mind when adding code.
@-----------------------------------------------------------------------
        .text
        .align

        .global j_rev_dct_ARM
j_rev_dct_ARM:
        stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs

        sub     sp, sp, #4              @ reserve some space on the stack
        str     r0, [ sp ]              @ save the DCT pointer to the stack

        mov     lr, r0                  @ lr = pointer to the current row
        mov     r12, #8                 @ r12 = row-counter
        add     r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
row_loop:
        ldrsh   r0, [lr, # 0]           @ r0 = 'd0'
        ldrsh   r1, [lr, # 8]           @ r1 = 'd1'

        @ Optimization for row that have all items except the first set to 0
        @ (this works as the DCTELEMS are always 4-byte aligned)
        ldr     r5, [lr, # 0]           @ r5 = d0 and d2 packed in one word
        ldr     r2, [lr, # 4]           @ r2 = d4 and d6
        ldr     r3, [lr, # 8]           @ r3 = d1 and d3
        ldr     r4, [lr, #12]           @ r4 = d5 and d7
        orr     r3, r3, r4
        orr     r3, r3, r2              @ r3 = OR of the words at offsets 4, 8 and 12
        orrs    r5, r3, r5
        beq     end_of_row_loop         @ nothing to be done as ALL of them are '0'
        ldrsh   r2, [lr, # 2]           @ r2 = 'd2' (ldrsh leaves the flags alone)
        orrs    r3, r3, r2              @ d2 shares its word with d0, so it has to be
                                        @ tested separately; r3 is dead after this
        beq     empty_row               @ only d0 is non-zero

        ldrsh   r4, [lr, # 4]           @ r4 = 'd4'
        ldrsh   r6, [lr, # 6]           @ r6 = 'd6'

        @ Even part
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r7, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r7, r3, r7              @ r7 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r7          @ r6 = tmp2
        add     r5, r0, r4              @ r5 = tmp0
        mla     r2, r3, r2, r7          @ r2 = tmp3
        sub     r3, r0, r4              @ r3 = tmp1

        add     r0, r2, r5, lsl #13     @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13     @ r2 = tmp13
        add     r4, r6, r3, lsl #13     @ r4 = tmp11
        rsb     r3, r6, r3, lsl #13     @ r3 = tmp12

        stmdb   sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11

        @ Odd part
        ldrsh   r3, [lr, #10]           @ r3 = 'd3'
        ldrsh   r5, [lr, #12]           @ r5 = 'd5'
        ldrsh   r7, [lr, #14]           @ r7 = 'd7'

        add     r0, r3, r5              @ r0 = 'z2'
        add     r2, r1, r7              @ r2 = 'z1'
        add     r4, r3, r7              @ r4 = 'z3'
        add     r6, r1, r5              @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6              @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8              @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2             @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0              @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8         @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8          @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2         @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0          @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0         @ r3 = tmp2 + z2
        add     r7, r7, r4              @ r7 = tmp0
        mla     r1, r9, r1, r2          @ r1 = tmp3 + z1
        add     r5, r5, r6              @ r5 = tmp1
        add     r3, r3, r4              @ r3 = tmp2
        add     r1, r1, r6              @ r1 = tmp3

        ldmia   sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
                                        @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
        add     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 0]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
        sub     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #14]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
        add     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 2]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
        sub     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #12]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
        add     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 4]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
        sub     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #10]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
        add     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 6]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
        sub     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 8]

        @ End of row loop
        add     lr, lr, #16             @ next row (8 halfwords further)
        subs    r12, r12, #1
        bne     row_loop
        b       start_column_loop       @ all rows done; skip the shortcut code

empty_row:
        @ Only d0 is non-zero: every output of the row is d0 << 2, written
        @ as four words holding the same halfword twice.
        ldr     r1, [r11, #FIX_0xFFFF_ID]
        mov     r0, r0, lsl #2
        and     r0, r0, r1              @ keep the low 16 bits only
        add     r0, r0, r0, lsl #16     @ duplicate into the high halfword
        str     r0, [lr, # 0]
        str     r0, [lr, # 4]
        str     r0, [lr, # 8]
        str     r0, [lr, #12]

end_of_row_loop:
        @ End of loop
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop

start_column_loop:
        @ Start of column loop
        ldr     lr, [ sp ]              @ lr = block pointer saved on entry
        mov     r12, #8                 @ r12 = column-counter
column_loop:
        ldrsh   r0, [lr, #( 0*8)]       @ r0 = 'd0'
        ldrsh   r2, [lr, #( 4*8)]       @ r2 = 'd2'
        ldrsh   r4, [lr, #( 8*8)]       @ r4 = 'd4'
        ldrsh   r6, [lr, #(12*8)]       @ r6 = 'd6'

        @ Even part
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r1, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r1, r3, r1              @ r1 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r1          @ r6 = tmp2
        add     r5, r0, r4              @ r5 = tmp0
        mla     r2, r3, r2, r1          @ r2 = tmp3
        sub     r3, r0, r4              @ r3 = tmp1

        add     r0, r2, r5, lsl #13     @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13     @ r2 = tmp13
        add     r4, r6, r3, lsl #13     @ r4 = tmp11
        rsb     r6, r6, r3, lsl #13     @ r6 = tmp12

        ldrsh   r1, [lr, #( 2*8)]       @ r1 = 'd1'
        ldrsh   r3, [lr, #( 6*8)]       @ r3 = 'd3'
        ldrsh   r5, [lr, #(10*8)]       @ r5 = 'd5'
        ldrsh   r7, [lr, #(14*8)]       @ r7 = 'd7'

        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
        orr     r9, r1, r3
        orr     r10, r5, r7
        orrs    r10, r9, r10
        beq     empty_odd_column

        stmdb   sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12

        @ Odd part
        add     r0, r3, r5              @ r0 = 'z2'
        add     r2, r1, r7              @ r2 = 'z1'
        add     r4, r3, r7              @ r4 = 'z3'
        add     r6, r1, r5              @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6              @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8              @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2             @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0              @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8         @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8          @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2         @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0          @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0         @ r3 = tmp2 + z2
        add     r7, r7, r4              @ r7 = tmp0
        mla     r1, r9, r1, r2          @ r1 = tmp3 + z1
        add     r5, r5, r6              @ r5 = tmp1
        add     r3, r3, r4              @ r3 = tmp2
        add     r1, r1, r6              @ r1 = tmp3

        ldmia   sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
                                        @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        add     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 0*8)]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        sub     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        add     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 2*8)]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        sub     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        add     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 4*8)]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        sub     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        add     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 6*8)]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        sub     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2              @ next column (one halfword further)
        subs    r12, r12, #1
        bne     column_loop
        b       the_end                 @ all columns done; skip the shortcut code

empty_odd_column:
        @ d1, d3, d5 and d7 are all zero, so tmp0..tmp3 are zero and each
        @ sum/difference pair collapses to a single value stored twice.

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        add     r0, r0, #(1<<17)
        mov     r0, r0, asr #18
        strh    r0, [lr, #( 0*8)]
        strh    r0, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        add     r4, r4, #(1<<17)
        mov     r4, r4, asr #18
        strh    r4, [lr, #( 2*8)]
        strh    r4, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        add     r6, r6, #(1<<17)
        mov     r6, r6, asr #18
        strh    r6, [lr, #( 4*8)]
        strh    r6, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        add     r2, r2, #(1<<17)
        mov     r2, r2, asr #18
        strh    r2, [lr, #( 6*8)]
        strh    r2, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop

the_end:
        @ The end....
        add     sp, sp, #4              @ drop the saved block pointer
        ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return
371 |
|
---|
@ Table of 32-bit constants addressed through r11 by j_rev_dct_ARM.
@ The entry order must match the FIX_*_ID byte offsets (word index * 4).
@ The label is placed after the alignment directive so that it always names
@ the first entry, even if the assembler has to insert padding.
        .align
const_array:
        .word   FIX_0_298631336
        .word   FIX_0_541196100
        .word   FIX_0_765366865
        .word   FIX_1_175875602
        .word   FIX_1_501321110
        .word   FIX_2_053119869
        .word   FIX_3_072711026
        .word   FIX_M_0_390180644
        .word   FIX_M_0_899976223
        .word   FIX_M_1_847759065
        .word   FIX_M_1_961570560
        .word   FIX_M_2_562915447
        .word   FIX_0xFFFF