VirtualBox

瀏覽源碼

source: vbox/trunk/src/libs/ffmpeg-20060710/libavcodec/i386/fft_3dn.c@ 9360

此檔案截至修訂版 9360 的最後一次變更為修訂版 5776,由 vboxsync 於 17 年前提交
ffmpeg: exported to OSE
檔案大小: 3.9 KB

行
1	/*
2	* FFT/MDCT transform with 3DNow! optimizations
3	* Copyright (c) 2006 Zuxy MENG Jie.
4	* Based on fft_sse.c copyright (c) 2002 Fabrice Bellard.
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Lesser General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Lesser General Public License for more details.
15	*
16	* You should have received a copy of the GNU Lesser General Public
17	* License along with this library; if not, write to the Free Software
18	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19	*/
20	#include "../dsputil.h"
21	#include <math.h>
22
23	#ifdef HAVE_MM3DNOW
24
25	#include <mm3dnow.h>
26
/*
 * XOR mask for a 64-bit MMX register holding two packed floats: leaves
 * element 0 untouched and flips the sign bit (bit 31) of element 1.
 * Written as an unsigned constant because shifting 1 into the sign bit of
 * a signed int is undefined behaviour.
 */
static const unsigned int p1m1[2] __attribute__((aligned(8))) =
    { 0, 1U << 31 };
29
/*
 * XOR mask for a 64-bit MMX register holding two packed floats: flips the
 * sign bit (bit 31) of element 0 and leaves element 1 untouched.
 * Written as an unsigned constant because shifting 1 into the sign bit of
 * a signed int is undefined behaviour.
 */
static const unsigned int m1p1[2] __attribute__((aligned(8))) =
    { 1U << 31, 0 };
32
33	void ff_fft_calc_3dn(FFTContext s, FFTComplex z)
34	{
35	int ln = s->nbits;
36	int j, np, np2;
37	int nblocks, nloops;
38	register FFTComplex p, q;
39	FFTComplex cptr, cptr1;
40	int k;
41
42	np = 1 << ln;
43	/* FEMMS not a must here but recommended by AMD */
44	_m_femms();
45
46	{
47	__m64 *r, a0, a1, b0, b1, tmp, c;
48
49	r = (__m64 *)&z[0];
50	if (s->inverse)
51	c = (__m64 )m1p1;
52	else
53	c = (__m64 )p1m1;
54
55	j = (np >> 2);
56	do {
57	/* do the pass 0 butterfly */
58	a0 = _m_pfadd(r[0], r[1]);
59	a1 = _m_pfsub(r[0], r[1]);
60
61	/* do the pass 0 butterfly */
62	b0 = _m_pfadd(r[2], r[3]);
63	b1 = _m_pfsub(r[2], r[3]);
64
65	/* multiply third by -i */
66	tmp = _m_punpckhdq(b1, b1);
67	b1 = _m_punpckldq(b1, b1);
68	b1 = _m_punpckldq(tmp, b1);
69	b1 = _m_pxor(b1, c);
70
71	/* do the pass 1 butterfly */
72	r[0] = _m_pfadd(a0, b0);
73	r[1] = _m_pfadd(a1, b1);
74	r[2] = _m_pfsub(a0, b0);
75	r[3] = _m_pfsub(a1, b1);
76	r += 4;
77	} while (--j != 0);
78	}
79	/* pass 2 .. ln-1 */
80
81	nblocks = np >> 3;
82	nloops = 1 << 2;
83	np2 = np >> 1;
84
85	cptr1 = s->exptab1;
86	do {
87	p = z;
88	q = z + nloops;
89	j = nblocks;
90	do {
91	cptr = cptr1;
92	k = nloops >> 1;
93	do {
94	__m64 a0, a1, b0, b1, c0, c1, t10, t11, t20, t21;
95
96	a0 = (__m64 )&p[0];
97	a1 = (__m64 )&p[1];
98	b0 = (__m64 )&q[0];
99	b1 = (__m64 )&q[1];
100
101	/* complex mul */
102	c0 = (__m64 )&cptr[0];
103	c1 = (__m64 )&cptr[1];
104	/* crere cimre */
105	t10 = _m_pfmul(c0, _m_punpckldq(b0, b0));
106	t11 = _m_pfmul(c1, _m_punpckldq(b1, b1));
107	c0 = (__m64 )&cptr[2];
108	c1 = (__m64 )&cptr[3];
109	/* -cimim creim */
110	t20 = _m_pfmul(c0, _m_punpckhdq(b0, b0));
111	t21 = _m_pfmul(c1, _m_punpckhdq(b1, b1));
112	b0 = _m_pfadd(t10, t20);
113	b1 = _m_pfadd(t11, t21);
114
115	/* butterfly */
116	(__m64 )&p[0] = _m_pfadd(a0, b0);
117	(__m64 )&p[1] = _m_pfadd(a1, b1);
118	(__m64 )&q[0] = _m_pfsub(a0, b0);
119	(__m64 )&q[1] = _m_pfsub(a1, b1);
120
121	p += 2;
122	q += 2;
123	cptr += 4;
124	} while (--k);
125
126	p += nloops;
127	q += nloops;
128	} while (--j);
129	cptr1 += nloops * 2;
130	nblocks = nblocks >> 1;
131	nloops = nloops << 1;
132	} while (nblocks != 0);
133	_m_femms();
134	}
135
136	#endif

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

以其他格式下載:

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette