----------------------------------------
-- Microsoft DirectX Ada binding lib --
-- File : XDSP.ads --
-- Translator:Dongfeng.Gu,2018/11/19 --
-- Mail: 515639@qq.com --
-- Progress:100% --
----------------------------------------
with win32; use win32;
with Interfaces.C;
package XDSP is
function Shift_Left(A:dword;B:Natural) return dword with import,convention=>Intrinsic;
function Lsh(A:dword;B:Natural) return dword is (Shift_Left(A,B));
function Shift_Left(A:Interfaces.C.unsigned;B:Natural) return Interfaces.C.unsigned with import,convention=>Intrinsic;
function Lsh(A:Interfaces.C.unsigned;B:Natural) return Interfaces.C.unsigned is (Shift_Left(A,B));
function Shift_Right(A:dword;B:Natural) return dword with import,convention=>Intrinsic;
function Rsh(A:dword;B:Natural) return dword is (Shift_Right(A,B));
function Shift_Right(A:Interfaces.C.unsigned;B:Natural) return Interfaces.C.unsigned with import,convention=>Intrinsic;
function Rsh(A:Interfaces.C.unsigned;B:Natural) return Interfaces.C.unsigned is (Shift_Right(A,B));
type FLOAT_ARRAY is array (Natural range<>) of FLOAT;
type UINT8_ARRAY is array (Natural range<>) of UINT8;
type UINT16_ARRAY is array (Natural range<>) of UINT16;
type UINT32_ARRAY is array (Natural range<>) of UINT32;
type UINT64_ARRAY is array (Natural range<>) of UINT64;
type INT8_ARRAY is array (Natural range<>) of INT8;
type INT16_ARRAY is array (Natural range<>) of INT16;
type INT32_ARRAY is array (Natural range<>) of INT32;
type INT64_ARRAY is array (Natural range<>) of INT64;
-- From xmmintrin.h(Microsoft)
type tagM128(i:Integer:=0) is record
case i is
when 0=>
m128_f32:FLOAT_ARRAY(0..3);
when 1=>
m128_i8:INT8_ARRAY(0..15);
when 2=>
m128_i16:INT16_ARRAY(0..7);
when 3=>
m128_i32:INT32_ARRAY(0..3);
when 4=>
m128_i64:INT64_ARRAY(0..1);
when 5=>
m128_u8:UINT8_ARRAY(0..15);
when 6=>
m128_u16:UINT16_ARRAY(0..7);
when 7=>
m128_u32:UINT32_ARRAY(0..3);
when 8=>
m128_u64:UINT64_ARRAY(0..1);
when others=>
NULL;
end case;
end record with Unchecked_Union,Alignment=>16,size=>128;
subtype M128 is tagM128;
subtype XVECTOR is tagM128;
type LPXVECTOR is access all XVECTOR;
type LPCXVECTOR is access constant XVECTOR;
type XVECTOR_ARRAY is array (UINT range<>) of XVECTOR;
use type Interfaces.C.unsigned_long;
use type interfaces.C.unsigned;
function MM_SHUFFLE(fp3,fp2,fp1,fp0:DWORD) return DWORD is (Lsh(fp3,6) or Lsh(fp2,4) or Lsh(fp1,2) or fp0);
function MM_SHUFFLE(fp3,fp2,fp1,fp0:interfaces.C.unsigned) return interfaces.C.unsigned is (Lsh(fp3,6) or Lsh(fp2,4) or Lsh(fp1,2) or fp0);
function mm_add_ps(v1,v2:XVECTOR) return XVECTOR with Inline_Always;
function mm_sub_ps(v1,v2:XVECTOR) return XVECTOR with Inline_Always;
function mm_mul_ps(v1,v2:XVECTOR) return XVECTOR with Inline_Always;
function mm_sqrt_ps(v:XVECTOR) return XVECTOR;
-- *由于SHUFPS指令的第1个参数(正序,GNU汇编是正序的)必须是立即数,
-- *不得已采用创建导入文件的方式来实现
-- *!不得删除文件XDSP_SHUFFLES.s文件,否则不能通过编译,
-- *生成的程序则不依赖于该导入文件.
function mm_shuffle_ps(v1,v2:XVECTOR;ui:uint) return XVECTOR with Inline_Always;
function mm_xor_ps(v1,v2:XVECTOR) return XVECTOR;
procedure vmulComplex(rResult:out XVECTOR;iResult:out XVECTOR;r1,i1,r2,i2:XVECTOR) with Inline_Always;
procedure vmulComplex(r1,i1:in out XVECTOR;r2,i2:XVECTOR) with Inline_Always;
procedure ButterflyDIT4_1(r1,i1:in out XVECTOR) with Inline_Always;
procedure ButterflyDIT4_4(r0,r1,r2,r3,i0,i1,i2,i3:in out XVECTOR;pUnityTableReal,pUnityTableImaginary:access constant XVECTOR_ARRAY;uStride:uint32;fLast:BOOL) with Inline_Always;
procedure FFT4(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:uint32:=1) with Inline_Always;
procedure FFT8(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:UINT32:=1) with Inline_Always;
procedure FFT16(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:UINT32:=1) with Inline_Always;
procedure FFT(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;pUnityTable:access constant XVECTOR_ARRAY;uLength:uint32;uCount:UINT32:=1) with Inline;
procedure FFTInitializeUnityTable(pUnityTable:in out XVECTOR_ARRAY;uLen:uint32) with Inline;
procedure FFTUnswizzle(pOutput:out FLOAT_ARRAY;pInput: FLOAT_ARRAY;uLog2Length:UINT32) with Inline;
procedure FFTPolar(pOutput:out XVECTOR_ARRAY;pInputReal: XVECTOR_ARRAY;pInputImaginary: XVECTOR_ARRAY;uLength:UINT32) with Inline;
end XDSP;
----------------------------------------
-- Microsoft DirectX Ada binding lib --
-- File : XDSP.adb --
-- Translator:Dongfeng.Gu,2018/11/19 --
-- Mail: 515639@qq.com --
-- Progress:100% --
----------------------------------------
with Machine_Code; use Machine_Code;
with Ada.Streams.Stream_IO; use Ada.Streams.Stream_IO;
package body XDSP is
use type Interfaces.C.unsigned;
NL:constant String:=ASCII.CR&ASCII.LF;
function mm_sqrt_ps(v:XVECTOR) return XVECTOR is
v1:XVECTOR with Alignment=>16,Warnings=>OFF;
begin
asm(
template=>
("movl 12(%%ebp),%%edx;"&NL&
"movaps (%%edx),%%xmm0;"&NL&
"sqrtps %%xmm0,%%xmm1;"&NL&
"movl 8(%%ebp),%%eax;"&NL&
"movaps %%xmm1,(%%eax);"&NL
)
);
return v1;
end;
function mm_set_ps1(f:FLOAT) return XVECTOR is
v:XVECTOR:=(0,(others=>f));
begin
return v;
end;
function mm_add_ps(v1,v2:XVECTOR) return XVECTOR is
v3:XVECTOR;pragma Warnings(off,v3);
begin
asm(template=>(
"movl 12(%%esp),%%edx;"&
"movaps (%%edx),%%xmm0;"&
"movl 8(%%esp),%%eax;"&
"movl 16(%%esp),%%edx;"&
"addps (%%edx),%%xmm0;"&
"movaps %%xmm0,(%%eax);"
),
inputs=>No_Input_Operands,
Outputs => No_Output_Operands,
Volatile=>TRUE
);
return v3;
end;
function mm_sub_ps(v1,v2:XVECTOR) return XVECTOR is
v3:XVECTOR;pragma Warnings(off,v3);
begin
asm( "movl 12(%%esp),%%edx;"&
"movaps (%%edx),%%xmm0;"&
"movl 8(%%esp),%%eax;"&
"movl 16(%%esp),%%edx;"&
"subps (%%edx),%%xmm0;"&
"movaps %%xmm0,(%%eax);",
inputs=>No_Input_Operands,
Outputs => No_Output_Operands,
Volatile=>TRUE
);
return v3;
end;
function mm_mul_ps(v1,v2:XVECTOR) return XVECTOR is
v3:XVECTOR;pragma Warnings(off,v3);
begin
asm( "movl 12(%%esp),%%edx;"&
"movaps (%%edx),%%xmm0;"&
"movl 8(%%esp),%%eax;"&
"movl 16(%%esp),%%edx;"&
"mulps (%%edx),%%xmm0;"&
"movaps %%xmm0,(%%eax);",
inputs=>No_Input_Operands,
Outputs => No_Output_Operands,
Volatile=>TRUE
);
return v3;
end;
function mm_xor_ps(v1,v2:XVECTOR) return XVECTOR is
v3:XVECTOR;pragma Warnings(off,v3);
begin
asm( "movl 12(%%esp),%%edx;"&
"movaps (%%edx),%%xmm0;"&
"movl 8(%%esp),%%eax;"&
"movl 16(%%esp),%%edx;"&
"xorps (%%edx),%%xmm0;"&
"movaps %%xmm0,(%%eax);",
inputs=>No_Input_Operands,
Outputs => No_Output_Operands,
Volatile=>TRUE
);
return v3;
end;
function IsFileExist(fn:String) return Boolean is
file:File_Type;
begin
Open(File,in_file,fn);
Close(File);
return True;
exception when others=>
return False;
end;
function CreateShuffleAsm(ui:uint) return Boolean is
File:File_Type;
S:Stream_Access;
UII:String:=UI'IMG;
begin
Create(File,Out_File,"XDSP_SHUFFLES.s");
S:=Stream(File);
String'Write(S,"# function __mm_shuffle_ps(v1,v2:__m128;i:uint) return __m128;"&NL);
String'Write(S,".file "&'"'&"XDSP_SHUFFLES.s"&'"'&NL);
String'Write(S,".text"&NL);
String'Write(S,".globl _mm_shuffle_ps;"&NL);
String'Write(S,".globl __mm_shuffle_ps;"&NL);
String'Write(S,".def _mm_shuffle_ps"&"; .Scl 2; .type 32; .endef;"&NL);
String'Write(S,".def __mm_shuffle_ps"&"; .Scl 2; .type 32; .endef;"&NL);
String'Write(S,"_mm_shuffle_ps:"&NL);
String'Write(S,"__mm_shuffle_ps:"&NL);
String'Write(S,"pushl %ebp;"&NL&"movl %esp,%ebp;"&NL);
String'Write(S,"movl 12(%esp),%edx;"&NL&"movaps (%edx),%xmm0;"&NL&"movl 16(%esp),%edx;"&NL&"movaps (%edx),%xmm1;"&NL&"shufps $"&UII(2..UII'Last)&", %xmm1, %xmm0;"&NL);
String'Write(S,"movl 8(%esp),%eax;"&NL&"movaps %xmm0,(%eax);"&NL&"leave;"&NL&"ret;"&NL&NL);
return True;
exception when others=>
return false;
end;
-- *由于SHUFPS指令的第1个参数(正序)必须是立即数,
-- *不得已采用创建导入文件的方式来实现
-- *!不得删除文件XDSP_SHUFFLES.s文件,否则不能通过编译,
-- *生成的程序则不依赖于该导入文件.
function mm_shuffle_ps(v1,v2:XVECTOR;ui:uint) return XVECTOR is
v3:XVECTOR;pragma Warnings(off,v3);
begin
if CreateShuffleAsm(ui) then
declare
pragma Linker_Options("XDSP_SHUFFLES.s");
function shuffle_ps(v1,v2:XVECTOR;ui:uint) return XVECTOR with Import,external_name=>"_mm_shuffle_ps";
begin
return shuffle_ps(v1,v2,ui);
end;
end if;
return (0,(0.0,0.0,0.0,0.0));
end;
procedure vmulComplex(rResult:out XVECTOR;iResult:out XVECTOR;r1,i1,r2,i2:XVECTOR) is
begin
declare
vi1i2:XVECTOR := mm_mul_ps(i1, i2);
vr1r2:XVECTOR := mm_mul_ps(r1, r2);
vr1i2:XVECTOR := mm_mul_ps(r1, i2);
vr2i1:XVECTOR := mm_mul_ps(r2, i1);
begin
rResult := mm_sub_ps(vr1r2, vi1i2); -- real: (r1*r2 - i1*i2)
iResult := mm_add_ps(vr1i2, vr2i1); -- imaginary: (r1*i2 + r2*i1)
end;
end;
procedure vmulComplex(r1,i1:in out XVECTOR;r2,i2:XVECTOR) is
begin
-- (r1, i1) * (r2, i2) := (r1r2 - i1i2, r1i2 + r2i1)
declare
vi1i2:XVECTOR := mm_mul_ps(i1, i2);
vr1r2:XVECTOR := mm_mul_ps(r1, r2);
vr1i2:XVECTOR := mm_mul_ps(r1, i2);
vr2i1:XVECTOR := mm_mul_ps(r2, i1);
begin
r1 := mm_sub_ps(vr1r2, vi1i2); -- real: (r1*r2 - i1*i2)
i1 := mm_add_ps(vr1i2, vr2i1); -- imaginary: (r1*i2 + r2*i1)
end;
end;
procedure ButterflyDIT4_1(r1,i1:in out XVECTOR) is
vDFT4SignBits1 :constant XVECTOR:= (0,(0.0, -0.0, 0.0, -0.0));
vDFT4SignBits2 :constant XVECTOR:= (0,(0.0, 0.0, -0.0, -0.0));
vDFT4SignBits3 :constant XVECTOR:= (0,(0.0, -0.0, -0.0, 0.0));
rTemp:XVECTOR := mm_add_ps( mm_shuffle_ps(r1, r1, MM_SHUFFLE(1, 1, 0, 0)), -- (r1X| r1X|r1Y| r1Y) +
mm_xor_ps(mm_shuffle_ps(r1, r1, MM_SHUFFLE(3, 3, 2, 2)), vDFT4SignBits1) ); -- (r1Z|-r1Z|r1W|-r1W)
iTemp:XVECTOR := mm_add_ps( mm_shuffle_ps(i1, i1, MM_SHUFFLE(1, 1, 0, 0)), -- (i1X| i1X|i1Y| i1Y) +
mm_xor_ps(mm_shuffle_ps(i1, i1, MM_SHUFFLE(3, 3, 2, 2)), vDFT4SignBits1) ); -- (i1Z|-i1Z|i1W|-i1W)
rZrWiZiW:XVECTOR := mm_shuffle_ps(rTemp, iTemp, mm_SHUFFLE(3, 2, 3, 2)); -- (rTempZ|rTempW|iTempZ|iTempW)
rZiWrZiW:XVECTOR := mm_shuffle_ps(rZrWiZiW, rZrWiZiW, mm_SHUFFLE(3, 0, 3, 0)); -- (rTempZ|iTempW|rTempZ|iTempW)
iZrWiZrW:XVECTOR := mm_shuffle_ps(rZrWiZiW, rZrWiZiW, mm_SHUFFLE(1, 2, 1, 2)); -- (rTempZ|iTempW|rTempZ|iTempW)
begin
r1 := mm_add_ps( mm_shuffle_ps(rTemp, rTemp, mm_SHUFFLE(1, 0, 1, 0)), -- (rTempX| rTempY| rTempX| rTempY) +
mm_xor_ps(rZiWrZiW, vDFT4SignBits2) ); -- (rTempZ| iTempW|-rTempZ|-iTempW)
i1 := mm_add_ps( mm_shuffle_ps(iTemp, iTemp, mm_SHUFFLE(1, 0, 1, 0)), -- (iTempX| iTempY| iTempX| iTempY) +
mm_xor_ps(iZrWiZrW, vDFT4SignBits3) ); -- (iTempZ|-rTempW|-iTempZ| rTempW)
end;
procedure ButterflyDIT4_4(r0,r1,r2,r3,i0,i1,i2,i3:in out XVECTOR;pUnityTableReal,pUnityTableImaginary:access constant XVECTOR_ARRAY;uStride:uint32;fLast:BOOL) is
rTemp0, rTemp1, rTemp2, rTemp3, rTemp4, rTemp5, rTemp6, rTemp7:XVECTOR;
iTemp0, iTemp1, iTemp2, iTemp3, iTemp4, iTemp5, iTemp6, iTemp7:XVECTOR;
begin
rTemp0 := mm_add_ps(r0, r2); iTemp0 := mm_add_ps(i0, i2);
rTemp2 := mm_add_ps(r1, r3); iTemp2 := mm_add_ps(i1, i3);
rTemp1 := mm_sub_ps(r0, r2); iTemp1 := mm_sub_ps(i0, i2);
rTemp3 := mm_sub_ps(r1, r3); iTemp3 := mm_sub_ps(i1, i3);
rTemp4 := mm_add_ps(rTemp0, rTemp2); iTemp4 := mm_add_ps(iTemp0, iTemp2);
rTemp5 := mm_add_ps(rTemp1, iTemp3); iTemp5 := mm_sub_ps(iTemp1, rTemp3);
rTemp6 := mm_sub_ps(rTemp0, rTemp2); iTemp6 := mm_sub_ps(iTemp0, iTemp2);
rTemp7 := mm_sub_ps(rTemp1, iTemp3); iTemp7 := mm_add_ps(iTemp1, rTemp3);
-- calculating Result
-- vmulComplex(rTemp0, iTemp0, rTemp0, iTemp0, pUnityTableReal(0), pUnityTableImaginary(0)); -- first one is always trivial
vmulComplex(rTemp5, iTemp5, pUnityTableReal(uStride), pUnityTableImaginary(uStride));
vmulComplex(rTemp6, iTemp6, pUnityTableReal(uStride*2), pUnityTableImaginary(uStride*2));
vmulComplex(rTemp7, iTemp7, pUnityTableReal(uStride*3), pUnityTableImaginary(uStride*3));
if (fLast/=win32.FALSE) then null;
ButterflyDIT4_1(rTemp4, iTemp4);
ButterflyDIT4_1(rTemp5, iTemp5);
ButterflyDIT4_1(rTemp6, iTemp6);
ButterflyDIT4_1(rTemp7, iTemp7);
end if;
r0 := rTemp4; i0 := iTemp4;
r1 := rTemp5; i1 := iTemp5;
r2 := rTemp6; i2 := iTemp6;
r3 := rTemp7; i3 := iTemp7;
end;
procedure FFT4(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:uint32:=1) is
begin
for uIndex in 0..uCount-1 loop
ButterflyDIT4_1(pReal(uIndex), pImaginary(uIndex));
end loop;
end FFT4;
procedure FFT8(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:UINT32:=1) is
wr1:constant XVECTOR := (0,( 1.0, 0.70710677, 0.0, -0.70710677));
wi1:constant XVECTOR := (0,( 0.0, -0.70710677, -1.0, -0.70710677));
wr2:constant XVECTOR := (0,( -1.0, -0.70710677, 0.0, 0.70710677));
wi2:constant XVECTOR := (0,( 0.0, 0.70710677, 1.0, 0.70710677));
pR:access XVECTOR_ARRAY:=pReal'Unrestricted_Access;
pI:access XVECTOR_ARRAY:=pImaginary'Unrestricted_Access;
begin
for uIndex in 0..uCount-1 loop
declare
oddsR :XVECTOR := mm_shuffle_ps(pR(0), pR(1), MM_SHUFFLE(3, 1, 3, 1));
evensR :XVECTOR := mm_shuffle_ps(pR(0), pR(1), MM_SHUFFLE(2, 0, 2, 0));
oddsI :XVECTOR := mm_shuffle_ps(pI(0), pI(1), MM_SHUFFLE(3, 1, 3, 1));
evensI :XVECTOR := mm_shuffle_ps(pI(0), pI(1), MM_SHUFFLE(2, 0, 2, 0));
r,i:XVECTOR;
begin
ButterflyDIT4_1(oddsR, oddsI);
ButterflyDIT4_1(evensR, evensI);
vmulComplex(r, i, oddsR, oddsI, wr1, wi1);
pR(0) := mm_add_ps(evensR, r);
pI(0) := mm_add_ps(evensI, i);
vmulComplex(r, i, oddsR, oddsI, wr2, wi2);
pR(1) := mm_add_ps(evensR, r);
pI(1) := mm_add_ps(evensI, i);
end;
end loop;
end;
procedure FFT16(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:UINT32:=1) is
aUnityTableReal:XVECTOR_ARRAY(0..3):=((0,(1.0, 1.0, 1.0, 1.0)), (0,(1.0, 0.92387950, 0.70710677, 0.38268343)), (0,(1.0, 0.70710677, -4.3711388e-008, -0.70710677)), (0,(1.0, 0.38268343, -0.70710677, -0.92387950)));
aUnityTableImaginary:XVECTOR_ARRAY(0..3):=((0,(-0.0, -0.0, -0.0, -0.0)), (0,(-0.0, -0.38268343, -0.70710677, -0.92387950)), (0,(-0.0, -0.70710677, -1.0, -0.70710677)), (0,(-0.0, -0.92387950, -0.70710677, 0.38268343)));
begin
for uIndex in 0..uCount-1 loop
ButterflyDIT4_4(pReal(uIndex*4),
pReal(uIndex*4 + 1),
pReal(uIndex*4 + 2),
pReal(uIndex*4 + 3),
pImaginary(uIndex*4),
pImaginary(uIndex*4 + 1),
pImaginary(uIndex*4 + 2),
pImaginary(uIndex*4 + 3),
aUnityTableReal'unrestricted_access,
aUnityTableImaginary'unrestricted_access,
1,
win32.TRUE
);
end loop;
end FFT16;
procedure FFT(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;pUnityTable:access constant XVECTOR_ARRAY;uLength:uint32;uCount:UINT32:=1) is
pUnityTableReal:access constant XVECTOR_ARRAY:=pUnityTable;
pUnityTableImaginary:access XVECTOR_ARRAY:=pUnityTable(Rsh(uLength,2)..pUnityTable'Last)'unrestricted_access;
uTotal:constant UINT32:=uCount * uLength;
uTotal_vectors:constant UINT32:=Rsh(uTotal,2);
uStage_vectors:constant UINT32:=Rsh(uLength,2);
uStride:constant UINT32:=Rsh(uStage_vectors,2);
uSkip:constant UINT32:=uStage_vectors-uStride;
begin
for uIndex in 0..uTotal_vectors-1 loop
declare
n:UINT32:=(uIndex/uStride) * (uStride + uSkip) + (uIndex mod uStride);
begin
ButterflyDIT4_4(pReal(n),
pReal(n + uStride),
pReal(n + uStride*2),
pReal(n + uStride*3),
pImaginary(n ),
pImaginary(n + uStride),
pImaginary(n + uStride*2),
pImaginary(n + uStride*3),
pUnityTableReal(n mod uStage_vectors..pUnityTableReal'Last)'unrestricted_access,
pUnityTableImaginary( n mod uStage_vectors..pUnityTableImaginary'Last)'unrestricted_access,
uStride, win32.FALSE);
end;
if uLength>64 then
FFT(pReal, pImaginary, pUnityTable(Rsh(uLength,1)..pUnityTable'Last)'unrestricted_access, Rsh(uLength,2), uCount*4);
elsif uLength=64 then
FFT16(pReal, pImaginary, uCount*4);
elsif uLength=32 then
FFT8(pReal, pImaginary, uCount*4);
elsif uLength=16 then
FFT4(pReal, pImaginary, uCount*4);
end if;
end loop;
end FFT;
function sinf(f:FLOAT) return float with Import,external_name=>"sinf";
function cosf(f:FLOAT) return float with Import,external_name=>"cosf";
procedure FFTInitializeUnityTable(pUnityTable:in out XVECTOR_ARRAY;uLen:uint32) is
pfUnityTable: FLOAT_ARRAY(0..4*integer(uLen)-1) with Import,Address=>pUnityTable'Address;
begin
if (uLen>16) then
declare
flStep:FLOAT:=6.283185307 / FLOAT(uLen);
uLength:uint32:=uLen;
begin
uLength:=Rsh(uLength,2);
for i in 0..3 loop
for j in 0..uLength-1 loop
declare
uIndex:UINT32:=(uint32(i)*uLength) + j;
begin
pfUnityTable(Integer(uIndex)) := cosf(FLOAT(i)*FLOAT(j)*flStep); -- real component
pfUnityTable(Integer(uIndex) + Integer(uLength)*4) := -sinf(FLOAT(i)*FLOAT(j)*flStep); -- imaginary component
end;
end loop;
end loop;
pfUnityTable:= pfUnityTable(Integer(uLength)*8..pfUnityTable'Last);
end;
end if;
end FFTInitializeUnityTable;
procedure FFTUnswizzle(pOutput:out FLOAT_ARRAY;pInput: FLOAT_ARRAY;uLog2Length:UINT32) is
uLength : constant Uint32 := Lsh(1,integer(uLog2Length));
begin
if ((uLog2Length and 1) = 0) then
for uIndex in 0..uLength-1 loop
declare
n:uint32:=uIndex;
begin
n := ( Rsh(n and 16#cccccccc#, 2 ) or Lsh(n and 16#33333333#, 2 ));
n := ( Rsh(n and 16#f0f0f0f0#, 4 ) or Lsh(n and 16#0f0f0f0f#, 4 ));
n := ( Rsh(n and 16#ff00ff00#, 8 ) or Lsh(n and 16#00ff00ff#, 8 ));
n := ( Rsh(n and 16#ffff0000#, 16 ) or Lsh(n and 16#0000ffff#, 16 ));
n:=Rsh(n , (32 - Integer(uLog2Length)));
pOutput(Integer(n)) := pInput(integer(uIndex));
end;
end loop;
else
for uIndex in 0..uLength-1 loop
declare
n:uint32:=Rsh(uIndex,3);
begin
n := ( Rsh(n and 16#cccccccc#, 2 ) or Lsh(n and 16#33333333#, 2 ));
n := ( Rsh(n and 16#f0f0f0f0#, 4 ) or Lsh(n and 16#0f0f0f0f#, 4 ));
n := ( Rsh(n and 16#ff00ff00#, 8 ) or Lsh(n and 16#00ff00ff#, 8 ));
n := ( Rsh(n and 16#ffff0000#, 16 ) or Lsh(n and 16#0000ffff#, 16 ));
n:=Rsh(n , Integer(32 - (uLog2Length-3)));
n:=n or Lsh((uIndex and 7) , integer((uLog2Length - 3)));
pOutput(Integer(n)) := pInput(Integer(uIndex));
end;
end loop;
end if;
end FFTUnswizzle;
procedure FFTPolar(pOutput:out XVECTOR_ARRAY;pInputReal: XVECTOR_ARRAY;pInputImaginary: XVECTOR_ARRAY;uLength:UINT32) is
flOneOverLength:FLOAT:=1.0/FLOAT(ulength);
vOneOverLength :XVECTOR:= mm_set_ps1(flOneOverLength);
begin
for uIndex in 0..Rsh(uLength,2)-1 loop
declare
vReal :XVECTOR := mm_mul_ps(pInputReal(uIndex), vOneOverLength);
vImaginary:XVECTOR := mm_mul_ps(pInputImaginary(uIndex), vOneOverLength);
vRR :XVECTOR := mm_mul_ps(vReal, vReal);
vII :XVECTOR := mm_mul_ps(vImaginary, vImaginary);
vRRplusII :XVECTOR := mm_add_ps(vRR, vII);
vTotal :XVECTOR := mm_sqrt_ps(vRRplusII);
begin
pOutput(uIndex) := mm_add_ps(vTotal, vTotal);
end;
end loop;
end FFTPolar;
--
-- THE END.
--
end XDSP;
下方的附件必须保存为XDSP_SHUFFLES.s
# function __mm_shuffle_ps(v1,v2:__m128;i:uint) return __m128;
.file "XDSP_SHUFFLES.s"
.text
.globl _mm_shuffle_ps;
.globl __mm_shuffle_ps;
.def _mm_shuffle_ps; .Scl 2; .type 32; .endef;
.def __mm_shuffle_ps; .Scl 2; .type 32; .endef;
_mm_shuffle_ps:
__mm_shuffle_ps:
pushl %ebp;
movl %esp,%ebp;
movl 12(%esp),%edx;
movaps (%edx),%xmm0;
movl 16(%esp),%edx;
movaps (%edx),%xmm1;
shufps $221, %xmm1, %xmm0;
movl 8(%esp),%eax;
movaps %xmm0,(%eax);
leave;
ret;