求助,C代码数组初化怎么优化
本帖最后由 uthman123444 于 2015-11-09 10:49:11 编辑
以下是求质数子程序。/*
初始化数组
prime{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20......n}
sieve{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20......n}
第一次删除2倍数得到数组
prime{0,1,0,3,0,5,0,7,0,9,0,11,0,13,0,15,0,17,0,19,0......n}
2的倍数是{4,6,8,10......}/2---->{2,3,4,5......}---->取模---->sieve{3,5,7,11,13,17,19,......n}
下一次删除的就是sieve[0]的倍数,刚好它要删除的倍数都在sieve数组里。同样得到新的sieve数组
进行下一次计算。
算法是每个非质数只删除一次
下一步再做优化
prime数组{0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1....,0,0,0,1,0,1} 6n-1 6n+1
sieve数组{5,7,11,13,17,19},2424规则
从质数5开始
(sieve[j] % i)此段工作量少了2/3
*/
int creat_prime(int prime[], int sieve[], int n)
{
register int i, j, k, l;
for (i = 0; i
{
prime[i]=sieve[i] = i;
}
l = 1;
for (i = 2, j = 2; l != 0; i = sieve[0])
{
l = 0;
k = i * i;//从此数开始作标记
while (k <= n)
{
if (sieve[j] % i) sieve[l++] = sieve[j];//存储下一轮(FOR)循环的质数倍数
prime[k] = 0;//非质数标记为0
k = i * sieve[++j];//下一个合数
}
j = 0;
}
//for (i = 2; i <= n; i++) if (prime[i]) printf("%d ", i);
return 0;
}
------解决思路----------------------
引用:Quote: 引用:Quote: 引用:Quote: 引用:想怎样优化,用memcpy?
从质数5开始
(sieve[j] % i)此段工作量少了2/3
所以数组初始化方式要更改,
数组想用下面的方式
prime数组{0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,1....,0,0,0,1,0,1} 6n-1 6n+1
sieve数组{5,7,11,13,17,19},2424规则
还是没明白,怎么prime中全是0和1, 6n-1 6n+1 怎么理解
n是自然数
所有大于6的数都可以表示为: 6n-2 6n-1 6n 6n+1 6n+2 6n+3 六种之一
6n 肯定能被6整除
6n+2 肯定能被2整除
6n+3 肯定能被3整除
6n-2 肯定能被2整除
prime初始: 0的话就是2或3的倍数,不是2或3的倍数为1
这样?
for (i = 0; i
{
if(i >= 6)
{
if(i % 2 && i % 3)
prime[i] = 1;
else
prime[i] = 0;
}
else
prime[i] = i;
}
sieve呢
------解决思路----------------------
直接筛奇数就可以了,直接省去1/2
3的倍数不好指定,所以就不必处理了
多个1/3 没关系不然直接写到文件里,并且写成C语言数组形式好了
当然,你也可以把 N内的素数全部写成 数组
然后编译,直接搞定
------解决思路----------------------
引用:坚持这么做的化,可以这么初始化
///1)Fill 0 ,memset(isprime,0,size);
这个效率通常很高的,没有停顿的循环,不带额外的比较操作,无跳转,核心指令只有一条stosb,带 rep 前缀的指令
rep stosb
X86 串操作指令,是高效的数据传输指令。
2)6n-1 6n+1
for(i=5;i
{
isprime[i]=1;
isprime[i+2]=1;
} page ,132
title memset - set sections of memory all to one byte
;***
;memset.asm - set a section of memory to all one byte
;
; Copyright (c) Microsoft Corporation. All rights reserved.
;
;Purpose:
; contains the memset() routine
;
;*******************************************************************************
.xlist
include cruntime.inc
.list
page
;***
;char *memset(dst, value, count) - sets "count" bytes at "dst" to "value"
;
;Purpose:
; Sets the first "count" bytes of the memory starting
; at "dst" to the character value "value".
;
; Algorithm:
; char *
; memset (dst, value, count)
; char *dst;
; char value;
; unsigned int count;
; {
; char *start = dst;
;
; while (count--)
; *dst++ = value;
; return(start);
; }
;
;Entry:
; char *dst - pointer to memory to fill with value
; char value - value to put in dst bytes
; int count - number of bytes of dst to fill
;
;Exit:
; returns dst, with filled bytes
;
;Uses:
;
;Exceptions:
;
;*******************************************************************************
CODESEG
extrn _VEC_memzero:near
extrn __sse2_available:dword
public memset
memset proc \
dst:ptr byte, \
value:byte, \
count:dword
OPTION PROLOGUE:NONE, EPILOGUE:NONE
.FPO ( 0, 3, 0, 0, 0, 0 )
mov edx,[esp + 0ch] ; edx = "count"
mov ecx,[esp + 4] ; ecx points to "dst"
test edx,edx ; 0?
jz short toend ; if so, nothing to do
xor eax,eax
mov al,[esp + 8] ; the byte "value" to be stored
; Special case large block zeroing using SSE2 support
test al,al ; memset using zero initializer?
jne dword_align
cmp edx,080h ; block size exceeds size threshold?
jb dword_align
cmp DWORD PTR __sse2_available,0 ; SSE2 supported?
je dword_align
jmp _VEC_memzero ; use fast zero SSE2 implementation
; no return
; Align address on dword boundary
dword_align:
push edi ; preserve edi
mov edi,ecx ; edi = dest pointer
cmp edx,4 ; if it's less then 4 bytes
jb tail ; tail needs edi and edx to be initialized
neg ecx
and ecx,3 ; ecx = # bytes before dword boundary
jz short dwords ; jump if address already aligned
sub edx,ecx ; edx = adjusted count (for later)
adjust_loop:
mov [edi],al
add edi,1
sub ecx,1
jnz adjust_loop
dwords:
; set all 4 bytes of eax to [value]
mov ecx,eax ; ecx=0/0/0/value
shl eax,8 ; eax=0/0/value/0
add eax,ecx ; eax=0/0val/val
mov ecx,eax ; ecx=0/0/val/val
shl eax,10h ; eax=val/val/0/0
add eax,ecx ; eax = all 4 bytes = [value]
; Set dword-sized blocks
mov ecx,edx ; move original count to ecx
and edx,3 ; prepare in edx byte count (for tail loop)
shr ecx,2 ; adjust ecx to be dword count
jz tail ; jump if it was less then 4 bytes
rep stosd
main_loop_tail:
test edx,edx ; if there is no tail bytes,
jz finish ; we finish, and it's time to leave
; Set remaining bytes
tail:
mov [edi],al ; set remaining bytes
add edi,1
sub edx,1 ; if there is some more bytes
jnz tail ; continue to fill them
; Done
finish:
mov eax,[esp + 8] ; return dest pointer
pop edi ; restore edi
ret
toend:
mov eax,[esp + 4] ; return dest pointer
ret
memset endp
end
------解决思路----------------------
引用:Quote: 引用:坚持这么做的化,可以这么初始化
///1)Fill 0 ,memset(isprime,0,size);
这个效率通常很高的,没有停顿的循环,不带额外的比较操作,无跳转,核心指令只有一条stosb,带 rep 前缀的指令
rep stosb
X86 串操作指令,是高效的数据传输指令。
2)6n-1 6n+1
for(i=5;i
{
isprime[i]=1;
isprime[i+2]=1;
} page ,132
title memset - set sections of memory all to one byte
;***
;memset.asm - set a section of memory to all one byte
;
; Copyright (c) Microsoft Corporation. All rights reserved.
;
;Purpose:
; contains the memset() routine
;
;*******************************************************************************
.xlist
include cruntime.inc
.list
page
;***
;char *memset(dst, value, count) - sets "count" bytes at "dst" to "value"
;
;Purpose:
; Sets the first "count" bytes of the memory starting
; at "dst" to the character value "value".
;
; Algorithm:
; char *
; memset (dst, value, count)
; char *dst;
; char value;
; unsigned int count;
; {
; char *start = dst;
;
; while (count--)
; *dst++ = value;
; return(start);
; }
;
;Entry:
; char *dst - pointer to memory to fill with value
; char value - value to put in dst bytes
; int count - number of bytes of dst to fill
;
;Exit:
; returns dst, with filled bytes
;
;Uses:
;
;Exceptions:
;
;*******************************************************************************
CODESEG
extrn _VEC_memzero:near
extrn __sse2_available:dword
public memset
memset proc \
dst:ptr byte, \
value:byte, \
count:dword
OPTION PROLOGUE:NONE, EPILOGUE:NONE
.FPO ( 0, 3, 0, 0, 0, 0 )
mov edx,[esp + 0ch] ; edx = "count"
mov ecx,[esp + 4] ; ecx points to "dst"
test edx,edx ; 0?
jz short toend ; if so, nothing to do
xor eax,eax
mov al,[esp + 8] ; the byte "value" to be stored
; Special case large block zeroing using SSE2 support
test al,al ; memset using zero initializer?
jne dword_align
cmp edx,080h ; block size exceeds size threshold?
jb dword_align
cmp DWORD PTR __sse2_available,0 ; SSE2 supported?
je dword_align
jmp _VEC_memzero ; use fast zero SSE2 implementation
; no return
; Align address on dword boundary
dword_align:
push edi ; preserve edi
mov edi,ecx ; edi = dest pointer
cmp edx,4 ; if it's less then 4 bytes
jb tail ; tail needs edi and edx to be initialized
neg ecx
and ecx,3 ; ecx = # bytes before dword boundary
jz short dwords ; jump if address already aligned
sub edx,ecx ; edx = adjusted count (for later)
adjust_loop:
mov [edi],al
add edi,1
sub ecx,1
jnz adjust_loop
dwords:
; set all 4 bytes of eax to [value]
mov ecx,eax ; ecx=0/0/0/value
shl eax,8 ; eax=0/0/value/0
add eax,ecx ; eax=0/0val/val
mov ecx,eax ; ecx=0/0/val/val
shl eax,10h ; eax=val/val/0/0
add eax,ecx ; eax = all 4 bytes = [value]
; Set dword-sized blocks
mov ecx,edx ; move original count to ecx
and edx,3 ; prepare in edx byte count (for tail loop)
shr ecx,2 ; adjust ecx to be dword count
jz tail ; jump if it was less then 4 bytes
rep stosd
main_loop_tail:
test edx,edx ; if there is no tail bytes,
jz finish ; we finish, and it's time to leave
; Set remaining bytes
tail:
mov [edi],al ; set remaining bytes
add edi,1
sub edx,1 ; if there is some more bytes
jnz tail ; continue to fill them
; Done
finish:
mov eax,[esp + 8] ; return dest pointer
pop edi ; restore edi
ret
toend:
mov eax,[esp + 4] ; return dest pointer
ret
memset endp
end#include
#include
__declspec(naked) __int64 __cdecl GetCycle(void)
{
__asm
{
rdtsc
ret
}
}
__declspec(naked) void EmuRepStosd(void* d, size_t n)
{
//esp + 4 d
//esp + 8 n
__asm
{
push edi
xor eax, eax
mov edi, [esp+4+4]
mov ecx, [esp+8+4]
cld
rep stosd
pop edi
ret
}
}
int main(int argc, CHAR* argv[])
{
#define LOOPSW 20
#define NUMSW 0x50000
__declspec(align(16)) static char arrays[NUMSW];
__int64 tInit, tMemset, tRepSto;
int i;
for (i=0;i
{
// cache ready ...
ZeroMemory(&arrays[0], sizeof(arrays));
ZeroMemory(&arrays[0], sizeof(arrays));
ZeroMemory(&arrays[0], sizeof(arrays));
ZeroMemory(&arrays[0], sizeof(arrays));
// get Memset Ticks head addr align 16
tInit=GetCycle();
ZeroMemory(&arrays[0], sizeof(arrays)/2);
tMemset=GetCycle()-tInit;
// get Rep OPR Ticks head addr 0xNNNNNNN1 (unaligned)
tInit=GetCycle();
EmuRepStosd(&arrays[1],sizeof(arrays)/2/4/*DWORD NUMS*/);
tRepSto=GetCycle()-tInit;
printf("REP TIME:%I64d CRT TIME:%I64d\n",tRepSto,tMemset);
}
}