转载自:
是一篇很好的文章,效率相当高,可惜注释少了些,看起来有些恼火
1000亿以内素数计数算法
/****************************************************************** copyright (C) 2007 Huang Yuanbing version 1.1, 2007 PrimeNumber mailto: bailuzhou@163.com1 (remove the last digit 1 for "laji" mail) free use for non-commercial purposes ****************************************************************** main ideal come from a paper by M.DELEGLISE ADN J.LAGARIAS "COMPUTING PI(x): THE MEISSEL, LEHMER, LAGARIAS, MILLER, ODLYZKO METHOD" a = PI(y); (y >>= x^(1/3) and y <= x^(1/2)) PI(x) = phi(x, a) + a - 1 - P2Xa(x,a); phi(x, a) = S0 + S = S0 + S1 + S3 + S2 = S0 + S1 + S3 + U + V = S0 + S1 + S3 + U + V1 + V2 = S0 + S1 + S3 + U + V1 + W1 + W2 + W3 + W4 + W5 it need O(n^2/3) space and MAXN > 10 and MAXN <= 1e11 **********************************************************************/ #i nclude <time.h> #i nclude <stdio.h> #i nclude <math.h> #i nclude <memory.h> #i nclude <assert.h> #define COMP 4 #define MASKN(n) (1 << ((n >> 1) & 7)) #define min(a, b) (a) < (b) ? (a) : (b) #define MULTI_THREAD #define THRED_NUMS 4 //#define PRINT_DEBUG typedef unsigned int uint; #ifdef _WIN32 typedef __int64 int64; #else typedef long long int64; #endif int *Prime; int *PI; int64 MAXN = (int64)1e11; int pt[130000]; // MAXN < 1e11, size = np int *phiTable; int *minfactor; int x23, x12, x13, x14, y; int64 x; int ST = 7, Q = 1, phiQ = 1; //Q = 2*3*5*7*11*13 = 30030 //phiQ = 1*2*4*6*10*12 = 5760 int phi(int x, int a) { if (a == ST) return (x / Q) * phiQ + phiTable[x % Q]; else if (a == 0) return x; else if (x < Prime[a]) return 1; else return phi(x, a - 1) - phi(x / Prime[a], a - 1); } #ifdef MULTI_THREAD struct ThreadInfo{ int pindex; int theadnums; int64 pnums; }Threadparam[2 * THRED_NUMS + 2]; #ifdef _WIN32 #i nclude <windows.h> DWORD WINAPI WIN32ThreadFun(LPVOID pinfo) #else #i nclude <pthread.h> void* POSIXThreadFun(void *pinfo) #endif { ThreadInfo *pThreadInfo = (ThreadInfo *) (pinfo); int addstep = pThreadInfo->theadnums * 2; for (int i = pThreadInfo->pindex; pt[i] != 0; i += addstep){ if (pt[i] > 0) pThreadInfo->pnums -= phi(pt[i], pt[i + 1]); else pThreadInfo->pnums += phi(-pt[i], pt[i + 1]); } // printf("ThreadID = %4d, phi(%d) = %d\n", GetCurrentThreadId(), pThreadInfo->pindex, pThreadInfo->pnums); // printf("ThreadID = %4d, phi(%d) = %d\n", pthread_self(), pThreadInfo->pindex, pThreadInfo->pnums); return 0; } int64 multiThread(int theadnums) { int i; int64 pnums = 0; assert(theadnums < 2 * THRED_NUMS); for (i = 0; i < theadnums; i++){ Threadparam[i].pindex = 2 * i + 1; Threadparam[i].theadnums = theadnums; Threadparam[i].pnums = 0; } #ifdef _WIN32 HANDLE tHand[THRED_NUMS * 2]; DWORD threadID[THRED_NUMS * 2]; for (i = 0; i < theadnums; i++){ tHand[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)WIN32ThreadFun, (LPVOID)(&Threadparam[i]), 0, &threadID[i]); if (tHand[i] == NULL) printf("create Win32 thread error\n"); } WaitForMultipleObjects(theadnums, tHand, true, INFINITE); for (i = 0; i < theadnums; i++){ pnums += Threadparam[i].pnums; CloseHandle(tHand[i]); } #else pthread_t tid[THRED_NUMS]; for (i = 0; i < theadnums; i++){ int error = pthread_create(&tid[i], NULL, POSIXThreadFun, &Threadparam[i]); if ( error != 0 ) printf("Create pthread error: %d\n", error); } for (i = 0; i < theadnums; i++){ pthread_join(tid[i], NULL); pnums += Threadparam[i].pnums; } #endif return pnums; } #endif int freememory(int alloc) { int memsize = (x / y) + 100; if (alloc == 0){ int psize = (int) (memsize / log(memsize)); //printf("psize = %d memeszie = %d\n", psize, (int)(psize * 1.2) ); PI = new int [memsize + 100]; Prime = new int[(int)(psize * 1.2) + 100]; assert(PI && Prime); }else{ delete phiTable; delete minfactor; delete Prime; delete PI; } return alloc; } void init_phiTable( ) { clock_t start = clock(); int p, i, j; if (x < 1e10) ST = 6; if (ST > PI[y]) ST = PI[y]; for (i = 1; i <= ST; ++i){ Q *= Prime[i]; phiQ *= Prime[i] - 1; } phiTable = new int[Q + 10]; for (i = 0; i < Q; ++i) phiTable[i] = i; for (i = 1; i <= ST; ++i){ p = Prime[i]; for (j = Q - 1; j >= 0; --j) phiTable[j] -= phiTable[j / p]; } printf(" Q = %d, PhiQ = %d\n", Q, phiQ); printf(" init_phiTable time = %ld ms\n", clock() - start); } void init_minFactor( ) { clock_t start = clock(); int i, j, maxy = y + 10; int sqrty = (int)sqrt(maxy) + 1; minfactor = new int[maxy + 10]; for (i = 0; i < maxy; i++) minfactor[i] = i; for (i = 1; Prime[i] <= maxy; i++){ for (j = Prime[i]; j <= maxy; j += Prime[i]){ if (minfactor[j] == -j || minfactor[j] == j) minfactor[j] = -Prime[i]; else minfactor[j] = -minfactor[j]; } } for (i = 1; Prime[i] <= sqrty; i++){ int powp = Prime[i] * Prime[i]; for (j = powp; j <= maxy; j += powp) minfactor[j] = 0; } printf(" init_minFactor time = %ld ms\n", clock() - start); } int sieve( ) { clock_t start = clock(); int primes = 1; int maxp = (x / y) + 10; Prime[1] = 2; unsigned char *bitMask = (unsigned char *) PI; memset(bitMask, 0, (maxp + 64) >> COMP); for (int p = 3; p < maxp; p += 2){ if ( !(bitMask[p >> COMP] & MASKN(p)) ){ Prime[++primes] = p; if (p > maxp / p) continue; for (int j = p * p; j < maxp; j += p << 1) bitMask[j >> COMP] |= MASKN(j); } } Prime[0] = primes; Prime[primes] = 0; printf("pi(%d) = %d\n", maxp, primes); printf(" sieve time = %ld ms\n", clock() - start); return primes; } void init_x( ) { x = (int64)MAXN; x23 = (int)(pow(x, 2.0 / 3) + 0.01); x12 = (int)(pow(x, 1.0 / 2) + 0.01); x13 = (int)(pow(x, 1.0 / 3) + 0.01); x14 = (int)(pow(x, 1.0 / 4) + 0.01); assert((int64)x12 * x12 <= x); assert((int64)x13 * x13 * x13 <= x); assert((int64)x14 * x14 * x14 * x14 <= x); y = x13; printf(" x14 = %d, x13 = %d, x12 = %d x23 = %d, y = %d\n", x14, x13, x12, x23, y); freememory(0); } int64 cal_S0( ) { int64 S0 = x; for (int j = 2; j <= y; j++){ if (minfactor[j] > 0) S0 += x / j; else if (minfactor[j] < 0) S0 -= x / j; } printf("S0 = %I64d\n", S0); return S0; } // so bad performance in this function int64 cal_S3( ) { clock_t start = clock(); int i, p, a; int np = 1; int64 S3 = 0; for (i = 1; i <= PI[x14]; i++){ p = Prime[i]; a = PI[p] - 1; #ifdef PRINT_DEBUG assert(p <= x14 && a >= 0); #endif for (int m = y / p + 1; m <= y; m++){ int xx = x / (int64)(m * p); #ifndef MULTI_THREAD if (minfactor[m] > p) S3 -= phi(xx, a); else if (minfactor[m] < -p) S3 += phi(xx, a); #else if (minfactor[m] > p){ pt[np++] = xx; pt[np++] = a; } else if (minfactor[m] < -p){ pt[np++] = -xx; pt[np++] = a; } #endif } } #ifdef MULTI_THREAD printf("np = %d\n", np); if (np < 100) S3 = multiThread(1); else S3 = multiThread(THRED_NUMS); #endif printf("S3 = %I64d\n", S3); printf(" cal S3 time = %ld ms\n", clock() - start); return S3; } void init_PI( ) { clock_t start = clock(); int Px = 1; PI[0] = PI[1] = 0; for (int i = 1; Prime[i]; i++, Px++){ for (int j = Prime[i]; j <= Prime[i + 1]; j++) PI[j] = Px; } //printf(" Px = %d, primes = %d\n", Px, Prime[0]); printf(" PI[%d] = %d\n", Px, PI[Px - 1]); printf(" init_PI time = %ld ms\n", clock() - start); } int64 cal_P2xa( ) { int64 phi2 = (int64)PI[y] * (PI[y] - 1) / 2 - (int64)PI[x12] * (PI[x12] - 1) / 2; for (int i = PI[y] + 1; i <= PI[x12] + 0; i++){ int p = Prime[i]; #ifdef PRINT_DEBUG assert(p > y && p <= x12); #endif phi2 += PI[x / p]; } printf("P2xa(%I64d, %d) = %I64d\n", x, PI[y], phi2); return phi2; } int64 cal_S1( ) { int64 temp = PI[y] - PI[x13]; int64 S1 = temp * (temp - 1) / 2; printf("S1 = %I64d\n", S1); return S1; } int64 cal_U( ) { int64 p, U = 0; int sqrt_xdivy = (i