水仙花数指的是一个 n 位数 ( n≥3 ),它的每个位上的数字的 n 次幂之和等于它本身。(例如:1^3 + 5^3 + 3^3 = 153)
下面代码,求19位,在E7200 2.53G的win xp 32b上耗时1秒左右。欢迎大家指出更多的优化方法。
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
unsigned long long g_maxValue;
unsigned long long g_minValue;
unsigned long long POWXN[10];
void preCalculation(unsigned char n)
{
g_maxValue = pow((long double)10, n);
g_minValue = pow((long double)10, n-1);
POWXN[0] = 0ui64;
POWXN[1] = 1ui64;
for(int i = 2ui64; i < 10ui64; i++)
{
POWXN[i] = (unsigned long long)(pow((long double)i, n) + 0.5);
}
}
bool search(unsigned long long sum, char *nums, unsigned char n)
{
char *p;
int c = 0;
int i = 0;
int e = n;
char tmp;
char buf[256];
memcpy(buf,nums,n);
/* // 19位时,交换花费的计算量仍然超过节约下来的搜索花费。
for(i = 0; i < n; ++i)
{
c = sum%10;
sum /= 10;
p = (char*)memchr(buf,c,e);
if(NULL == p)
{
return false;
}
else
{
--e;
if(p != buf + e)
{
tmp = buf[e];
buf[e] = *p;
*p = tmp;
}
}
}
*/
for(i = 0; i < n; ++i)
{
c = sum%10;
sum /= 10;
p = (char*)memchr(buf,c,n);
if(NULL == p)
{
return false;
}
else
{
*p = 'n';
}
}
return true;
}
int NarcissusNum(unsigned char n, unsigned long long *out)
{
preCalculation(n);
char *nums = (char *)malloc(sizeof(char) * n);
memset(nums, 0, sizeof(char) * n);
unsigned long long sum = 0;
int c = 0;
// 遍历所有组合。不用排列,和即是所需排列
// 000001, 000002, 000003, ... ,000009, 000011, 000012, ..., 000019, 000022, 000023, ..., 000111, ..., 00122, ...,
// 001111, ..., 011111, ..., 111111, ..., 899999, 999999
int xx = 0;
for(;;)
{
while(nums[0] <= 9)
{ //xx++;
sum = 0;
for(unsigned char j = 0; j < n; ++j)
{
sum += POWXN[nums[j]];
}
if(sum < g_minValue)
{
nums[0]++;
continue;
}
else if(sum > g_maxValue)
{
//break;
// 寻找下一个不超过最大值的组合。
// 与直接break出去比较,位数越大,优化效果越明显。19位时大约节约10~20ms
unsigned char i = 1;
unsigned char ii;
unsigned long long tmp = sum;
while(tmp > g_maxValue && i < n)
{
tmp = sum;
for(ii = 0; ii < i; ++ii)
{
if(9 == nums[i])
{
++i;
continue;
}
tmp -= POWXN[nums[ii]];
tmp += POWXN[nums[i]+1];
}
++i;
}
--i;
++nums[i];
for(unsigned char ii = 0; ii < i; ++ii)
{
nums[ii] = nums[i];
}
}
else
{
if(search(sum, nums, n))
{
*out = sum;
out++;
}
}
nums[0]++;
}
// 组合进位。
// 00009 => 000011, 01299 => 01333, 39999 => 44444
unsigned char k = 1;
for(; k < n; ++k)
{
nums[k]++;
if(nums[k] < 10)
{
for(; k > 1; --k)
{
nums[k-1] = nums[k];
}
break;
}
}
if(nums[n-1] > 9)
{
free(nums);
return xx;
}
nums[0] = nums[1];
}
free(nums);
return xx;
}
// 校验结果
bool Check(unsigned long long val, unsigned char n)
{
unsigned long long tmp = val;
unsigned long long sum = 0;
unsigned char c;
while(0i64 != tmp)
{
c = tmp % 10;
tmp /= 10;
sum += (unsigned long long )pow((long double)c, n);
}
if(sum == val)
return true;
return false;
}
int _tmain(int argc, _TCHAR* argv[])
{
FILETIME ct;
FILETIME et;
FILETIME kt;
FILETIME ut;
__int64 ktStart=0;
__int64 utStart=0;
__int64 ktStop=0;
__int64 utStop=0;
LARGE_INTEGER ss,ee,fq;
HANDLE cur;
int i,j,dump;
cur = GetCurrentThread();
QueryPerformanceCounter(&ss); //QueryPerformanceCounter的单位应该是cpu的时间周期时间,1/cpu频率
GetThreadTimes(cur,&ct,&et,&kt,&ut); // 起始时的内核时间和用户时间。不过精度也就10ms级
ktStart = (((__int64)kt.dwHighDateTime)<<32) + (__int64)kt.dwLowDateTime;
utStart = (((__int64)ut.dwHighDateTime)<<32) + (__int64)ut.dwLowDateTime;
unsigned long long nums[10] = {0};
unsigned char n = 19;
for(int c = 0; c < 10; c++)
{
printf("%d/n",NarcissusNum(n, nums));
}
GetThreadTimes(cur,&ct,&et,&kt,&ut); // 结束时的内核时间和用户时间
ktStop = (((__int64)kt.dwHighDateTime)<<32) + (__int64)kt.dwLowDateTime;
utStop = (((__int64)ut.dwHighDateTime)<<32) + (__int64)ut.dwLowDateTime;
printf("Kernel Time: %llu ms/n", (ktStop - ktStart)/10000); //FILETIME的单位是100ns,除10000变ms
printf("User Time: %llu ms/n", (utStop - utStart)/10000);
QueryPerformanceCounter(&ee);
QueryPerformanceFrequency(&fq);
// QueryPerformanceCounter高精度的,但记得并不是代码实际消耗的时间。
printf("Time: %llu us/n", (ee.QuadPart-ss.QuadPart)/(fq.QuadPart/1000000));
for(int i = 0; i < 10 && 0 != nums[i]; i++)
{
if(Check(nums[i], n))
{
printf("%llu/n", nums[i]);
}
else
{
printf("%llu worng/n", nums[i]);
}
}
return 0;
}
优化了下,800ms
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
unsigned long long g_maxValue;
unsigned long long g_minValue;
unsigned long long POWXN[10];
unsigned long long INC2N[10];
void preCalculation(unsigned char n)
{
g_maxValue = pow((long double)10, n);
g_minValue = pow((long double)10, n-1);
POWXN[0] = 0ui64;
POWXN[1] = 1ui64;
for(int i = 2; i < 10; i++)
{
POWXN[i] = (unsigned long long)(pow((long double)i, n) + 0.5);
}
INC2N[0] = 0;
INC2N[1] = 1;
for(int i = 2; i < 10; i++)
{
INC2N[i] = POWXN[i] - POWXN[i-1];
}
}
bool search(unsigned long long sum, char *nums, unsigned char n)
{
char c;
unsigned char nn;
unsigned char i;
unsigned char j;
char buf[256];
char *p=buf;
memcpy(buf,nums,n);
/* for(i = 0; i < n; ++i)
{
c = sum%10;
sum /= 10;
p = (char*)memchr(buf,c,n);
if(NULL == p)
{
return false;
}
else
{
*p = 'n';
}
}*/
if(n < 4)
{
nn = 0;
}
else
{
nn = n - 4;
}
for(i = 0; i < n; i++)
{
c = sum%10;
sum /= 10;
for(j=0; j < nn; j+=4)
{
if(c == p[j])
{
p[j] = 'n';
c = 'f';
break;
}
else if(c == p[j+1])
{
p[j+1] = 'n';
c = 'f';
break;
}
else if(c == p[j+2])
{
p[j+2] = 'n';
c = 'f';
break;
}
else if(c == p[j+3])
{
p[j+3] = 'n';
c = 'f';
break;
}
}
if('f' != c)
{
for(; j < n; j++)
{
if(c == p[j])
{
p[j] = 'n';
c = 'f';
break;
}
}
if('f'!= c)
return false;
}
}
return true;
}
int NarcissusNum(unsigned char n, unsigned long long *out)
{
char c;
unsigned char i,j,k,nn;
char buf[256];
unsigned long long tmp;
char *nums;
int xx = 0;
unsigned long long sum = 0;
nums = (char *)malloc(sizeof(char) * n + 1);
memset(nums, 0, sizeof(char) * n + 1);
memset(buf,0,256);
if(n < 4)
{
nn = 0;
}
else
{
nn = n - 4;
}
preCalculation(n);
// 遍历所有组合。不用排列,和即是所需排列
// 000001, 000002, 000003, ... ,000009, 000011, 000012, ..., 000019, 000022, 000023, ..., 000111, ..., 00122, ...,
// 001111, ..., 011111, ..., 111111, ..., 899999, 999999
for(;;)
{
while(nums[0] <= 9)
{ //++xx;
sum += INC2N[nums[0]];
if(sum < g_minValue)
{
++nums[0];
continue;
}
else if(sum > g_maxValue)
{//++xx;
//break;
// 寻找下一个不超过最大值的组合。
// 与直接break出去比较,位数越大,优化效果越明显。
i = 1;
tmp = sum;
// 假设1 3 8 9 9过大,1 4 4 4 4在范围内
while(tmp > g_maxValue && i < n)
{
tmp = sum;
// 第一次1 3 9 9 9 仍然过大
// 第二次1 4 4 4 4 范围内
for(j = 0; j <= i; ++j)
{
if(9 == nums[i])
{
++i;
--j;
continue;
}
tmp -= POWXN[nums[j]];
tmp += POWXN[nums[i]+1];
}
++i;
}
// tmp 为 1 4 4 4 4的和
--i;
if(9 == nums[n - 1])
{
free(nums);
return xx;
//break;
}
++nums[i];
for(j = 0; j < i; ++j)
{
nums[j] = nums[i];
}
// sum 降为 1 4 4 4 3的和,上面循环处再加回去。
sum = tmp - INC2N[nums[0]];
}
else
{//++xx;
/* 19位下,掉用函数大概慢了40ms
if(search(sum, nums, n))
{
*out = sum;
++out;
}
++nums[0];
*/
memcpy(buf,nums,n);
tmp = sum;
for(i = 0; i < n; i++)
{
c = tmp%10;
tmp /= 10;
// 展开循环
for(j=0; j < nn; j+=4)
{
if(c == buf[j])
{
buf[j] = 'n';
c = 'f';
break;
}
else if(c == buf[j+1])
{
buf[j+1] = 'n';
c = 'f';
break;
}
else if(c == buf[j+2])
{
buf[j+2] = 'n';
c = 'f';
break;
}
else if(c == buf[j+3])
{
buf[j+3] = 'n';
c = 'f';
break;
}
}
if('f' != c)
{
for(; j < n; j++)
{
if(c == buf[j])
{
buf[j] = 'n';
c = 'f';
break;
}
}
if('f'!= c)
{
goto NEXT;
}
}
}
*out = sum;
++out;
NEXT:
++nums[0];
}
}
// 组合进位。
// 00009 => 0000a => 000011, 01299 => 012aa => 01333, 39999 => 3aaaa => 44444
//++xx;
for(k = 1; k < n; ++k)
{
nums[k]++;
if(nums[k] < 10)
{
for(; k > 0; --k)
{
nums[k-1] = nums[k];
}
break;
}
}
if(nums[n-1] > 9)
{
free(nums);
return xx;
}
sum = POWXN[nums[0]-1]; // 进位后,最低位(nums[0])不可能是0
for(j = 1; j < n; ++j)
{
sum += POWXN[nums[j]];
}
}
free(nums);
return xx;
}
// 校验结果
bool Check(unsigned long long val, unsigned char n)
{
unsigned long long tmp = val;
unsigned long long sum = 0;
unsigned char c;
while(0i64 != tmp)
{
c = tmp % 10;
tmp /= 10;
sum += (unsigned long long )pow((long double)c, n);
}
if(sum == val)
return true;
return false;
}
int _tmain(int argc, _TCHAR* argv[])
{
FILETIME ct;
FILETIME et;
FILETIME kt;
FILETIME ut;
__int64 ktStart=0;
__int64 utStart=0;
__int64 ktStop=0;
__int64 utStop=0;
LARGE_INTEGER ss,ee,fq;
HANDLE cur;
cur = GetCurrentThread();
QueryPerformanceCounter(&ss); //QueryPerformanceCounter的单位应该是cpu的时间周期时间,1/cpu频率
GetThreadTimes(cur,&ct,&et,&kt,&ut); // 起始时的内核时间和用户时间。不过精度也就10ms级
ktStart = (((__int64)kt.dwHighDateTime)<<32) + (__int64)kt.dwLowDateTime;
utStart = (((__int64)ut.dwHighDateTime)<<32) + (__int64)ut.dwLowDateTime;
unsigned long long nums[10] = {0};
unsigned char n = 19;
for(int c = 0; c < 10; c++)
{
NarcissusNum(n, nums);
//printf("%d/n",NarcissusNum(n, nums));
/*NarcissusNum(c, nums);
printf("%d:/n",c);
for(int i = 0; i < 10 && 0 != nums[i]; i++)
{
if(Check(nums[i], c))
{
printf("%llu/n", nums[i]);
}
else
{
printf("%llu worng/n", nums[i]);
}
}
memset(nums,0,sizeof(unsigned long long) * 10);*/
}
GetThreadTimes(cur,&ct,&et,&kt,&ut); // 结束时的内核时间和用户时间
ktStop = (((__int64)kt.dwHighDateTime)<<32) + (__int64)kt.dwLowDateTime;
utStop = (((__int64)ut.dwHighDateTime)<<32) + (__int64)ut.dwLowDateTime;
printf("Kernel Time: %llu ms/n", (ktStop - ktStart)/10000); //FILETIME的单位是100ns,除10000变ms
printf("User Time: %llu ms/n", (utStop - utStart)/10000);
QueryPerformanceCounter(&ee);
QueryPerformanceFrequency(&fq);
// QueryPerformanceCounter高精度的,但记得并不是代码实际消耗的时间。
printf("Time: %llu us/n", (ee.QuadPart-ss.QuadPart)/(fq.QuadPart/1000000));
//printf("Time: %llu ns/n", (ee.QuadPart-ss.QuadPart)/(fq.QuadPart/1000000000));
for(int i = 0; i < 10 && 0 != nums[i]; i++)
{
if(Check(nums[i], n))
{
printf("%llu/n", nums[i]);
}
else
{
printf("%llu worng/n", nums[i]);
}
}
return 0;
}