//亮点:实现了一个非常高效的跳跃式调用函数(常数时间内得到"调用任意次之后"的结果),见代码part 4
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
//
//
//part1.
//以下是rand类的一个简单实现。比起rand(),它的函数调用的代码更简单(参考源代码),且:
//1.可以直接读取内态,这在某些场合下是需要用到的。
//2.通过定义不同的tprand变量,可以直接实现互不干涉的rand链。
//3.源码透明,有利于编译器做进一步优化。
/*
struct tprand
{
unsigned long r;
tprand(unsigned seed = 0) { r = seed; }
int operator()(void)
{
return((r = r * 214013L
+ 2531011L) >> 16) & 0x7fff;//关键链在于*链。
}
};
*/
//part2.
//在以上的基础上,可以拓展两个功能:
//jump和哈希式tprand(ul)调用。
/*
struct tprand
{
unsigned long r;
tprand(unsigned seed = 0) { r = seed; }
int operator()(void)
{
return((r = r * 214013L
+ 2531011L) >> 16) & 0x7fff;
}
int operator()(unsigned long r)const
{
return ((r * 214013L
+ 2531011L) >> 16) & 0x7fff;//竟然不能声明为static()。
}
int jump(int jp)//由于不涉及内存读取,【可以实现无限制循环展开。】非常tmd强!
{
for (int j = 0; j < jp; j++) {
r = r * 214013L + 2531011L;
}
return (r >> 16) & 0x7fff;
}//rand=jump(1),jump(0)可以实现0次的操作,即获得错过的值。
};
*/
//其中:
//tprand(ul)操作提供了对应的哈希。
//tprand()=jump(1),而jump(0)可以实现"0次"操作,即获得刚刚输出的随机值。也可以用tprand(::r)来实现。
//其中经检测,(release下)【jump和普通的tprand外循环效率没有差别,可见>>16和&0x7fff不在计算的关键路径内。】
//总之,jump的效率仍旧不足。
//part3.
//考虑到计算机(无符号)整数计算的性质:【加法和乘法满足交换律、结合律,且乘法对加法满足分配律】,结合【循环展开】的思想,可以进行如下优化:
/*
int jump(int jp)//由于不涉及顺序依赖,【意识到,可以实现【无限制循环展开】!以下是普通循环展开的示例。
{
int i = 0;
while (i < jp - 7)
{
//r = (((r * 214013L + 2531011L) * 214013L + 2531011L) * 214013L + 2531011L) * 214013L + 2531011L.......,;//【由于符合交换律和结合律】,可以看出,可以进行以下修改:
//等价于:r = r * (214013L * 214013L * 214013L * 214013L * 214013L * 214013L * 214013L * 214013L) + (2531011L * 214013L * 214013L * 214013L * 214013L * 214013L * 214013L * 214013L + 2531011L * 214013L * 214013L * 214013L * 214013L * 214013L * 214013L + 2531011L * 214013L * 214013L * 214013L * 214013L * 214013L + 2531011L * 214013L * 214013L * 214013L * 214013L + 2531011L * 214013L * 214013L * 214013L + 2531011L * 214013L * 214013L + 2531011L * 214013L+ 2531011L);
#if 0
void rfuncstr(int loop)//乘k次:
{
long rmut = 1, radd = 0;
for (int i = 0; i < loop; i++) {
radd += 2531011L * rmut;
rmut *= 214013L;
}
printf("r=r*(%ld)+(%ld)\n", rmut, radd);
}
#endif//等价于:
r = r * (-191841887) + 2115878600;//可以用rfuncstr获得。
i += 8;
}
for (; i < jp; i++) {
r = r * 214013L + 2531011L;
}
return (r >> 16) & 0x7fff;
}
*/
//part4.
//在3号优化的经验上,可以进行最终的优化,经过这次优化,"获得任意次调用后的结果",所需的时间变成了常数级别(对于一般的循环展开进行进一步化简):
/*
#if 0//用这个式子打印出最后的版本:
void rfuncstr(void)
{
long rmut = 214013L, radd = 2531011L;
for (int i = 0; i < 32; i++)
{
printf("if(jp&(1<<%d)){temp=temp*(%ld)+(%ld);}\n", i,rmut,radd);
radd = rmut * radd + radd;//r.next(a)=r*rmut(a)+radd(a),则r.next(a+a)=(r*rmut(a)+radd(a))*rmut(a)+radd(a).
rmut *= rmut;
}
}
#endif
int jump(unsigned jp)
{
unsigned long temp = r;
if (jp & (1 << 0)) { temp = temp * (214013) + (2531011); }
if (jp & (1 << 1)) { temp = temp * (-1443076087) + (505908858); }
if (jp & (1 << 2)) { temp = temp * (-570470319) + (159719620); }
if (jp & (1 << 3)) { temp = temp * (-191841887) + (2115878600); }
if (jp & (1 << 4)) { temp = temp * (1136269121) + (1043415696); }
if (jp & (1 << 5)) { temp = temp * (-762265983) + (-2108810976); }
if (jp & (1 << 6)) { temp = temp * (-2099004159) + (-2075229632); }
if (jp & (1 << 7)) { temp = temp * (-1085244927) + (-41532288); }
if (jp & (1 << 8)) { temp = temp * (-121310207) + (-542013184); }
if (jp & (1 << 9)) { temp = temp * (2048518145) + (-638119424); }
if (jp & (1 << 10)) { temp = temp * (376688641) + (1581130752); }
if (jp & (1 << 11)) { temp = temp * (-1243111423) + (1706838016); }
if (jp & (1 << 12)) { temp = temp * (-1882243071) + (1886949376); }
if (jp & (1 << 13)) { temp = temp * (-1348567039) + (1961959424); }
if (jp & (1 << 14)) { temp = temp * (-1623392255) + (971128832); }
if (jp & (1 << 15)) { temp = temp * (1048182785) + (-1278967808); }
if (jp & (1 << 16)) { temp = temp * (2096365569) + (1737031680); }
if (jp & (1 << 17)) { temp = temp * (-102236159) + (-820903936); }
if (jp & (1 << 18)) { temp = temp * (-204472319) + (-1641807872); }
if (jp & (1 << 19)) { temp = temp * (-408944639) + (1011351552); }
if (jp & (1 << 20)) { temp = temp * (-817889279) + (2022703104); }
if (jp & (1 << 21)) { temp = temp * (-1635778559) + (-249561088); }
if (jp & (1 << 22)) { temp = temp * (1023410177) + (-499122176); }
if (jp & (1 << 23)) { temp = temp * (2046820353) + (-998244352); }
if (jp & (1 << 24)) { temp = temp * (-201326591) + (-1996488704); }
if (jp & (1 << 25)) { temp = temp * (-402653183) + (301989888); }
if (jp & (1 << 26)) { temp = temp * (-805306367) + (603979776); }
if (jp & (1 << 27)) { temp = temp * (-1610612735) + (1207959552); }
if (jp & (1 << 28)) { temp = temp * (1073741825) + (-1879048192); }
if (jp & (1 << 29)) { temp = temp * (-2147483647) + (536870912); }
if (jp & (1 << 30)) { temp = temp * (1) + (1073741824); }
if (jp & (1 << 31)) { temp = temp * (1) + (-2147483648); }//注意值为1的参数出现了,显示了长周期计算后的规律性。
return ((r = temp) >> 16) & 0x7fff;
}
*/
//在这个基础上可以进一步优化(再次k*k展开),这个优化交给编译器进行。
//这个jump处理特殊的情形时,效率还会低于一开始的版本。为此,保留前者,并将这个常数级jump命名为deepjump。
//以下就是完整的代码:
// Small linear congruential generator with the recurrence
//     state' = state * 214013 + 2531011   (mod 2^32)
// and output bits 16..30 of the new state (a value in 0..32767).
//
// Besides plain stepping it offers:
//   - operator()(unsigned long): a stateless "hash" of an arbitrary state,
//   - jump(n):     advance n steps using an 8-steps-at-once shortcut,
//   - deepjump(n): advance n steps in at most 32 multiply-adds.
//
// The state is kept reduced to 32 bits after every update. Where
// unsigned long is 32 bits wide this is a no-op; where it is 64 bits wide it
// guarantees that stepping, jump() and deepjump() leave *identical* values in
// `r`, so the state can be read, compared and copied reliably.
struct tprand
{
    // Current generator state; publicly readable/writable on purpose.
    unsigned long r;

    // Starts the sequence from `seed`.
    tprand(unsigned seed = 0) { r = seed; }

    // Advances the generator by one step and returns the next value (0..32767).
    int operator()(void)
    {
        r = step(r, 214013L, 2531011L);
        return output(r);
    }

    // Returns the value that a generator whose state is `r` would produce
    // next, without touching this object's own state.
    int operator()(unsigned long r) const
    {
        return output(step(r, 214013L, 2531011L));
    }

    // Advances the generator by `jp` steps and returns the value belonging to
    // the resulting state. jump(1) is equivalent to operator()(); jump(0)
    // re-reads the value most recently produced. A negative `jp` advances
    // nothing.
    int jump(int jp)
    {
        int done = 0;
        // Eight consecutive steps collapse into a single multiply-add because
        // the composition of affine maps is again an affine map.
        // Written as `jp - done >= 8` so no subtraction can overflow.
        while (jp - done >= 8) {
            r = step(r, -191841887L, 2115878600L);
            done += 8;
        }
        // Remaining 0..7 single steps.
        for (; done < jp; done++) {
            r = step(r, 214013L, 2531011L);
        }
        return output(r);
    }

    // Advances the generator by `jp` steps using one multiply-add per set bit
    // of `jp`, and returns the value belonging to the resulting state.
    int deepjump(unsigned jp)
    {
        // kPow2Step[k] = { multiplier, increment } of the affine map that is
        // equivalent to 2^k plain steps, reduced modulo 2^32 and written as
        // signed 32-bit values. Each row follows from the previous one by
        //     mul' = mul * mul,   add' = add * (mul + 1).
        static const long kPow2Step[32][2] = {
            { 214013L, 2531011L },
            { -1443076087L, 505908858L },
            { -570470319L, 159719620L },
            { -191841887L, 2115878600L },
            { 1136269121L, 1043415696L },
            { -762265983L, -2108810976L },
            { -2099004159L, -2075229632L },
            { -1085244927L, -41532288L },
            { -121310207L, -542013184L },
            { 2048518145L, -638119424L },
            { 376688641L, 1581130752L },
            { -1243111423L, 1706838016L },
            { -1882243071L, 1886949376L },
            { -1348567039L, 1961959424L },
            { -1623392255L, 971128832L },
            { 1048182785L, -1278967808L },
            { 2096365569L, 1737031680L },
            { -102236159L, -820903936L },
            { -204472319L, -1641807872L },
            { -408944639L, 1011351552L },
            { -817889279L, 2022703104L },
            { -1635778559L, -249561088L },
            { 1023410177L, -499122176L },
            { 2046820353L, -998244352L },
            { -201326591L, -1996488704L },
            { -402653183L, 301989888L },
            { -805306367L, 603979776L },
            { -1610612735L, 1207959552L },
            { 1073741825L, -1879048192L },
            { -2147483647L, 536870912L },
            { 1L, 1073741824L },
            // Spelled this way because the literal 2147483648 does not fit a
            // 32-bit signed type.
            { 1L, -2147483647L - 1L },
        };

        unsigned long state = r;
        // Walk the bits of jp from least to most significant and stop as soon
        // as no set bit remains, so short jumps stay cheap.
        for (int bit = 0; bit < 32 && jp != 0u; ++bit, jp >>= 1) {
            if (jp & 1u) {
                state = step(state, kPow2Step[bit][0], kPow2Step[bit][1]);
            }
        }
        r = state;
        return output(r);
    }

private:
    // One affine update, reduced to 32 bits. Negative constants are converted
    // to unsigned first, which is well-defined modular arithmetic.
    static unsigned long step(unsigned long state, long mul, long add)
    {
        return (state * (unsigned long)mul + (unsigned long)add) & 0xFFFFFFFFUL;
    }

    // Extracts the 15-bit output (bits 16..30) from a state value.
    static int output(unsigned long state)
    {
        return (int)((state >> 16) & 0x7fff);
    }
};
//测试代码如下:
// Benchmark driver: times four ways of advancing a generator by
// 65536 * testloop steps and prints, for each, the elapsed clock() ticks
// followed by the next generated value (so the results can be compared).
int main()
{
    //srand(1001);
    tprand rnd(1001);
    //for (int i = 0,j=0; i < 64; i++,j==7?j=0,putchar('\n'):j++) {
    //	printf("%5d,%-5d\t", rand(), rnd());
    //}// tprand() behaved correctly in this check (values equal after k calls).
    // Compare the cost of the different ways of skipping ahead:
    clock_t from, to;
#define initrand 1001
#define testloop 1725// arbitrary value
    // Library rand():
    srand(initrand);// seed
    from = clock();
    for (int i = 0; i < 65536; i++)
        for (int k = 0; k < testloop; k++)
            rand();// skip testloop values
    to = clock();
    // clock_t has no printf specifier of its own, so the difference is
    // converted to long; rand() returns int and is printed with %d.
    printf("rand loop:%ld-%d\n", (long)(to - from), rand());// one more rand() yields the value to compare
    // tprand:
    // plain loop of single steps:
    rnd.r = initrand;
    from = clock();
    for (int i = 0; i < 65536; i++)
        for (int k = 0; k < testloop; k++)
            rnd();
    to = clock();
    printf("plain loop:%ld-%d\n", (long)(to - from), rnd());
    // jump():
    rnd.r = initrand;
    from = clock();
    for (int i = 0; i < 65536; i++)
        rnd.jump(testloop);
    to = clock();
    printf("plain jump:%ld-%d\n", (long)(to - from), rnd());
    // deepjump():
    rnd.r = initrand;
    from = clock();
    for (int i = 0; i < 65536; i++)
        rnd.deepjump(testloop);// fastest of the four in the sample run
    to = clock();
    printf("deep jump:%ld-%d\n", (long)(to - from), rnd());
    // A single call can also cover the whole distance:
    rnd.deepjump(65536 * testloop);// result not checked here
    // Conclusions from the sample run (Visual Studio, release x64):
    // tprand produced the same values as that runtime's rand() and was faster.
    // NOTE(review): other C libraries may implement rand() differently.
    // jump() and deepjump() agree with calling the generator k times.
    // For long skips deepjump() is far cheaper than the other variants.
    return 0;
}
//一个输出示例:(vs release64下)
/*
rand loop:4531-12336
plain loop:87-12336
plain jump:29-12336
deep jump:0-12336
*/
//有空的话,后面会更新一个利用该类中jump函数的例子。