利用类实现rand函数，以及相应的优化

最新推荐文章于 2024-05-04 23:45:19 发布
「已注销」
最新推荐文章于 2024-05-04 23:45:19 发布
阅读量281
点赞数
分类专栏：杂项文章标签：算法 c++
本文链接：https://blog.csdn.net/irmae/article/details/105585926
版权
杂项专栏收录该内容
8 篇文章 0 订阅
订阅专栏
亮点：实现了一个非常高效的跳跃式调用函数，见代码part 4
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
//
//
//part1.
//以下是rand类的一个简单实现。比起rand(),它的函数调用的代码更简单(参考源代码)，且：
//1.可以直接读取内态，这在某些场合下是需要用到的。
//2.通过定义不同的tprand变量，可以直接实现互不干涉的rand链。
//3.源码透明，有利于编译器做进一步优化。
/*
struct tprand
{
	unsigned long r;
	tprand(unsigned seed = 0) { r = seed; }
	int operator()(void)
	{
		return((r = r * 214013L
			+ 2531011L) >> 16) & 0x7fff;//关键链在于*链。
	}
};
*/
//part2.
//在以上的基础上，可以拓展两个功能：
//jump和哈希式tprand(ul)调用。
/*
struct tprand
{
	unsigned long r;
	tprand(unsigned seed = 0) { r = seed; }
	int operator()(void)
	{
		return((r = r * 214013L
			+ 2531011L) >> 16) & 0x7fff;
	}
	int operator()(unsigned long r)const
	{
		return ((r * 214013L
			+ 2531011L) >> 16) & 0x7fff;//竟然不能声明为static()。
	}
	int jump(int jp)//由于不涉及内存读取，【可以实现无限制循环展开。】非常tmd强！
	{
		for (int j = 0; j < jp; j++) {
			r = r * 214013L + 2531011L;
		}
		return (r >> 16) & 0x7fff;
		
	}//rand=jump(1)，jump(0)可以实现0次的操作，即获得错过的值。
};
*/
//其中：
//tprand(ul)操作提供了对应的哈希。
//tprand()等价于jump(1)；jump(0)执行"0次"推进，即重新取得刚刚输出的随机值。而tprand(r)（传入当前内态r）不改变内态，返回的是下一次tprand()将要输出的值。
//其中经检测，(release下)【jump和普通的tprand外循环效率没有差别，可见>>16和&0x7fff不在计算的关键路径内。】
//总之，jump的效率仍旧不足。
//part3.
//考虑到计算机整数计算的性质：【加法和乘法满足交换律、结合律和分配律】（无符号运算按模2^n进行时依然成立），结合【循环展开】的思想，可以进行如下优化：
/*
int jump(int jp)//由于不涉及顺序依赖，【意识到，可以实现【无限制循环展开】！以下是普通循环展开的示例。
	{
		int i = 0;
		while (i < jp - 7)
		{
			//r = (((r * 214013L + 2531011L) * 214013L + 2531011L) * 214013L + 2531011L) * 214013L + 2531011L.......,;//【由于符合交换律和结合律】，可以看出，可以进行以下修改：
			//等价于：r = r * (214013L * 214013L * 214013L * 214013L * 214013L * 214013L * 214013L * 214013L) + (2531011L * 214013L * 214013L * 214013L * 214013L * 214013L * 214013L * 214013L + 2531011L * 214013L * 214013L * 214013L * 214013L * 214013L * 214013L + 2531011L * 214013L * 214013L * 214013L * 214013L * 214013L + 2531011L * 214013L * 214013L * 214013L * 214013L + 2531011L * 214013L * 214013L * 214013L + 2531011L * 214013L * 214013L + 2531011L * 214013L+ 2531011L);
			#if 0
			void rfuncstr(int loop)//乘k次：
			{
				long rmut = 1, radd = 0;
				for (int i = 0; i < loop; i++) {
					radd += 2531011L * rmut;
					rmut *= 214013L;
				}
				printf("r=r*(%ld)+(%ld)\n", rmut, radd);
			}
			#endif//等价于：
			r = r * (-191841887) + 2115878600;//可以用rfuncstr获得。
			i += 8;
		}
		for (; i < jp; i++) {
			r = r * 214013L + 2531011L;
		}
		return (r >> 16) & 0x7fff;
	}
*/
//part4.
//在3号优化的经验上，可以进行最终的优化，经过这次优化，"获得任意次调用后的结果"，所需的时间变成了常数级别(对于一般的循环展开进行进一步化简)：
/*
#if 0//用这个式子打印出最后的版本：
void rfuncstr(void)
{
	long rmut = 214013L, radd = 2531011L;
	for (int i = 0; i < 32; i++)
	{
		printf("if(jp&(1<<%d)){temp=temp*(%ld)+(%ld);}\n", i,rmut,radd);
		radd = rmut * radd + radd;//r.next(a)=r*rmut(a)+radd(a),则r.next(a+a)=(r*rmut(a)+radd(a))*rmut(a)+radd(a).
		rmut *= rmut;
	}
}
#endif

int jump(unsigned jp)
{
unsigned long temp = r;
if (jp & (1 << 0)) { temp = temp * (214013) + (2531011); }
if (jp & (1 << 1)) { temp = temp * (-1443076087) + (505908858); }
if (jp & (1 << 2)) { temp = temp * (-570470319) + (159719620); }
if (jp & (1 << 3)) { temp = temp * (-191841887) + (2115878600); }
if (jp & (1 << 4)) { temp = temp * (1136269121) + (1043415696); }
if (jp & (1 << 5)) { temp = temp * (-762265983) + (-2108810976); }
if (jp & (1 << 6)) { temp = temp * (-2099004159) + (-2075229632); }
if (jp & (1 << 7)) { temp = temp * (-1085244927) + (-41532288); }
if (jp & (1 << 8)) { temp = temp * (-121310207) + (-542013184); }
if (jp & (1 << 9)) { temp = temp * (2048518145) + (-638119424); }
if (jp & (1 << 10)) { temp = temp * (376688641) + (1581130752); }
if (jp & (1 << 11)) { temp = temp * (-1243111423) + (1706838016); }
if (jp & (1 << 12)) { temp = temp * (-1882243071) + (1886949376); }
if (jp & (1 << 13)) { temp = temp * (-1348567039) + (1961959424); }
if (jp & (1 << 14)) { temp = temp * (-1623392255) + (971128832); }
if (jp & (1 << 15)) { temp = temp * (1048182785) + (-1278967808); }
if (jp & (1 << 16)) { temp = temp * (2096365569) + (1737031680); }
if (jp & (1 << 17)) { temp = temp * (-102236159) + (-820903936); }
if (jp & (1 << 18)) { temp = temp * (-204472319) + (-1641807872); }
if (jp & (1 << 19)) { temp = temp * (-408944639) + (1011351552); }
if (jp & (1 << 20)) { temp = temp * (-817889279) + (2022703104); }
if (jp & (1 << 21)) { temp = temp * (-1635778559) + (-249561088); }
if (jp & (1 << 22)) { temp = temp * (1023410177) + (-499122176); }
if (jp & (1 << 23)) { temp = temp * (2046820353) + (-998244352); }
if (jp & (1 << 24)) { temp = temp * (-201326591) + (-1996488704); }
if (jp & (1 << 25)) { temp = temp * (-402653183) + (301989888); }
if (jp & (1 << 26)) { temp = temp * (-805306367) + (603979776); }
if (jp & (1 << 27)) { temp = temp * (-1610612735) + (1207959552); }
if (jp & (1 << 28)) { temp = temp * (1073741825) + (-1879048192); }
if (jp & (1 << 29)) { temp = temp * (-2147483647) + (536870912); }
if (jp & (1 << 30)) { temp = temp * (1) + (1073741824); }
if (jp & (1 << 31)) { temp = temp * (1) + (-2147483648); }//注意值为1的参数出现了，显示了长周期计算后的规律性。
return ((r = temp) >> 16) & 0x7fff;
	}
*/
//在这个基础上可以进一步优化(再次k*k展开)，这个优化交给编译器进行。
//这个jump处理特殊的情形时，效率还会低于一开始的版本。为此，保留前者，并将这个常数级jump命名为deepjump。
//以下就是完整的代码：
// Linear congruential generator:
//     state <- state * 214013 + 2531011   (mod 2^32)
//     output = bits 16..30 of the new state, i.e. a value in [0, 0x7fff].
// The benchmark in this file compares it against the library rand().
//
// Each tprand object carries its own state, so independent streams are just
// independent objects. All arithmetic is unsigned and every stored state is
// reduced modulo 2^32, so operator(), jump() and deepjump() leave exactly the
// same value in `r` regardless of how wide unsigned long is on the platform.
struct tprand
{
	// Generator state. Public on purpose: callers may read it or assign a
	// new seed to it directly.
	unsigned long r;

	// Starts the sequence from `seed`.
	tprand(unsigned seed = 0) : r(seed) {}

	// One step of the recurrence, reduced to 32 bits. Only the low 32 bits
	// of `state` influence the result.
	static unsigned long advance(unsigned long state)
	{
		return (state * 214013UL + 2531011UL) & 0xFFFFFFFFUL;
	}

	// Bits 16..30 of a state: the number handed back to callers.
	static int extract(unsigned long state)
	{
		return static_cast<int>((state >> 16) & 0x7fff);
	}

	// Advances the generator by one step and returns the new output.
	int operator()(void)
	{
		r = advance(r);
		return extract(r);
	}

	// Stateless form: returns the output that follows state `r` without
	// touching this object. The parameter intentionally shadows the member,
	// so rnd(rnd.r) previews what the next rnd() will return.
	int operator()(unsigned long r) const
	{
		return extract(advance(r));
	}

	// Advances the generator by `jp` steps and returns the output of the
	// state reached. For jp <= 0 the state is left untouched and the output
	// of the current state is returned.
	// Cost is linear in jp, with eight steps folded into one multiply-add.
	int jump(int jp)
	{
		int done = 0;
		// 4103125409 is 214013^8 mod 2^32 (the original code wrote it as the
		// signed value -191841887) and 2115878600 is the matching increment
		// of the eight-fold composed step.
		// `jp - done >= 8` is the overflow-free form of `done < jp - 7`.
		while (jp - done >= 8) {
			r = (r * 4103125409UL + 2115878600UL) & 0xFFFFFFFFUL;
			done += 8;
		}
		for (; done < jp; done++) {
			r = advance(r);
		}
		return extract(r);
	}

	// Advances the generator by `jp` steps using one multiply-add per set
	// bit of `jp`, and returns the output of the state reached.
	// deepjump(0) leaves the state untouched and returns the current output.
	//
	// The affine map x -> mul*x + add for 2^k steps is obtained by composing
	// the 2^(k-1)-step map with itself:
	//     mul' = mul * mul,   add' = add * (mul + 1)      (mod 2^32)
	// which yields the same coefficients the original code listed as a
	// 32-line table of signed literals.
	int deepjump(unsigned jp)
	{
		unsigned long mul = 214013UL;
		unsigned long add = 2531011UL;
		unsigned long state = r;
		for (; jp != 0; jp >>= 1) {
			if (jp & 1u) {
				state = (state * mul + add) & 0xFFFFFFFFUL;
			}
			// Operands are below 2^32 (mul + 1 is at most 2^32), so the
			// products cannot lose low-order bits even with 64-bit longs.
			add = (add * (mul + 1UL)) & 0xFFFFFFFFUL;
			mul = (mul * mul) & 0xFFFFFFFFUL;
		}
		r = state;
		return extract(r);
	}
};

//测试代码如下：
// Benchmark driver. Starting from the same seed each time, it advances a
// generator by 65536 * testloop steps in four different ways and prints, for
// each, the elapsed clock() ticks followed by the next value produced:
//   1. the library rand(),
//   2. tprand::operator() called in a plain loop,
//   3. tprand::jump(testloop) called 65536 times,
//   4. tprand::deepjump(testloop) called 65536 times.
// Equal trailing values across the four lines mean the four paths reached
// the same point of the same sequence.
int main()
{
	const int initrand = 1001;  // seed shared by all four runs
	const int testloop = 1725;  // steps per outer iteration (arbitrary value)
	const int rounds = 65536;   // outer iterations per run

	tprand rnd(initrand);
	clock_t from, to;

	// A side-by-side dump used earlier to compare the two generators:
	//   srand(1001);
	//   for (int i = 0; i < 64; i++) printf("%5d,%-5d\t", rand(), rnd());

	// Library rand().
	srand(initrand);
	from = clock();
	for (int i = 0; i < rounds; i++) {
		for (int k = 0; k < testloop; k++) {
			rand();
		}
	}
	to = clock();
	// One more draw shows where the sequence ended up. The tick count is
	// cast to long and the draw printed with %d so that each argument
	// matches its conversion specifier.
	printf("rand loop:%ld-%d\n", static_cast<long>(to - from), rand());

	// tprand, one step per call.
	rnd.r = initrand;
	from = clock();
	for (int i = 0; i < rounds; i++) {
		for (int k = 0; k < testloop; k++) {
			rnd();
		}
	}
	to = clock();
	printf("plain loop:%ld-%d\n", static_cast<long>(to - from), rnd());

	// tprand, linear-time jump.
	rnd.r = initrand;
	from = clock();
	for (int i = 0; i < rounds; i++) {
		rnd.jump(testloop);
	}
	to = clock();
	printf("plain jump:%ld-%d\n", static_cast<long>(to - from), rnd());

	// tprand, jump driven by the bits of the step count.
	rnd.r = initrand;
	from = clock();
	for (int i = 0; i < rounds; i++) {
		rnd.deepjump(testloop);
	}
	to = clock();
	printf("deep jump:%ld-%d\n", static_cast<long>(to - from), rnd());

	// The whole distance can also be covered by a single call; its result is
	// not checked here.
	rnd.deepjump(rounds * testloop);

	// Author's conclusions from the sample run quoted after this function:
	// tprand produced the same values as rand(), jump and deepjump landed on
	// the same value as stepping one call at a time, and deepjump was the
	// fastest of the four for long jumps.
	return 0;
}
//一个输出示例：(vs release64下)
/*
rand loop:4531-12336
plain loop:87-12336
plain jump:29-12336
deep jump:0-12336
*/