很多年前,老刘在群里问个问题:如何找到数字第一个非0的bit位置。
第一想法肯定是遍历,稍微想想肯定就会想到空间换时间的写法。
// Returns the zero-based index of the lowest set bit of `number`, or -1 when
// `number` is 0.
//
// The value is examined one byte at a time, least-significant byte first; the
// first non-zero byte is resolved through `maskArray`.
//
// T         - an integer type (the byte extraction uses >> and &).
// number    - value to scan.
// maskArray - table indexed by a byte value (1..255) that yields the index of
//             that byte's lowest set bit; it is read at indices up to 255.
template<class T>
int traverseBitFind(T number, char maskArray[])
{
    if (number == 0)
        return -1;
    const int byteCount = static_cast<int>(sizeof(T));
    for (int i = 0; i < byteCount; ++i)
    {
        // Extract byte i by value instead of reading the object representation
        // through a pointer, so the answer does not depend on host byte order.
        const unsigned char byte =
            static_cast<unsigned char>((number >> (i * 8)) & 0xFF);
        if (byte != 0)
            return i * 8 + maskArray[byte];
    }
    // Not reached for a non-zero number; kept as a defensive fallback.
    return -1;
}
当时想起《编程珠玑》里面说的查找问题使用二分法,这个也是查找问题,那么如果使用二分法的话要怎么构造,当时和老刘讲述了下逻辑,但自己却从来没有手动写代码去实现过,这次采用低位非0 bit,逻辑简单点。
// MaskInit<T, M>::mask is a compile-time constant of type T whose M
// least-significant bits are set (M >= 1).
template<class T, int M> class MaskInit;

// Recursive case: the (M-1)-bit mask with bit (M-1) added on top.
template<class T, int M>
class MaskInit
{
public:
    static const T mask =
        MaskInit<T, M - 1>::mask | (static_cast<T>(1) << (M - 1));
};

// Base case that ends the recursion: only bit 0 is set.
template<class T>
class MaskInit<T, 1>
{
public:
    static const T mask = 1;
};
// Locates the lowest set bit of an integer by repeatedly halving the range of
// bits that is inspected.
template<class T>
class BinaryBitSearch
{
public:
    // Mask covering the low half of T's bits (sizeof(T) * 4 of them).
    static const T MASK = MaskInit< T, sizeof(T)<<2 >::mask;

    // Returns the zero-based index of the lowest set bit of `number`,
    // or -1 when `number` is 0.
    static int cycleBitFind(T number)
    {
        if (number == 0)
            return -1;

        int index = 0;
        T window = MASK;
        for (int width = sizeof(T) << 2; width > 0; )
        {
            const T lowPart = number & window;
            if (lowPart == 0)
            {
                // Nothing set in the low `width` bits: skip past them.
                index += width;
                number >>= width;
            }
            // Halve the window; the mask shrinks by the new, halved width.
            width >>= 1;
            window >>= width;
        }
        return index;
    }
};
单纯从理论上分析,二分法的时间复杂度是O(log n),而第一种算法是O(n);但因为位数很小,二分法其实只有在64位时才有优势,因此实现时根据类型自动选择算法。
// Turns a compile-time boolean into a distinct empty type, so that function
// overloads can be selected on Bool2Type<true> versus Bool2Type<false>.
template<bool V>
class Bool2Type {};
// Power<V>::value is floor(log2(V)) for V >= 1, computed at compile time by
// halving V until it reaches 1.
// V <= 0 never reaches the base case and must not be used.
template<int V>
class Power
{
public:
    // The shift is parenthesized on purpose: since C++11 an unparenthesized
    // `>>` inside a template argument list closes the list instead of shifting.
    static const int value = 1 + Power<(V >> 1)>::value;
};

// Base case: log2(1) == 0.
template<> class Power<1>
{
public:
    static const int value = 0;
};
// Compile-time comparison of the two search strategies for type T:
// log2(number of bits in T) versus the number of bytes in T.
// `value` is true when the byte count does not exceed the log2 figure.
template<class T>
class TimeComplexity
{
public:
    static const bool value = (sizeof(T) <= Power<sizeof(T) * 8>::value);
};
template<class T>
class BitFindWrap
{
public:
static int bitFind(T number)
{
return bitFind(number, Bool2Type<TimeComplexity<T>::value>());
}
private:
static int bitFind(T number, Bool2Type<true>)
{
return traverseBitFind<T>(number, maskArray);
}
static int bitFind(T number, Bool2Type<false>)
{
return BinaryBitSearch<T>::cycleBitFind(number);
}
};
因为有与或运算,所以还是希望类型为整型,当传入非整数类型时能在编译时期检查出错误。
// Integer types accepted by INNERTYPE_CHECK / BITFIND.
// `unsigned long long` and `long long` are appended at the end (so the
// IndexOf positions of the original eight entries are unchanged) to admit
// 64-bit integers on platforms where `long` is only 32 bits wide.
typedef TYPELIST_10(unsigned long, long, unsigned int, int, unsigned short, short, unsigned char, char, unsigned long long, long long) INNERTYPELIST;
// IndexOf<TList, T>::value is the zero-based position of T in the typelist
// TList, or -1 when T does not occur in it.
template<class TList, class T> struct IndexOf;

// End of list reached without a match.
template<class T>
struct IndexOf<NullType, T>
{
    enum { value = -1 };
};

// T is the head of the list.
template<class T, class Tail>
struct IndexOf<Typelist<T, Tail>, T>
{
    enum { value = 0 };
};

// Otherwise search the tail and, if found there, add one for the skipped head.
template<class Head, class Tail, class T>
struct IndexOf<Typelist<Head, Tail>, T>
{
private:
    enum { inTail = IndexOf<Tail, T>::value };
public:
    enum { value = (inTail == -1) ? -1 : 1 + inTail };
};
// Only the <true> specialization is defined, so requiring
// CompileTimeError<false> to be a complete type is a compile error.
template<bool> struct CompileTimeError;
template<> struct CompileTimeError<true> {};

// Compile-time assertion: compilation fails when `expr` evaluates to zero.
#define STATIC_CHECK(expr) \
    { (void)sizeof(CompileTimeError<((expr) != 0)>); }

// Compile-time assertion that `type` is listed in INNERTYPELIST.
#define INNERTYPE_CHECK(type) \
    STATIC_CHECK((IndexOf<INNERTYPELIST, type>::value != -1))
在书中有一个可以携带错误信息的检查版本,但试了下发现不行,所以还是采用这个版本。
上面的选择方式会增加执行时的时间,并不理想,改用偏特化与宏来减少函数层数。
// Primary template: declared only. Just the two partial specializations below
// (both with InnerType == true) exist, so any other combination is a compile
// error at the point of use.
template<class T, bool Choice, bool InnerType> class BitFindClass;

// Choice == false: halving-search implementation (body elided in this excerpt;
// see the full listing of version1.cpp).
template<class T>
class BitFindClass<T, false, true>
{
......
};

// Choice == true: byte-wise table-lookup implementation (body elided in this
// excerpt). The `template<class T>` header is required for this partial
// specialization as well.
template<class T>
class BitFindClass<T, true, true>
{
......
};
// BITFIND(Type, number) expands to a call of BitFindClass<...>::bitFind(number).
// The specialization is chosen at compile time from TimeComplexity<Type>::value
// and from whether Type occurs in INNERTYPELIST. BitFindClass is only
// specialized for InnerType == true, so a Type outside the list does not compile.
// The expansion is an expression and carries no trailing semicolon.
#define BITFIND(Type, number) \
BitFindClass<Type, (TimeComplexity<Type>::value), (IndexOf<INNERTYPELIST, Type>::value != -1)>::bitFind(number)
第一版本是根据理论想象中的版本,事实上采用二分法的性能非常烂,与运算占用大量时间,后续文章是基于此版本进行优化处理,其性能在64位情况下是会优于第一个算法,但提升并不大。
参考:《C++设计新思维》
发现在这个博客不能携带附件,就把最终代码贴在后面好了:
utils.h
#include <sys/timeb.h>
#include <stdlib.h>
#include <stdio.h>
// Empty marker type that terminates a Typelist.
class NullType
{
};
// Compile-time list node: `Head` is the first type, `Tail` is the rest of the
// list (another Typelist, or NullType at the end).
template <class HeadT, class TailT>
struct Typelist
{
    typedef HeadT Head;
    typedef TailT Tail;
};
// Macros that let a typelist be written as a flat argument list instead of
// hand-nested Typelist<> instantiations.
// TYPELIST_n(T1, ..., Tn) expands to Typelist<T1, TYPELIST_(n-1)(T2, ..., Tn)>,
// and TYPELIST_0() expands to the terminator NullType. Lists of up to ten
// types are supported.
#define TYPELIST_0() NullType
#define TYPELIST_1(T1) Typelist<T1,TYPELIST_0()>
#define TYPELIST_2(T1,T2) Typelist<T1,TYPELIST_1(T2)>
#define TYPELIST_3(T1,T2,T3) Typelist<T1,TYPELIST_2(T2,T3)>
#define TYPELIST_4(T1,T2,T3,T4) Typelist<T1,TYPELIST_3(T2,T3,T4)>
#define TYPELIST_5(T1,T2,T3,T4,T5) Typelist<T1,TYPELIST_4(T2,T3,T4,T5)>
#define TYPELIST_6(T1,T2,T3,T4,T5,T6) Typelist<T1,TYPELIST_5(T2,T3,T4,T5,T6)>
#define TYPELIST_7(T1,T2,T3,T4,T5,T6,T7) Typelist<T1,TYPELIST_6(T2,T3,T4,T5,T6,T7)>
#define TYPELIST_8(T1,T2,T3,T4,T5,T6,T7,T8) Typelist<T1,TYPELIST_7(T2,T3,T4,T5,T6,T7,T8)>
#define TYPELIST_9(T1,T2,T3,T4,T5,T6,T7,T8,T9) Typelist<T1,TYPELIST_8(T2,T3,T4,T5,T6,T7,T8,T9)>
#define TYPELIST_10(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10) Typelist<T1,TYPELIST_9(T2,T3,T4,T5,T6,T7,T8,T9,T10)>
// Integer types accepted by INNERTYPE_CHECK / BITFIND.
// `unsigned long long` and `long long` are appended at the end (so the
// IndexOf positions of the original eight entries are unchanged) to admit
// 64-bit integers on platforms where `long` is only 32 bits wide.
typedef TYPELIST_10(unsigned long, long, unsigned int, int, unsigned short, short, unsigned char, char, unsigned long long, long long) INNERTYPELIST;
// IndexOf<TList, T>::value is the zero-based position of T in the typelist
// TList, or -1 when T does not occur in it.
template<class TList, class T> struct IndexOf;

// End of list reached without a match.
template<class T>
struct IndexOf<NullType, T>
{
    enum { value = -1 };
};

// T is the head of the list.
template<class T, class Tail>
struct IndexOf<Typelist<T, Tail>, T>
{
    enum { value = 0 };
};

// Otherwise search the tail and, if found there, add one for the skipped head.
template<class Head, class Tail, class T>
struct IndexOf<Typelist<Head, Tail>, T>
{
private:
    enum { inTail = IndexOf<Tail, T>::value };
public:
    enum { value = (inTail == -1) ? -1 : 1 + inTail };
};
//template<bool> struct CompileTimeChecker
//{
// CompileTimeChecker(...) {}
//};
//template<> struct CompileTimeChecker<false> {};
//#define STATIC_CHECK(expr, msg) \
// { \
// class ERROR_##msg {}; \
// sizeof(CompileTimeChecker<(expr)>(ERROR_##msg())); \
// }
// Only the <true> specialization is defined, so requiring
// CompileTimeError<false> to be a complete type is a compile error.
template<bool> struct CompileTimeError;
template<> struct CompileTimeError<true> {};

// Compile-time assertion: compilation fails when `expr` evaluates to zero.
#define STATIC_CHECK(expr) \
    { (void)sizeof(CompileTimeError<((expr) != 0)>); }

// Compile-time assertion that `type` is listed in INNERTYPELIST.
#define INNERTYPE_CHECK(type) \
    STATIC_CHECK((IndexOf<INNERTYPELIST, type>::value != -1))
// Turns a compile-time boolean into a distinct empty type, so that function
// overloads can be selected on Bool2Type<true> versus Bool2Type<false>.
template<bool V>
class Bool2Type {};
// Power<V>::value is floor(log2(V)) for V >= 1, computed at compile time by
// halving V until it reaches 1.
// V <= 0 never reaches the base case and must not be used.
template<int V>
class Power
{
public:
    // The shift is parenthesized on purpose: since C++11 an unparenthesized
    // `>>` inside a template argument list closes the list instead of shifting.
    static const int value = 1 + Power<(V >> 1)>::value;
};

// Base case: log2(1) == 0.
template<> class Power<1>
{
public:
    static const int value = 0;
};
// Lookup table indexed by a byte value; main() fills it via initMaskArray()
// with the index of each byte's lowest set bit. Being `static` in this header,
// every translation unit that includes it gets its own copy.
static char maskArray[256];
// Fills `array` with 64 single-bit test values: array[i] has only bit i set.
// Entries whose index is beyond the width of `long` are set to 0.
//
// array - destination; must have room for 64 elements.
void initTestArray(long array[])
{
    const int entryCount = 64;
    const int longBits = static_cast<int>(sizeof(long) * 8);
    for (int i = 0; i < entryCount; i++)
    {
        // Build each pattern in unsigned arithmetic: left-shifting a signed
        // long into or past its sign bit is not portable, and the shift count
        // must stay below the width of the type.
        array[i] = (i < longBits) ? static_cast<long>(1UL << i) : 0L;
    }
}
// Fills the 256-entry table `maskArray` so that, for every byte value v in
// 1..255, maskArray[v] is the index (0..7) of v's lowest set bit.
// Entry 0 is set to 8, one past the highest bit index of a byte.
void initMaskArray(char maskArray[])
{
    maskArray[0] = 8;
    for (int value = 1; value <= 0xFF; ++value)
    {
        // value is non-zero, so this scan always stops at some bit in 0..7.
        int bit = 0;
        while (((value >> bit) & 0x1) == 0)
            ++bit;
        maskArray[value] = static_cast<char>(bit);
    }
}
// Returns the current time as reported by ftime(), expressed in milliseconds
// (seconds * 1000 + the millisecond part).
long long systemtime()
{
    timeb now;
    ftime(&now);
    // Widen before multiplying so the product cannot overflow when time_t is
    // a 32-bit type.
    return static_cast<long long>(now.time) * 1000 + now.millitm;
}
version1.cpp
#include "utils.h"
// Returns the zero-based index of the lowest set bit of `number`, or -1 when
// `number` is 0.
//
// The value is examined one byte at a time, least-significant byte first; the
// first non-zero byte is resolved through `maskArray`.
//
// T         - an integer type (the byte extraction uses >> and &).
// number    - value to scan.
// maskArray - table indexed by a byte value (1..255) that yields the index of
//             that byte's lowest set bit; it is read at indices up to 255.
template<class T>
int traverseBitFind(T number, char maskArray[])
{
    if (number == 0)
        return -1;
    const int byteCount = static_cast<int>(sizeof(T));
    for (int i = 0; i < byteCount; ++i)
    {
        // Extract byte i by value instead of reading the object representation
        // through a pointer, so the answer does not depend on host byte order.
        const unsigned char byte =
            static_cast<unsigned char>((number >> (i * 8)) & 0xFF);
        if (byte != 0)
            return i * 8 + maskArray[byte];
    }
    // Not reached for a non-zero number; kept as a defensive fallback.
    return -1;
}
// MaskInit<T, M>::mask is a compile-time constant of type T whose M
// least-significant bits are set (M >= 1).
template<class T, int M> class MaskInit;

// Recursive case: the (M-1)-bit mask with bit (M-1) added on top.
template<class T, int M>
class MaskInit
{
public:
    static const T mask =
        MaskInit<T, M - 1>::mask | (static_cast<T>(1) << (M - 1));
};

// Base case that ends the recursion: only bit 0 is set.
template<class T>
class MaskInit<T, 1>
{
public:
    static const T mask = 1;
};
// Locates the lowest set bit of an integer by repeatedly halving the range of
// bits that is inspected.
template<class T>
class BinaryBitSearch
{
public:
    // Mask covering the low half of T's bits (sizeof(T) * 4 of them).
    static const T MASK = MaskInit< T, sizeof(T)<<2 >::mask;

    // Returns the zero-based index of the lowest set bit of `number`,
    // or -1 when `number` is 0.
    static int cycleBitFind(T number)
    {
        if (number == 0)
            return -1;

        int index = 0;
        T window = MASK;
        for (int width = sizeof(T) << 2; width > 0; )
        {
            const T lowPart = number & window;
            if (lowPart == 0)
            {
                // Nothing set in the low `width` bits: skip past them.
                index += width;
                number >>= width;
            }
            // Halve the window; the mask shrinks by the new, halved width.
            width >>= 1;
            window >>= width;
        }
        return index;
    }
};
// Compile-time comparison of the two search strategies for type T:
// log2(number of bits in T) versus the number of bytes in T.
// `value` is true when the byte count does not exceed the log2 figure.
template<class T>
class TimeComplexity
{
public:
    static const bool value = (sizeof(T) <= Power<sizeof(T) * 8>::value);
};
template<class T>
class BitFindWrap
{
public:
static int bitFind(T number)
{
INNERTYPE_CHECK(T);
return bitFind(number, Bool2Type<TimeComplexity<T>::value>());
}
private:
static int bitFind(T number, Bool2Type<true>)
{
return traverseBitFind<T>(number, maskArray);
}
static int bitFind(T number, Bool2Type<false>)
{
return BinaryBitSearch<T>::cycleBitFind(number);
}
};
// Primary template: declared only. Just the partial specializations with
// InnerType == true are defined, so other combinations fail to compile at the
// point of use.
template<class T, bool Choice, bool InnerType> class BitFindClass;

// Choice == false: lowest-set-bit lookup by repeatedly halving the inspected
// bit range.
template<class T>
class BitFindClass<T, false, true>
{
public:
    // Mask covering the low half of T's bits (sizeof(T) * 4 of them).
    static const T MASK = MaskInit< T, sizeof(T)<<2 >::mask;

    // Returns the zero-based index of the lowest set bit of `number`,
    // or -1 when `number` is 0.
    static int bitFind(T number)
    {
        if (number == 0)
            return -1;

        int found = 0;
        int span = sizeof(T) << 2;
        T lowMask = MASK;
        // span starts at sizeof(T) * 4, which is always positive, so the body
        // runs at least once.
        do
        {
            const T lowBits = number & lowMask;
            if (lowBits == 0)
            {
                // The low `span` bits are clear: drop them and keep counting.
                found += span;
                number >>= span;
            }
            span >>= 1;
            lowMask >>= span;
        } while (span > 0);
        return found;
    }
};
// Choice == true: lowest-set-bit lookup that walks the value one byte at a
// time, least-significant byte first, and resolves the first non-zero byte
// through the global maskArray table.
template<class T>
class BitFindClass<T, true, true>
{
public:
    // Returns the zero-based index of the lowest set bit of `number`,
    // or -1 when `number` is 0. maskArray must have been initialized.
    static int bitFind(T number)
    {
        if (number == 0)
            return -1;
        const int byteCount = static_cast<int>(sizeof(T));
        for (int i = 0; i < byteCount; ++i)
        {
            // Extract byte i by value instead of reading the object
            // representation through a pointer, so the answer does not
            // depend on host byte order.
            const unsigned char byte =
                static_cast<unsigned char>((number >> (i * 8)) & 0xFF);
            if (byte != 0)
                return i * 8 + maskArray[byte];
        }
        // Not reached for a non-zero number; kept as a defensive fallback.
        return -1;
    }
};
// BITFIND(Type, number) expands to a call of BitFindClass<...>::bitFind(number).
// The specialization is chosen at compile time from TimeComplexity<Type>::value
// and from whether Type occurs in INNERTYPELIST. BitFindClass is only
// specialized for InnerType == true, so a Type outside the list does not compile.
// The expansion is an expression and carries no trailing semicolon.
#define BITFIND(Type, number) \
BitFindClass<Type, (TimeComplexity<Type>::value), (IndexOf<INNERTYPELIST, Type>::value != -1)>::bitFind(number)
// Benchmark driver: runs each of the four entry points over 1,000,000 rounds
// of 64 single-bit inputs and prints the elapsed time of each in milliseconds.
//
// NOTE(review): the return values of the measured calls are discarded, so an
// optimizing build may remove part or all of the measured work.
int main(int argc, char* argv[])
{
    initMaskArray(maskArray);
    long array[64];
    initTestArray(array);

    // Timestamps stay in long long, the type systemtime() returns, so nothing
    // is truncated where long is 32 bits.
    long long begTime = systemtime();
    for (int i = 0; i < 1000000; i++)
        for (int j = 0; j < 64; j++)
            BitFindWrap<long>::bitFind(array[j]);
    long long test1 = systemtime() - begTime;

    begTime = systemtime();
    for (int i = 0; i < 1000000; i++)
        for (int j = 0; j < 64; j++)
            traverseBitFind<long>(array[j], maskArray);
    long long test2 = systemtime() - begTime;

    begTime = systemtime();
    for (int i = 0; i < 1000000; i++)
        for (int j = 0; j < 64; j++)
            BinaryBitSearch<long>::cycleBitFind(array[j]);
    long long test3 = systemtime() - begTime;

    begTime = systemtime();
    for (int i = 0; i < 1000000; i++)
        for (int j = 0; j < 64; j++)
            BITFIND(long, array[j]);
    long long test4 = systemtime() - begTime;

    // %lld matches the long long arguments; the original %d did not match the
    // long values it was given.
    printf("BitFindWrap:%lld, traverseBitFind:%lld, BinaryBitSearch:%lld, BITFIND:%lld\n", test1, test2, test3, test4);
    return 0;
}