// Counting the number of 1 bits in a binary number

#ifndef PROB21_H
#define PROB21_H
#include <iostream>
#include <stdint.h>

/*** This problem counts the 1 bits in an 8-bit number.
 * More references can be found at
 * http://en.wikipedia.org/wiki/Hamming_weight
 * http://blog.csdn.net/bvbook/archive/2008/04/15/2292823.aspx
 */

// Byte must be an unsigned type: with a signed char, values above 127
// would be converted to negative numbers.
typedef unsigned char Byte;

/*** Version 1: repeated division.
 * The lowest bit is inspected with a remainder test and then dropped by
 * halving v; the loop ends as soon as v reaches zero.
 * Drawback noted by the author: modulo and division are slow operations.
 *
 * Returns the number of 1 bits in v (0..8 for an 8-bit Byte).
 */
int count1(Byte v)
{
    int ones = 0;
    for (; v != 0; v /= 2)
    {
        // (v % 2 != 0) evaluates to 1 exactly when the lowest bit is set
        ones += (v % 2 != 0);
    }
    return ones;
}

/*** Version 2: the same bit-by-bit scan as version 1, but using a bit
 * mask and a right shift in place of modulo and division.
 *
 * Returns the number of 1 bits in v.
 */
int count2(Byte v)
{
    int ones = 0;
    for (; v != 0; v >>= 1)
    {
        // test the lowest bit, then shift it out
        if (v & 0x01)
            ones++;
    }
    return ones;
}

/*** Version 3: the versions above take one step per bit position;
 * this one takes one step per 1 bit in v.
 *
 * Returns the number of 1 bits in v.
 */
int count3(Byte v)
{
    int ones = 0;
    for (; v != 0; ones++)
    {
        // v & (v - 1) clears the lowest set bit of v
        v = v & (v - 1);
    }
    return ones;
}


/***
 * SUMMARY: the three versions above all use a loop structure.
 * But according to CSAPP, a branch penalty can cost tens of CPU cycles.
 */


/*** We need a more efficient algorithm.
 * We can work out the answer for every possible value in advance
 * and then simply look it up, for example with a switch:
 * int count(v)
 * {
 *     int num = 0;
 *     switch (v)
 *     {
 *          case 0x1:
 *          case 0x2:
 *          .......
 *     }
 * }
 * However, such a function is slower than versions 1, 2 and 3,
 * because it requires 128 comparisons on average.
 * A better solution is to build a lookup table:
 * countTable[i] is the number of 1 bits in the byte value i (0..255).
 *
 * The table is static const because it is read-only data defined in a
 * header: internal linkage lets several source files include this header
 * without multiple-definition link errors, and const prevents accidental
 * modification of the precomputed counts.
 */
static const int countTable[256]={0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
                            1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
                            1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
                            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
                            1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
                            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
                            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
                            3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
                            1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
                            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
                            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
                            3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
                            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
                            3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
                            3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
                            4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};

/*** Version 4: a single table lookup.
 * v is used directly as the index into the 256-entry countTable.
 *
 * Returns the number of 1 bits in v.
 */
int count4(Byte v)
{
    const unsigned index = v;
    return countTable[index];
}
// A lookup table seems fast, but the table entry may not be in the cache
// when it is accessed; in that case it may cost several hundred CPU cycles.


/***
 * Extension: count the 1 bits in a DWORD.
 * The 256-entry byte table cannot be indexed with a whole DWORD,
 * so version 4 is adjusted slightly: the table is applied to the low
 * byte of d four times, shifting d right by 8 bits after each lookup.
 *
 * Returns the sum of the four per-byte counts.
 */
typedef unsigned int DWORD;
int count5(DWORD d)
{
    int total = 0;
    int step;
    for (step = 0; step < 4; step++)
    {
        total += countTable[d & 0xff];  // count the current low byte
        d >>= 8;                        // move the next byte into place
    }
    return total;
}


/*** the problem is actually the digit sum of the binary representation
 *   of a given number and the ℓ₁ norm of a bit vector.
 *   this is the hamming weight of bit vector.
 *   more reference can be seen
 *   http://en.wikipedia.org/wiki/Hamming_weight
 *   the below code is copied from wiki
 */
// Types and constants used in the functions below.
// uint64_t is the standard unsigned 64-bit type from <stdint.h>. The bit
// tricks below need an unsigned type: they rely on right shifts that fill
// with zeros and on addition/subtraction/multiplication that wrap modulo
// 2^64. With a signed 64-bit type those operations can overflow, which is
// undefined behaviour.
static const uint64_t m1  = 0x5555555555555555ULL; //binary: 0101...
static const uint64_t m2  = 0x3333333333333333ULL; //binary: 00110011..
static const uint64_t m4  = 0x0f0f0f0f0f0f0f0fULL; //binary:  4 zeros,  4 ones ...
static const uint64_t m8  = 0x00ff00ff00ff00ffULL; //binary:  8 zeros,  8 ones ...
static const uint64_t m16 = 0x0000ffff0000ffffULL; //binary: 16 zeros, 16 ones ...
static const uint64_t m32 = 0x00000000ffffffffULL; //binary: 32 zeros, 32 ones
static const uint64_t hff = 0xffffffffffffffffULL; //binary: all ones
static const uint64_t h01 = 0x0101010101010101ULL; //the sum of 256 to the power of 0,1,2,3...

// This is a naive implementation, shown for comparison,
// and to help in understanding the better functions.
// It uses 24 arithmetic operations (shift, add, and).
// Each step adds neighbouring fields of x in parallel, doubling the field
// width, until the whole word holds a single count.
// Returns the number of 1 bits in x (0..64).
int popcount_1(uint64_t x) {
    x = (x & m1 ) + ((x >>  1) & m1 ); //put count of each  2 bits into those  2 bits
    x = (x & m2 ) + ((x >>  2) & m2 ); //put count of each  4 bits into those  4 bits
    x = (x & m4 ) + ((x >>  4) & m4 ); //put count of each  8 bits into those  8 bits
    x = (x & m8 ) + ((x >>  8) & m8 ); //put count of each 16 bits into those 16 bits
    x = (x & m16) + ((x >> 16) & m16); //put count of each 32 bits into those 32 bits
    x = (x & m32) + ((x >> 32) & m32); //put count of each 64 bits into those 64 bits
    return (int)x; // at most 64, so the narrowing cast is lossless
}

// This uses fewer arithmetic operations than any other known
// implementation on machines with slow multiplication.
// It uses 17 arithmetic operations.
// Returns the number of 1 bits in x (0..64).
int popcount_2(uint64_t x) {
    x -= (x >> 1) & m1;             //put count of each 2 bits into those 2 bits
    x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
    x = (x + (x >> 4)) & m4;        //put count of each 8 bits into those 8 bits
    x += x >>  8;  //put count of each 16 bits into their lowest 8 bits
    x += x >> 16;  //put count of each 32 bits into their lowest 8 bits
    x += x >> 32;  //put count of each 64 bits into their lowest 8 bits
    return (int)(x & 0x7f); // the total is at most 64, so the low 7 bits hold it
}

// This uses fewer arithmetic operations than any other known
// implementation on machines with fast multiplication.
// It uses 12 arithmetic operations, one of which is a multiply.
// Returns the number of 1 bits in x (0..64).
int popcount_3(uint64_t x) {
    x -= (x >> 1) & m1;             //put count of each 2 bits into those 2 bits
    x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
    x = (x + (x >> 4)) & m4;        //put count of each 8 bits into those 8 bits
    // Multiplying by h01 sums all eight byte counts into the top byte
    // (the product wraps modulo 2^64, which is well defined for unsigned types).
    return (int)((x * h01) >> 56);  //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
}
// the methods in wikipedia are actually faster
// it can be done without access to memory



/*** GCC provides a built-in function to do this:
*int __builtin_popcount (unsigned int x);
*int count6(unsigned int x)
*{
*    return __builtin_popcount(x);
*}
*/

/*** Actually, SSE supports population count in hardware.
 * Instruction: POPCNT
 */
#endif // PROB21_H

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值