在网上搜索大数据量排序问题时,在 www.jobcoding.com/big-data/bigdata-sampl 找到了相关的介绍,其中"重复问题"部分有这样一道题:
已知某个文件内包含一些电话号码,每个号码为8位数字,统计不同号码的个数。
文章给了一段C代码,因为自己对C++更熟悉一点,于是就动了把代码改写成C++的想法,花了点时间改写成如下代码:
BitMap.h
#ifndef BITMAP_H
#define BITMAP_H
// Bitmap over the closed integer range [left, right]: one bit per value,
// packed into an array of int words that the object owns.
//
// Indexing convention as implemented in BitMap.cpp:
//   - setBit(n) takes an actual value and rebases it by subtracting `min`.
//   - getBit(n) and clearBit(n) take a 0-based bit position (value - min);
//     printBit() and the constructor call them that way.
//
// The object owns a heap array, so copying is disabled: the copy operations
// are declared private and intentionally left undefined. A compiler-generated
// copy would share `words` between two objects and free it twice.
class BitMap{
public:
    // Creates a bitmap covering [left, right] with every bit cleared.
    BitMap(int left=0,int right=1000);
    // Releases the word array.
    ~BitMap();
    // Marks the value n (min <= n <= max) as present.
    void setBit(int);
    // Returns 1 if the bit at the given 0-based position is set, otherwise 0.
    int getBit(int);
    // Clears the bit at the given 0-based position.
    void clearBit(int);
    // Writes every present value to standard output, each followed by a space.
    void printBit();
private:
    // Non-copyable (declared, never defined).
    BitMap(const BitMap&);
    BitMap& operator=(const BitMap&);

    // Index of the word that holds bit position b.
    int wordOffset(int);
    // Index of bit position b inside its word.
    int bitOffset(int);
    int min;     // lowest representable value (inclusive)
    int max;     // highest representable value (inclusive)
    int *words;  // owned bit storage, allocated in the constructor
};
#endif
BitMap.cpp
#include "BitMap.h"
#include <iostream>
#include <stdlib.h>
using namespace std;
#define ARRNUM 100
#define BITS_PER_WORD 32
// Builds a bitmap that can track every integer in the closed range
// [left, right], with all bits initially cleared.
//
// left  - lowest value that can be stored (kept in `min`).
// right - highest value that can be stored (kept in `max`).
//
// The span right - left + 1 is computed in int arithmetic and must fit in an
// int. An inverted range (right < left) is treated as empty instead of
// producing a zero or negative array length.
//
// Allocation failure is reported by operator new[] throwing std::bad_alloc;
// plain new never returns NULL, so no NULL check is performed here.
BitMap::BitMap(int left,int right):min(left),max(right){
    const int bitCount = (max >= min) ? (max - min + 1) : 0;
    // One extra word covers the partial word when bitCount is not a multiple
    // of BITS_PER_WORD.
    const int wordCount = 1 + bitCount/BITS_PER_WORD;
    // The trailing () value-initializes the array, i.e. zeroes every word in
    // one pass instead of clearing the bits one at a time.
    words = new int[wordCount]();
}
// Frees the word array and leaves the member pointer null.
BitMap::~BitMap(){
    int *owned = words;
    words = NULL;
    delete[] owned;
}
void BitMap::setBit(int n){
n -= min;
words[wordOffset(n)] |= (1 << bitOffset(n));
}
int BitMap::getBit(int n){
int bit = words[wordOffset(n)] & (1 << bitOffset(n));
return bit != 0;
}
void BitMap::clearBit(int n){
words[wordOffset(n)] &= ~(1 << bitOffset(n));
}
// Writes every value whose bit is set to standard output, in ascending order,
// each followed by a single space. Bit positions are converted back to values
// by adding `min`.
void BitMap::printBit(){
    const int total = max - min + 1;
    int position = 0;
    while(position < total){
        if(getBit(position) != 0)
            cout<<(min + position)<<" ";
        ++position;
    }
}
// Returns the index of the word in `words` that contains bit position b
// (b divided by BITS_PER_WORD).
int BitMap::wordOffset(int b){
    const int wordIndex = b / BITS_PER_WORD;
    return wordIndex;
}
// Returns the index of bit position b inside its word
// (the remainder of b divided by BITS_PER_WORD).
int BitMap::bitOffset(int b){
    const int bitIndex = b % BITS_PER_WORD;
    return bitIndex;
}
测试代码testBitMap.cpp
#include "BitMap.h"
#include "time.h"
#include <iostream>
#include <stdlib.h>
using namespace std;
#define ARRNUM 100
// Demo driver: generates ARRNUM pseudo-random 8-digit numbers, echoes them,
// records them in a BitMap covering all 8-digit values, then prints the
// distinct values that were recorded.
int main(){
    const int lowest = 10000000;
    const int highest = 99999999;
    BitMap bitMap(lowest,highest);
    int arr[ARRNUM];
    srand( time(0) );
    // Draw the samples and echo each one, separated by spaces.
    // NOTE(review): rand() yields at most RAND_MAX, so where RAND_MAX is
    // smaller than the span only part of the range can be produced.
    int filled = 0;
    while(filled < ARRNUM){
        arr[filled] = lowest + rand()%(highest-lowest+1);
        cout<<arr[filled]<<" ";
        ++filled;
    }
    cout<<"\n";
    // Record every sample; duplicates collapse onto the same bit.
    for(const int *sample = arr; sample != arr + ARRNUM; ++sample)
        bitMap.setBit(*sample);
    bitMap.printBit();
}