用位图法实现海量数据排序和重复问题
C 风格实现(手写位图;因使用 new/delete,需按 C++ 编译)
#include "stdafx.h"
#include <new>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// Legacy alias for the word width. The bit macros below use BITS_PER_WORD
// instead; kept in case code outside this section still refers to it.
#define BITWORD 32
// Number of random values the demo generates and sorts.
#define ARRNUM 10000
// Inclusive bounds of the value range covered by the bitmap. They are fixed
// parameters of the demo, hence const.
const int mmin = 10000000;
const int mmax = 99999999;
// Count of distinct values in [mmin, mmax], i.e. the number of bits the bitmap
// must provide (this is NOT the largest stored number).
const int N = (mmax-mmin+1);
// Number of bits kept in one bitmap word. The bitmap is an array of int, so
// this assumes a 32-bit int.
#define BITS_PER_WORD 32
// Index of the word that contains logical bit b.
#define WORD_OFFSET(b) ((b) / BITS_PER_WORD)
// Position of logical bit b inside its word.
#define BIT_OFFSET(b) ((b) % BITS_PER_WORD)
/**
 * Marks the value n as present in the bitmap.
 *
 * n is a raw value, not a bit index: mmin is subtracted so that the value
 * mmin maps to logical bit 0.
 *
 * @param words bitmap storage, one int per BITS_PER_WORD logical bits. Its
 *              length is not known here, so the caller must guarantee that
 *              bit (n - mmin) lies inside the allocation.
 * @param n     value to record; values below mmin are ignored.
 */
void SetBit(int *words, int n)
{
    if (n < mmin) {
        // A negative bit index would address memory in front of the bitmap
        // and produce a negative shift count, so such values are dropped.
        return;
    }
    n -= mmin;
    // Build the mask in unsigned arithmetic: 1 << 31 on a signed int shifts
    // into the sign bit.
    const unsigned int mask = 1u << BIT_OFFSET(n);
    words[WORD_OFFSET(n)] =
        static_cast<int>(static_cast<unsigned int>(words[WORD_OFFSET(n)]) | mask);
}
/**
 * Clears logical bit n of the bitmap.
 *
 * n is used directly as a bit index (no offset is subtracted here).
 *
 * @param words bitmap storage, one int per BITS_PER_WORD logical bits. Its
 *              length is not known here, so the caller must guarantee that
 *              bit n lies inside the allocation.
 * @param n     zero-based bit index; negative indices are ignored.
 */
void ClearBit(int *words, int n)
{
    if (n < 0) {
        // A negative index would address memory in front of the bitmap and
        // produce a negative shift count.
        return;
    }
    // Build the mask in unsigned arithmetic: 1 << 31 on a signed int shifts
    // into the sign bit.
    const unsigned int mask = 1u << BIT_OFFSET(n);
    words[WORD_OFFSET(n)] =
        static_cast<int>(static_cast<unsigned int>(words[WORD_OFFSET(n)]) & ~mask);
}
/**
 * Tests whether logical bit n of the bitmap is set.
 *
 * n is used directly as a bit index (no offset is subtracted here).
 *
 * @param words bitmap storage, one int per BITS_PER_WORD logical bits. Its
 *              length is not known here, so the caller must guarantee that
 *              bit n lies inside the allocation.
 * @param n     zero-based bit index.
 * @return 1 if the bit is set, 0 if it is clear or n is negative.
 */
int GetBit(int *words, int n)
{
    if (n < 0) {
        // A negative index would read memory in front of the bitmap and
        // produce a negative shift count.
        return 0;
    }
    // Build the mask in unsigned arithmetic: 1 << 31 on a signed int shifts
    // into the sign bit.
    const unsigned int mask = 1u << BIT_OFFSET(n);
    return (static_cast<unsigned int>(words[WORD_OFFSET(n)]) & mask) != 0;
}
/**
 * Bitmap-sort demo.
 *
 * Generates ARRNUM random values in [mmin, mmin + N), records each one as a
 * single bit in a bitmap, then walks the bitmap in index order. The walk
 * prints the values in ascending order with duplicates collapsed and reports
 * how many distinct values were seen.
 *
 * @return 0 on success, EXIT_FAILURE if the bitmap cannot be allocated.
 */
int main( )
{
    // One int stores BITS_PER_WORD bits; the +1 covers the partial last word.
    const int wordCount = 1 + N/BITS_PER_WORD;

    // Plain new[] signals failure by throwing std::bad_alloc and never yields
    // NULL, so the nothrow form is used to make the check below meaningful.
    // The trailing () zero-initializes every word, i.e. the bitmap starts empty.
    int* words = new (std::nothrow) int[wordCount]();
    if (words == NULL) {
        printf("new error\n\n");
        return EXIT_FAILURE; // a failed run must not report success
    }

    int arr[ARRNUM];
    srand( static_cast<unsigned>(time( NULL )) );
    printf("数组大小:%d\n", ARRNUM);
    // NOTE(review): rand() never exceeds RAND_MAX, which is only guaranteed to
    // be at least 32767 and may be far smaller than N; on such platforms the
    // samples cover only the low end of the range.
    for (int j = 0; j < ARRNUM; j++)
    {
        arr[j] = mmin + rand( )%N;
        printf("%d\t", arr[j]);
    }

    // SetBit takes the raw value and subtracts mmin itself.
    for (int j = 0; j < ARRNUM; j++)
    {
        SetBit(words, arr[j]);
    }

    // Visiting the bits in index order yields the values sorted ascending;
    // a value generated several times still occupies just one bit.
    int count = 0;
    printf("排序后a为:\n");
    for (int i = 0; i < N; i++)
    {
        if (GetBit(words, i)) {
            printf("%d\t", i+mmin);
            count++;
        }
    }
    printf("总个数为:%d\n",count);

    delete[] words;
    system("pause");
    return 0;
}
C++
#include "stdafx.h"
#include <bitset>
#include <iostream>
#include <time.h>
#include <iomanip>
// Number of random samples drawn; also the exclusive upper bound of their
// values and the size of the bitset.
#define ARRNUM 2500
// Number of int-sized words needed to hold ARRNUM bits. Wrapped in parentheses
// so the macro expands as a single value inside larger expressions
// (e.g. 2 * N).
#define N (ARRNUM/(sizeof(int)*8)+1)
using namespace std;
int main( )
{
int i;
int cnt=0;
bitset<ARRNUM> bit;
cout<<"数组大小: "<<ARRNUM<<endl;
srand((unsigned)time(NULL));
for (i = 0; i < ARRNUM; i++)
{
int tmp= rand( )%ARRNUM;
bit.set(tmp,1);
cout<<setw(5)<<tmp<<" ";
}
cout<<endl<<"排序后: "<<endl;
for(i=0;i<ARRNUM;i++){
if(bit[i]==1){
cout<<setw(5)<<i<<" ";
cnt++;
}
}
cout<<endl<<"不重复的数目为: "<<cnt<<endl;
system("pause");
return 0;
}