讨论组12.20讲述内容笔记

最新推荐文章于 2021-12-12 12:57:45 发布

toast and f**k all

最新推荐文章于 2021-12-12 12:57:45 发布

阅读量574

点赞数

分类专栏：实验狗文章标签： BitSet mapreduce

本文链接：https://blog.csdn.net/sun_jiadong/article/details/50370091

版权

实验狗专栏收录该内容

3 篇文章 0 订阅

订阅专栏

介绍java.util.BitSet
BitSet 类创建一个特殊类型的数组保存位值。该BitSet中数组的大小可以根据需要增加。

1.构造函数

private long[] words;
private final static int ADDRESS_BITS_PER_WORD = 6;
public BitSet(int nbits) {
    // nbits can't be negative; size 0 is OK
    if (nbits < 0)
        throw new NegativeArraySizeException("nbits < 0: " + nbits);

    initWords(nbits);
    sizeIsSticky = true;
}

private void initWords(int nbits) {
    words = new long[wordIndex(nbits-1) + 1];   // 初始化会根据的位数决定要申请多大的数组，long 类型是 64 位，所以你如果 nbits 是 1~64，你只需要一个长度为1的数组就好。
}

private static int wordIndex(int bitIndex) {
    return bitIndex >> ADDRESS_BITS_PER_WORD;
}

2.set方法

public void set(int bitIndex) {
        if (bitIndex < 0)
            throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

        int wordIndex = wordIndex(bitIndex);
        expandTo(wordIndex);
        words[wordIndex] |= (1L << bitIndex); // Restores invariants
        checkInvariants();
        /**
        这是在改变long数组的某一个元素的值，首先需要确定的是改变哪一个元素，其次需要使用与或操作改变这个元素，在上面的代码中，首先将bitIndex>>6，这样就确定了是修改哪一个元素的值。
注意： java中的移位操作会模除位数，也就是说，long类型的移位会模除64。例如对long类型的值左移65位，实际是左移了65%64=1位。所以这行代码就等于：
int cache = bitIndex % 64;  words[wordsIndex] |= (1L << cache); 
*/

}

3.hashcode

public int hashCode() {
    long h = 1234;
    for (int i = wordsInUse; --i >= 0; )
        h ^= words[i] * (i + 1);

    return (int)((h >> 32) ^ h);
}

4.size() length()

public int size() {
        return words.length * BITS_PER_WORD;
}
//BITS_PER_WORD = 64，这里很重要的一点就是，如果使用size来返回BitSet数组的大小，其值一定是64的倍数

public int length() {
        if (wordsInUse == 0)
            return 0;

        return BITS_PER_WORD * (wordsInUse - 1) +
            (BITS_PER_WORD - Long.numberOfLeadingZeros(words[wordsInUse - 1]));
}
//这个方法法返回的是BitSet的逻辑大小，比如说你声明了一个129位的BitSet,设置了第23，45，67位，那么其逻辑大小就是67。

用途：
存储海量数据：
一个int占4个字节。
比如有一堆数字，需要存储，source=[3,5,6,9]，用int就需要4*4个字节。
java.util.BitSet可以存true/false。
如果用java.util.BitSet，则会少很多：
1，先找出数据中最大值maxvalue=9
2，声明一个BitSet bs,它的size是maxvalue+1=10
3，遍历数据source，bs[source[i]]设置成true.
这样一个本来要int型需要占4字节共32位的数字现在只用了1位，比例32:1

BloomFilter：

import org.apache.hadoop.io.Writable;

class BloomFilter<E> implements Writable {

    private BitSet bf;
    private int bitArraySize = 100000000;
    private int numHashFunc = 6;

    public BloomFilter() {
        bf = new BitSet(bitArraySize);
    }

    public void add(E obj) {
        int[] indexes = getHashIndexes(obj);

        for (int index : indexes) {
            bf.set(index);
        }
    }

    public boolean contains(E obj) {
        int[] indexes = getHashIndexes(obj);

        for (int index : indexes) {
            if (bf.get(index) == false) {
                return false;
            }
        }
        return true;
    }

    public void union(BloomFilter<E> other) {
        bf.or(other.bf);
    }

    protected int[] getHashIndexes(E obj) {
        int[] indexes = new int[numHashFunc];
        long seed = 0;
        byte[] digest;
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            md.update(obj.toString().getBytes());
            digest = md.digest();

            for (int i = 0; i < 6; i++) { 
                seed = seed | (((long)digest[i] & 0xFF))<<(8*i);
            }
        } catch (NoSuchAlgorithmException  e) {}
        Random gen = new Random(seed);
        for (int i = 0; i < numHashFunc; i++) {
            indexes[i] = gen.nextInt(bitArraySize);
        }
        return indexes;
        }
    }

1、用MapReduce找共同朋友编程实现
数据格式如下：

第一个字母代表本人，其他是他的朋友，找出共同朋友的人，和共同朋友是谁？

public class FindFriend {
    final static String INPUT_PATH = "hdfs://master:8020/liguodong/test1";
    final static String OUTPUT_PATH = "hdfs://master:8020/liguodong/test1out";

    public static void main(String[] args) throws IOException, 
    URISyntaxException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();   

        final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH),conf);     
        if(fileSystem.exists(new Path(OUTPUT_PATH)))
        {
            fileSystem.delete(new Path(OUTPUT_PATH),true);
        }

        Job job = Job.getInstance(conf,"Find friend");
        job.setJarByClass(FindFriend.class);
        job.setMapperClass(MyMapper.class);     
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));        
        FileOutputFormat.setOutputPath(job,new Path(OUTPUT_PATH));

        System.exit(job.waitForCompletion(true)?0:1);
    }

    static class MyMapper extends Mapper<LongWritable,Text,Text,Text>{
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            //分割字符串
            StringTokenizer stringTokenizer = new StringTokenizer(value.toString());
            Text owner = new Text();//存放自己

            Set<String> set = new TreeSet<String>();//存放朋友

            owner.set(stringTokenizer.nextToken());
            while(stringTokenizer.hasMoreTokens()){
                set.add(stringTokenizer.nextToken());
            }

            String[] friends = new String[set.size()];//朋友
            friends = set.toArray(friends);

            for(int i=0; i<friends.length;i++){
                for(int j=i+1; j<friends.length; j++){
                    String outputkey = friends[i]+friends[j];//朋友之间两两组合
                    context.write(new Text(outputkey), owner);//<朋友组合，自己>
                }
            }
        }

    }

    static class MyReducer extends Reducer<Text, Text ,Text, Text>
    {
        protected void reduce(Text key, Iterable<Text> values,
                Context context)
                throws IOException, InterruptedException {
            //以朋友组合作为key值，自己作为value值。
            String commonFriends = "";
            for(Text val:values)
            {
                if(commonFriends == ""){
                    commonFriends = val.toString();
                }else{
                    commonFriends = commonFriends+"--"+val.toString();
                }
            }
            context.write(key, new Text(commonFriends));
        }
    }
}