LintCode 大数据专项题集

499 · 单词计数 (Map Reduce版本)

/**
 * Definition of OutputCollector:
 * class OutputCollector<K, V> {
 *     public void collect(K key, V value);
 *         // Adds a key/value pair to the output buffer
 * }
 */
/**
 * Word count expressed as a map/reduce pair: {@code Map} emits {@code (word, 1)} for every
 * word occurrence and {@code Reduce} adds up the emitted values of one word.
 */
public class WordCount {

    public static class Map {
        /**
         * Emits {@code (word, 1)} for every whitespace-separated word in {@code value}.
         *
         * @param key    not read by this method
         * @param value  text to tokenize
         * @param output collector receiving one pair per word occurrence
         */
        public void map(String key, String value, OutputCollector<String, Integer> output) {
            // Split on runs of whitespace instead of a single space so that repeated
            // blanks, tabs and line breaks do not produce empty "words".
            for (String word : value.split("\\s+")) {
                // Leading whitespace (or an empty input) still yields one empty token;
                // it is not a word and must not be counted.
                if (!word.isEmpty()) {
                    output.collect(word, 1);
                }
            }
        }
    }

    public static class Reduce {
        /**
         * Sums all values received for {@code key} and emits {@code (key, sum)} once.
         *
         * @param key    the word being aggregated
         * @param values partial counts emitted for that word
         * @param output collector receiving the single aggregated pair
         */
        public void reduce(String key, Iterator<Integer> values,
                           OutputCollector<String, Integer> output) {
            int freq = 0;
            while (values.hasNext()) {
                freq += values.next();
            }
            output.collect(key, freq);
        }
    }
}

503 · 乱序字符串 (Map Reduce版本)

/**
 * Definition of OutputCollector:
 * class OutputCollector<K, V> {
 *     public void collect(K key, V value);
 *         // Adds a key/value pair to the output buffer
 * }
 */
/**
 * Groups anagrams with map/reduce: {@code Map} keys every word by its sorted characters and
 * {@code Reduce} gathers all words that share such a key into one list.
 */
public class Anagram {

    public static class Map {
        /**
         * Emits {@code (sortedCharacters, word)} for every whitespace-separated word in
         * {@code value}.
         *
         * @param key    not read by this method
         * @param value  text to tokenize
         * @param output collector receiving one pair per word occurrence
         */
        public void map(String key, String value,
                        OutputCollector<String, String> output) {
            // Split on runs of whitespace instead of a single space so that repeated
            // blanks do not produce empty tokens.
            for (String word : value.split("\\s+")) {
                // Leading whitespace (or an empty input) still yields one empty token;
                // emitting it would create a bogus ("", "") group.
                if (word.isEmpty()) {
                    continue;
                }
                char[] chars = word.toCharArray();
                // Words that are anagrams of each other have identical sorted characters.
                Arrays.sort(chars);
                output.collect(String.valueOf(chars), word);
            }
        }
    }

    public static class Reduce {
        /**
         * Collects every value received for {@code key} into a list, in iteration order, and
         * emits {@code (key, list)} once.
         *
         * @param key    the sorted-character key shared by the words
         * @param values the words emitted under that key
         * @param output collector receiving the single grouped pair
         */
        public void reduce(String key, Iterator<String> values,
                           OutputCollector<String, List<String>> output) {
            List<String> words = new ArrayList<>();
            while (values.hasNext()) {
                words.add(values.next());
            }
            output.collect(key, words);
        }
    }
}

504 · 倒排索引 (Map Reduce版本)

/**
 * Definition of OutputCollector:
 * class OutputCollector<K, V> {
 *     public void collect(K key, V value);
 *         // Adds a key/value pair to the output buffer
 * }
 * Definition of Document:
 * class Document {
 *     public int id;
 *     public String content;
 * }
 */
/**
 * Inverted index with map/reduce: {@code Map} emits {@code (word, documentId)} for every word
 * of a document and {@code Reduce} turns the ids received for one word into a list.
 */
public class InvertedIndex {

    public static class Map {
        /**
         * Emits {@code (token, value.id)} for every token of {@code value.content}.
         *
         * @param key    not read by this method
         * @param value  document whose content is tokenized
         * @param output collector receiving one pair per token occurrence
         */
        public void map(String key, Document value,
                        OutputCollector<String, Integer> output) {
            // StringTokenizer with its default delimiters never returns empty tokens.
            StringTokenizer stringTokenizer = new StringTokenizer(value.content);
            while (stringTokenizer.hasMoreTokens()) {
                output.collect(stringTokenizer.nextToken(), value.id);
            }
        }
    }

    public static class Reduce {
        /**
         * Emits {@code (key, ids)} where {@code ids} holds the received document ids with
         * runs of equal adjacent ids collapsed to a single entry.
         *
         * <p>Only adjacent duplicates are removed.
         * NOTE(review): this presumably relies on the framework delivering equal ids next to
         * each other (e.g. sorted) - confirm against the caller.
         *
         * @param key    the word being indexed
         * @param values document ids emitted for that word
         * @param output collector receiving the single aggregated pair
         */
        public void reduce(String key, Iterator<Integer> values,
                           OutputCollector<String, List<Integer>> output) {
            List<Integer> index = new ArrayList<>();
            // null means "no id seen yet"; a numeric sentinel such as -1 would wrongly
            // swallow a first document whose id happens to equal the sentinel.
            Integer previousId = null;
            while (values.hasNext()) {
                Integer currentId = values.next();
                if (!currentId.equals(previousId)) {
                    index.add(currentId);
                    previousId = currentId;
                }
            }
            output.collect(key, index);
        }
    }
}

537 · N-Gram (Map Reduce)

/**
 * Definition of OutputCollector:
 * class OutputCollector<K, V> {
 *     public void collect(K key, V value);
 *         // Adds a key/value pair to the output buffer
 * }
 */
/**
 * N-gram frequency with map/reduce: {@code Map} emits every length-{@code n} substring of a
 * string with the value 1 and {@code Reduce} adds up the values of one gram.
 */
public class NGram {

    public static class Map {
        /**
         * Emits {@code (gram, 1)} for each substring of {@code str} that is {@code n}
         * characters long, sliding the window one character at a time from the left.
         *
         * @param s      not read by this method
         * @param n      length of each gram
         * @param str    text the grams are cut from
         * @param output collector receiving one pair per gram occurrence
         */
        public void map(String s, int n, String str,
                        OutputCollector<String, Integer> output) {
            int start = 0;
            // The last valid window begins at index length - n.
            while (start <= str.length() - n) {
                output.collect(str.substring(start, start + n), 1);
                start++;
            }
        }
    }

    public static class Reduce {
        /**
         * Adds up every value received for {@code key} and emits {@code (key, total)} once.
         *
         * @param key    the gram being aggregated
         * @param values partial counts emitted for that gram
         * @param output collector receiving the single aggregated pair
         */
        public void reduce(String key, Iterator<Integer> values,
                           OutputCollector<String, Integer> output) {
            int total = 0;
            for (Iterator<Integer> counts = values; counts.hasNext();) {
                total += counts.next();
            }
            output.collect(key, total);
        }
    }
}

549 · 最常使用的k个单词(Map Reduce)

/**
 * Definition of OutputCollector:
 * class OutputCollector<K, V> {
 *     public void collect(K key, V value);
 *         // Adds a key/value pair to the output buffer
 * }
 * Definition of Document:
 * class Document {
 *     public int id;
 *     public String content;
 * }
 */
/**
 * Top-k frequent words with map/reduce: {@code Map} emits {@code (word, 1)} per word
 * occurrence, {@code Reduce} totals each word and, in {@code cleanup}, outputs the k words
 * with the highest totals.
 */
public class TopKFrequentWords {

    public static class Map {
        /**
         * Emits {@code (word, 1)} for every whitespace-separated word in
         * {@code value.content}.
         *
         * @param key    not read by this method
         * @param value  document whose content is tokenized
         * @param output collector receiving one pair per word occurrence
         */
        public void map(String key, Document value,
                        OutputCollector<String, Integer> output) {
            for (String word : value.content.split("\\s+")) {
                // split("\\s+") yields an empty first token when the content starts with
                // whitespace or is empty; that token is not a word and must not be counted.
                if (!word.isEmpty()) {
                    output.collect(word, 1);
                }
            }
        }
    }

    public static class Reduce {
        /** A word together with its total number of occurrences. */
        static class Pair {
            private String key;
            private Integer value;

            Pair() {}

            Pair(String key, int value) {
                this.key = key;
                this.value = value;
            }

            public String getKey() {
                return key;
            }

            public Integer getValue() {
                return value;
            }

            public void setKey(String key) {
                this.key = key;
            }

            public void setValue(Integer value) {
                this.value = value;
            }
        }

        /** All reduced words; the head is the most frequent word seen so far. */
        private PriorityQueue<Pair> heap;
        /** Maximum number of pairs written by {@link #cleanup}. */
        private int k;

        /**
         * Prepares an empty heap and stores the output limit. Must run before
         * {@link #reduce} and {@link #cleanup}, which both dereference the heap.
         *
         * @param k maximum number of pairs to output
         */
        public void setup(int k) {
            // Order: higher count first; equal counts fall back to ascending word order.
            this.heap = new PriorityQueue<>((o1, o2) -> {
                if (o1.getValue().equals(o2.getValue())) {
                    return o1.getKey().compareTo(o2.getKey());
                }
                return o2.getValue().compareTo(o1.getValue());
            });
            this.k = k;
        }

        /**
         * Totals the values received for {@code key} and stores the result in the heap.
         *
         * @param key    the word being aggregated
         * @param values partial counts emitted for that word
         */
        public void reduce(String key, Iterator<Integer> values) {
            int freq = 0;
            while (values.hasNext()) {
                freq += values.next();
            }
            heap.add(new Pair(key, freq));
        }

        /**
         * Writes up to {@code k} pairs from the heap, best first, then empties the heap.
         *
         * @param output collector receiving the {@code (word, count)} pairs
         */
        public void cleanup(OutputCollector<String, Integer> output) {
            for (int i = 0; i < k && !heap.isEmpty(); i++) {
                Pair pair = heap.poll();
                output.collect(pair.getKey(), pair.getValue());
            }
            heap.clear();
        }
    }
}

554 · 排序整数 (Map Reduce版)

/**
 * Definition of OutputCollector:
 * class OutputCollector<K, V> {
 *     public void collect(K key, V value);
 *         // Adds a key/value pair to the output buffer
 * }
 */
/**
 * Sorting integers with map/reduce: {@code Map} sorts each chunk and {@code Reduce} merges
 * the sorted chunks into one sorted list (k-way merge).
 */
public class SortIntegers {

    public static class Map {
        /**
         * Sorts {@code value} in place in ascending order and emits it under the constant
         * key {@code "key"}, so that every chunk is delivered under the same key.
         *
         * @param key    not read by this method
         * @param value  chunk of numbers; modified by the in-place sort
         * @param output collector receiving the sorted chunk
         */
        public void map(int key, List<Integer> value,
                        OutputCollector<String, List<Integer>> output) {
            Collections.sort(value);
            output.collect("key", value);
        }
    }

    public static class Reduce {
        /**
         * Merges the lists in {@code values} into one ascending list and emits it under the
         * constant key {@code "key"}. The result is only sorted if each input list is
         * already in ascending order. The input lists are read but not modified.
         *
         * @param key    not read by this method
         * @param values lists to merge; empty lists are skipped
         * @param output collector receiving the merged list
         */
        public void reduce(String key, List<List<Integer>> values,
                           OutputCollector<String, List<Integer>> output) {
            // Each heap entry is a cursor {listIndex, position} ordered by the number it
            // points at. Integer.compare avoids the overflow of subtraction-based
            // comparison (e.g. Integer.MIN_VALUE vs. a positive number).
            PriorityQueue<int[]> heap = new PriorityQueue<>((a, b) ->
                    Integer.compare(values.get(a[0]).get(a[1]), values.get(b[0]).get(b[1])));

            int total = 0;
            for (int i = 0; i < values.size(); i++) {
                int size = values.get(i).size();
                if (size > 0) {
                    heap.add(new int[] {i, 0});
                    total += size;
                }
            }

            List<Integer> sortedNumbers = new ArrayList<>(total);
            while (!heap.isEmpty()) {
                int[] cursor = heap.poll();
                List<Integer> list = values.get(cursor[0]);
                sortedNumbers.add(list.get(cursor[1]));
                // Advance the cursor instead of calling list.remove(0), which costs O(n)
                // per element, mutates the caller's data and fails on fixed-size lists.
                cursor[1]++;
                if (cursor[1] < list.size()) {
                    heap.add(cursor);
                }
            }

            output.collect("key", sortedNumbers);
        }
    }
}

1787 · Google Suggestion (Map Reduce)

/**
 * Definition of OutputCollector:
 * class OutputCollector<K, V> {
 *     public void collect(K key, V value);
 *         // Adds a key/value pair to the output buffer
 * }
 * Definition of Document:
 * class Document {
 *     public int count;
 *     public String content;
 * }
 *
 *class Pair {
 *   private String content;
 *   private int count;
 *   
 *   Pair(String content, int count) {
 *       this.content = content;
 *       this.count = count;
 *   }
 *   public String getContent(){
 *	 	 return this.content;
 *	 }
 *	public int getCount(){
 *   	 return this.count;
 *   }
 *
 *}
 */
/**
 * Search suggestions with map/reduce: {@code Map} emits every non-empty prefix of a query
 * together with the query and its count, and {@code Reduce} outputs the best queries for
 * each prefix.
 */
public class GoogleSuggestion {

    public static class Map {
        /**
         * Emits {@code (prefix, Pair(content, count))} for every non-empty prefix of
         * {@code value.content}, including the full content itself.
         *
         * @param value  query text and its count
         * @param output collector receiving one pair per prefix
         */
        public void map(Document value,
                        OutputCollector<String, Pair> output) {
            for (int i = 1; i <= value.content.length(); ++i) {
                String prefix = value.content.substring(0, i);
                output.collect(prefix, new Pair(value.content, value.count));
            }
        }
    }

    public static class Reduce {
        /** Maximum number of suggestions emitted per prefix. */
        private static final int MAX_SUGGESTIONS = 10;

        /** Candidates of the prefix currently being reduced; the head is the best one. */
        private PriorityQueue<Pair> heap;

        /** Creates the empty candidate heap. */
        public void setup() {
            // Order: higher count first; equal counts fall back to ascending content.
            // Integer.compare avoids the overflow of subtraction-based comparison.
            this.heap = new PriorityQueue<>((o1, o2) -> {
                if (o1.getCount() == o2.getCount()) {
                    return o1.getContent().compareTo(o2.getContent());
                }
                return Integer.compare(o2.getCount(), o1.getCount());
            });
        }

        /**
         * Emits up to {@value #MAX_SUGGESTIONS} pairs for {@code key}, best first.
         *
         * @param key    the prefix being reduced
         * @param values all candidates emitted for that prefix
         * @param output collector receiving {@code (key, pair)} for each selected candidate
         */
        public void reduce(String key, Iterator<Pair> values, OutputCollector<String, Pair> output) {
            // Build the heap once and reuse it; always start from an empty heap so that
            // candidates of a previous prefix can never leak into this one.
            if (heap == null) {
                setup();
            } else {
                heap.clear();
            }

            while (values.hasNext()) {
                heap.add(values.next());
            }

            for (int i = 0; i < MAX_SUGGESTIONS && !heap.isEmpty(); i++) {
                output.collect(key, heap.poll());
            }

            heap.clear();
        }
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值