堆的应用

一、堆排序

堆排序见之前的一篇博客:http://blog.csdn.net/zhutulang/article/details/7746033


二、top k 问题

有一千万条短信,其中有很多重复的。要求找出重复最多的10条短信。


在这里不考虑资源受限的情况。比较经典的解法是:先用哈希表统计每条短信出现的次数,再用前10个元素构造一个最小堆,然后将剩余的元素逐个与堆顶元素比较,若比堆顶元素大则取代堆顶元素(取代后执行下沉操作以保持堆序性质)。另外,也不考虑重复次数的问题。事实上这个问题看似简单,可是一旦加上各种限制条件延伸开,恐怕是可以很复杂很复杂的。


首先我们构造一个含有一千万条短信的文件。


package com.my.test5;

import java.io.*;
import java.util.UUID;

/**
 * Title: Builds a text file containing ten million SMS messages, one message per line<br/>
 * Intention: Writes 9 million repeated messages followed by 1 million random UUID lines<br/>
 * <p>
 * Class Name: com.my.test5.BuildTxtMsgFile<br/>
 * Create Date: 2017/6/30 0:56 <br/>
 * Project Name: MyTest <br/>
 * Company:  All Rights Reserved. <br/>
 * Copyright © 2017 <br/>
 * </p>
 * <p>
 * author: GaoWei <br/>
 * 1st_examiner: <br/>
 * 2nd_examiner: <br/>
 * </p>
 *
 * @version 1.0
 * @since JDK 1.7
 */
public class BuildTxtMsgFile {

	/**
	 * Generates the message file at {@code E:\tmp\msg.txt} (UTF-8) and prints the elapsed
	 * time in milliseconds.
	 *
	 * @param args unused
	 * @throws Exception if the file cannot be created or written
	 */
	public static void main(String[] args) throws Exception{
		// The repeated messages; their counts add up to 9 million lines.
		MsgObj[] msgArr = {
				new MsgObj("明天还要早起呢!",390000),// 9th most frequent
				new MsgObj("帮我带早餐。",200000),
				new MsgObj("人工智能真的会统治世界吗?",200000),
				new MsgObj("hello java",700000), // 3rd most frequent
				new MsgObj("听说女神离职了?",3000000), // 1st most frequent
				new MsgObj("九月九日忆山东兄弟",430000),// 7th most frequent
				new MsgObj("山不在高有仙则灵",610000),// 4th most frequent
				new MsgObj("生于忧患死于安乐啊!!!",200000),
				new MsgObj("性格的培养至关重要",600000),// 5th most frequent
				new MsgObj("天龙八部",570000),// 6th most frequent
				new MsgObj("书剑恩仇录",200000),
				new MsgObj("做任何事情都是有技巧的",1000000),// 2nd most frequent
				new MsgObj("吾日三省吾身", 400000),// 8th most frequent
				new MsgObj("在人间",200000),
				new MsgObj("三千越甲可吞吴!",300000)// 10th most frequent
		};

		long start = System.currentTimeMillis();
		// try-with-resources flushes and closes the writer even when a write fails,
		// so the file handle is never leaked.
		try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File("E:\\tmp\\msg.txt")), "UTF-8"))) {
			// 1. Write the 9 million repeated messages.
			for (MsgObj obj : msgArr) {
				String content = obj.getContent();
				int repeat = obj.getCount();
				for (int i = 0; i < repeat; i++) {
					bw.write(content);
					bw.newLine();
				}
			}
			// 2. Write 1 million messages with random content.
			for (int i = 0; i < 1000000; i++) {
				bw.write(UUID.randomUUID().toString());
				bw.newLine();
			}
		}
		long end = System.currentTimeMillis();

		System.out.println("写入完成,耗时:"+(end - start));
	}


	/**
	 * SMS model: the message content and the number of times it is written to the file.
	 */
	public static class MsgObj {

		/** Message text written as one line. */
		private String content;

		/** Number of lines this message occupies in the generated file. */
		private int count;

		/**
		 * @param content message text
		 * @param count   number of times the message is written
		 */
		public MsgObj(String content, int count) {
			this.content = content;
			this.count = count;
		}

		public String getContent() {
			return content;
		}

		public void setContent(String content) {
			this.content = content;
		}

		public int getCount() {
			return count;
		}

		public void setCount(int count) {
			this.count = count;
		}
	}
}

堆节点元素定义:

package com.my.test5;

/**
 * Title: Heap node element storing an SMS message's content and its occurrence count<br/>
 * Intention: Orders messages by count so they can be placed in a heap<br/>
 * <p>
 * Class Name: com.my.test5.MsgModel<br/>
 * Create Date: 2017/7/3 0:34 <br/>
 * Project Name: MyTest <br/>
 * Company:  All Rights Reserved. <br/>
 * Copyright © 2017 <br/>
 * </p>
 * <p>
 * author: GaoWei <br/>
 * 1st_examiner: <br/>
 * 2nd_examiner: <br/>
 * </p>
 *
 * @version 1.0
 * @since JDK 1.7
 */
public class MsgModel implements Comparable<MsgModel>{

	/**
	 * Message content.
	 */
	private String content;

	/**
	 * Number of occurrences.
	 */
	private int count;

	/**
	 * Creates an empty model (null content, count 0).
	 */
	public MsgModel(){

	}

	/**
	 * @param content message content
	 * @param count   number of occurrences
	 */
	public MsgModel(String content, int count) {
		this.content = content;
		this.count = count;
	}

	public String getContent() {
		return content;
	}

	public void setContent(String content) {
		this.content = content;
	}

	public int getCount() {
		return count;
	}

	public void setCount(int count) {
		this.count = count;
	}

	/**
	 * Orders models by ascending count only; content is not compared.
	 *
	 * @param o the model to compare against
	 * @return a negative value, zero, or a positive value when this count is
	 *         less than, equal to, or greater than {@code o}'s count
	 */
	@Override
	public int compareTo(MsgModel o) {
		// Integer.compare avoids the int overflow that "count - o.getCount()" suffers
		// for operands of opposite sign and large magnitude.
		return Integer.compare(count, o.getCount());
	}

	@Override
	public String toString() {
		return "[content="+content+", count="+count+"]";
	}
}

find top K:

package com.my.test5;

import com.my.test6.MinHeap;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/**
 * Title: Finds the k most frequently repeated SMS messages in a text file<br/>
 * Intention: Counts each line with a hash map, then keeps the k largest counts in a min-heap<br/>
 * <p>
 * Class Name: com.my.test5.FindTxtMsg<br/>
 * Create Date: 2017/6/30 21:19 <br/>
 * Project Name: MyTest <br/>
 * Company:  All Rights Reserved. <br/>
 * Copyright © 2017 <br/>
 * </p>
 * <p>
 * author: GaoWei <br/>
 * 1st_examiner: <br/>
 * 2nd_examiner: <br/>
 * </p>
 *
 * @version 1.0
 * @since JDK 1.7
 */
public class FindTxtMsg {

	public static void main(String[] args) throws Exception{
		findTop(10);
	}

	/**
	 * Reads {@code E:\tmp\msg.txt} (one message per line, UTF-8), counts how often each
	 * distinct line occurs, and prints the k messages with the highest counts together
	 * with timing information. The printed heap is in heap order, not sorted order.
	 *
	 * @param k number of top messages to find; must be positive
	 * @throws IllegalArgumentException if {@code k} is not positive
	 * @throws Exception if the file cannot be opened or read
	 */
	public static void findTop(int k) throws Exception {
		if (k <= 0) {
			// A heap of zero elements has no top to compare against.
			throw new IllegalArgumentException("k must be positive, but was " + k);
		}

		long start = System.currentTimeMillis();
		// Count occurrences of each distinct message.
		Map<String,Integer> map = new HashMap<String, Integer>();
		// The charset is given explicitly so the keys do not depend on the platform default;
		// try-with-resources closes the reader even when reading fails.
		try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream("E:\\tmp\\msg.txt"), "UTF-8"))) {
			String msg;
			while ((msg = br.readLine()) != null) {
				Integer seen = map.get(msg);
				map.put(msg, seen == null ? 1 : seen + 1);
			}
		}
		long end = System.currentTimeMillis();
		System.out.println("map统计完成,耗时="+(end - start));
		System.out.println("map统计完成,map大小="+map.size());

		// Seed a min-heap with the first k entries, then let every later entry that
		// beats the heap's minimum replace it, leaving the k largest counts in the heap.
		// NOTE(review): MinHeap (com.my.test6) is not shown here; top() is assumed to return
		// the minimum and replaceTop() to swap it out while restoring heap order.
		start = System.currentTimeMillis();
		ArrayList<MsgModel> list = new ArrayList<MsgModel>(k);
		MinHeap<MsgModel> myMinHeap = null;
		MsgModel myMinHeapTop;
		int i = 0;
		for (Map.Entry<String, Integer> entry : map.entrySet()) {
			if (++i <= k){
				list.add(new MsgModel(entry.getKey(), entry.getValue()));
				if (i == k) {
					// Build the min-heap of exactly k elements.
					myMinHeap = new MinHeap<MsgModel>(list);
				}
			} else {
				// Remaining entries replace the heap top only when their count is larger.
				myMinHeapTop = (MsgModel) myMinHeap.top();
				if (entry.getValue() > myMinHeapTop.getCount()) {
					myMinHeap.replaceTop(new MsgModel(entry.getKey(), entry.getValue()));
				}
			}
		}
		if (myMinHeap == null && !list.isEmpty()) {
			// Fewer than k distinct messages: every collected entry is part of the answer.
			myMinHeap = new MinHeap<MsgModel>(list);
		}
		end = System.currentTimeMillis();
		// Print the result.
		System.out.println(myMinHeap);
		System.out.println("find top k 完成,耗时="+(end - start));
	}
}

运行结果:

map统计完成,耗时=2635
map统计完成,map大小=1000015
[[content=三千越甲可吞吴!, count=300000], [content=九月九日忆山东兄弟, count=430000], [content=明天还要早起呢!, count=390000], [content=性格的培养至关重要, count=600000], [content=天龙八部, count=570000], [content=吾日三省吾身, count=400000], [content=hello java, count=700000], [content=山不在高有仙则灵, count=610000], [content=听说女神离职了?, count=3000000], [content=做任何事情都是有技巧的, count=1000000]]
find top k 完成,耗时=57


当然,如果你愿意的话,可以给最后的结果排下序。



三、优先队列

可以查看jdk的PriorityQueue源码,实际上就是用最小堆实现的


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值