多线程读取大数据文件

代码改错,已更新为:http://blog.csdn.net/lrq1988/article/details/17954715

因工作需要,要读取一个几十万行的文本文件,于是写了这个程序。折腾了两天,改来改去,并不一定是最终版,姑且先记录下来。

1、目前从本地文件读取,以后会改为通过网络读取

2、timer是为了作定时刷新

3、容器启动时,首先加载MobileUtil.init()方法

4、在多核服务器上加载会更快:程序按服务器的CPU核数把读取到的内容切分成若干份,由多个线程并行解析后再合并组装成map

5、基于线程安全考虑,HashMap可能改为ConcurrentHashMap

6、之所以没用NIO,是因为文本行数的计算在JDK6不支持,另外实现的代价又高。貌似JDK7已提供相应API。

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.CRC32;

import org.apache.commons.codec.digest.DigestUtils;

//import org.apache.commons.codec.digest.DigestUtils;

public class MobileUtil {
	// Scheduler that runs the periodic load task; its pool size is the number
	// of available processors.
	private static final ScheduledExecutorService timer = Executors
			.newScheduledThreadPool(Runtime.getRuntime().availableProcessors());
	// Hard-coded path of the data file to load.
	private static final String fileName = "/Users/leefelix/Downloads/all.csv";
	private static long crc32;// CRC32 checksum remembered between runs to detect changes
	private static String content = null;// content string that was obtained
	// Lookup table keyed by the first column of each record; the public getters
	// query it with the first 7 digits of a mobile number. The loader replaces
	// the whole map by reassigning this reference.
	public static HashMap<String, Location> locationMap = new HashMap<String, Location>();
	// Set to true by the loader once a table has been assigned to locationMap;
	// create() and the lookup methods wait for it before proceeding.
	public static volatile boolean started = false;

	// Private: instances are only created inside this class, by init() and
	// create().
	private MobileUtil() {
	}

	/**
	 * Schedules the load task: it runs immediately and then again one minute
	 * after each run finishes.
	 *
	 * Every run reads the data file and computes a CRC32 over its content. If
	 * a table is already published and the checksum is unchanged, the run
	 * stops there. Otherwise the lines are parsed in parallel, the resulting
	 * map replaces {@link #locationMap}, and {@link #started} is set so that
	 * waiting callers can proceed.
	 *
	 * All failures are caught and reported inside the task, because an
	 * exception escaping a task scheduled with scheduleWithFixedDelay cancels
	 * every later run.
	 */
	private void initial() {
		timer.scheduleWithFixedDelay(new Runnable() {
			public void run() {
				long start = System.nanoTime();
				try {
					System.out.println("md4....");
					long start1 = System.nanoTime();
					CRC32 checksum = new CRC32();
					List<String> lines = readLines(fileName, checksum);
					System.out.println("end...");
					System.out.println("time1:" + (System.nanoTime() - start1));

					// Skip the rebuild only when a table has already been
					// published and the file content has not changed.
					if (started && checksum.getValue() == crc32) {
						System.out.println("md5....");
						return;
					}

					HashMap<String, Location> result = parseInParallel(lines);
					locationMap = result;
					// Remember the checksum only after a successful load, so
					// a failed run is retried on the next tick.
					crc32 = checksum.getValue();
					System.out.println("locationMap:" + result.size());
					// Volatile write: publishes the new map to waiting readers.
					started = true;
					System.out.println(true);
					System.out.println(System.nanoTime() - start);
				} catch (FileNotFoundException e) {
					e.printStackTrace();
					System.out.println("找不到文件" + fileName + "...");
				} catch (IOException e) {
					e.printStackTrace();
					System.out.println("与文件" + fileName + "通信异常...");
				} catch (InterruptedException e) {
					e.printStackTrace();
					// Restore the flag so the pool can observe the interrupt.
					Thread.currentThread().interrupt();
				} catch (ExecutionException e) {
					e.printStackTrace();
				} catch (RuntimeException e) {
					// Keep the periodic task alive after an unexpected error.
					e.printStackTrace();
				}
			}
		}, 0, 1, TimeUnit.MINUTES);
	}

	/**
	 * Reads a UTF-8 text file line by line, feeding every line (followed by a
	 * newline byte) into the given checksum. The reader is always closed.
	 *
	 * @param path     file to read
	 * @param checksum accumulator updated with the file content
	 * @return the lines of the file, without line terminators
	 * @throws IOException if the file cannot be opened or read
	 */
	private static List<String> readLines(String path, CRC32 checksum)
			throws IOException {
		BufferedReader reader = new BufferedReader(new InputStreamReader(
				new FileInputStream(path), "UTF-8"));
		try {
			List<String> lines = new ArrayList<String>();
			String line;
			while ((line = reader.readLine()) != null) {
				checksum.update(line.getBytes("UTF-8"));
				checksum.update('\n');
				lines.add(line);
			}
			return lines;
		} finally {
			reader.close();
		}
	}

	/**
	 * Parses the lines on a fixed thread pool, one contiguous slice per task,
	 * and merges the per-slice maps in slice order (so a later duplicate key
	 * overrides an earlier one). The pool is shut down even when a task fails.
	 *
	 * @param lines records to parse; may be empty
	 * @return map from the first field of each record to its Location
	 * @throws InterruptedException if the calling thread is interrupted while waiting
	 * @throws ExecutionException   if a parsing task fails
	 */
	private static HashMap<String, Location> parseInParallel(
			List<String> lines) throws InterruptedException,
			ExecutionException {
		HashMap<String, Location> result = new HashMap<String, Location>();
		if (lines.isEmpty()) {
			return result;
		}
		// Never use more threads than there are lines.
		int threadCounts = Math.min(Runtime.getRuntime()
				.availableProcessors(), lines.size());
		int chunk = (lines.size() + threadCounts - 1) / threadCounts;
		List<Callable<HashMap<String, Location>>> callList = new ArrayList<Callable<HashMap<String, Location>>>();
		for (int from = 0; from < lines.size(); from += chunk) {
			final List<String> part = lines.subList(from,
					Math.min(from + chunk, lines.size()));
			callList.add(new Callable<HashMap<String, Location>>() {
				public HashMap<String, Location> call() {
					HashMap<String, Location> map = new HashMap<String, Location>();
					for (String line : part) {
						Location lc = parseLine(line);
						if (lc != null) {
							map.put(lc.getNum(), lc);
						}
					}
					return map;
				}
			});
		}
		ExecutorService exec = Executors.newFixedThreadPool(threadCounts);
		try {
			for (Future<HashMap<String, Location>> future : exec
					.invokeAll(callList)) {
				result.putAll(future.get());
			}
		} finally {
			exec.shutdown();
		}
		return result;
	}

	/**
	 * Converts one record into a Location. A record has four tab-separated
	 * fields: number, province, city and operator; each field is trimmed and
	 * the last field keeps any further tabs.
	 *
	 * @param line one line of the data file
	 * @return the parsed Location, or null when the line has fewer than four fields
	 */
	private static Location parseLine(String line) {
		String[] fields = line.split("\t", 4);
		if (fields.length < 4) {
			return null;
		}
		Location lc = new Location();
		lc.setNum(fields[0].trim());
		lc.setProvince(fields[1].trim());
		lc.setCity(fields[2].trim());
		lc.setOperator(fields[3].trim());
		return lc;
	}

	/**
	 * Start-up hook: creates an instance and schedules its periodic load task.
	 */
	public static void init() {
		new MobileUtil().initial();
	}
	/**
	 * Waits until the loader has set {@link #started} and then returns a new
	 * instance.
	 *
	 * The wait polls with a short sleep instead of spinning, so a blocked
	 * caller does not occupy a CPU core. An interrupt received while waiting
	 * does not end the wait; the thread's interrupt status is restored before
	 * returning.
	 *
	 * @return a new MobileUtil, created once the table is available
	 */
	public static MobileUtil create(){
		boolean interrupted = false;
		try {
			while (!started) {
				try {
					Thread.sleep(10);
				} catch (InterruptedException e) {
					// Keep waiting; remember the interrupt for the caller.
					interrupted = true;
				}
			}
		} finally {
			if (interrupted) {
				Thread.currentThread().interrupt();
			}
		}
		return new MobileUtil();
	}

	// static {
	// long start = System.nanoTime();
	// // FileInputStream fis;
	// // FileChannel fc;
	// // ByteBuffer bf;
	// try {
	// if (content != null && md5Data.equals(DigestUtils.md5Hex(content))) {
	//
	// } else {
	// // fis = new FileInputStream(fileName);
	// // // 创建UTF-8/GBK符集
	// // Charset charset = Charset.forName("GBK");
	// // // 得到文件通道
	// // fc = fis.getChannel();
	// // // 分配与文件尺寸等大的缓冲区
	// // bf = ByteBuffer.allocate((int) fc.size());
	// // // 整个文件内容全读入缓冲区,即是内存映射文件
	// // fc.read(bf);
	// // // 把缓冲中当前位置回复为零
	// // bf.rewind();
	// // // 输出缓冲区中的内容
	// // content = charset.decode(bf).toString();
	// // fc.close();
	// // int strCount = 0;
	// // while (true) {
	// // int j = contentCopy.indexOf("\r\n");
	// // if (j < 0) {
	// // break;
	// // }
	// // strCount++;
	// // contentCopy = contentCopy.substring(j + 1);
	// // }
	// //之所以使用BufferedReader而不使用NIO,是为了方便计算行数
	// int strCount = 0;
	// FileInputStream fi = new FileInputStream(fileName);
	// InputStreamReader inreader = new InputStreamReader(fi, "GBK");
	// BufferedReader reader = new BufferedReader(inreader);
	// String line = null;
	// long start1 = System.nanoTime();
	// StringBuilder sb = new StringBuilder();
	// while ((line = reader.readLine()) != null) {
	// strCount++;
	// sb.append(line+"\r\n");
	// }
	// content=sb.toString();
	// System.out.println("time1:"+(System.nanoTime()-start1));
	// // long start2 = System.nanoTime();
	// // String ss=null;
	// // while ((line = reader.readLine()) != null) {
	// // strCount++;
	// // ss+=line+"\r\n";
	// // }
	// // content=ss;
	// // System.out.println("time2:"+(System.nanoTime()-start2));
	// md5Data = DigestUtils.md5Hex(content);
	// String contentCopy = content;
	// int total = contentCopy.length();
	// // 使用的线程数量
	// int threadCounts = Runtime.getRuntime().availableProcessors();
	// ExecutorService exec = Executors
	// .newFixedThreadPool(threadCounts);
	// List<Callable<HashMap<String, Location>>> callList = new
	// ArrayList<Callable<HashMap<String, Location>>>();
	// int len = strCount / threadCounts;// 平均分割strCount
	// // strCount小于线程数
	// if (len == 0) {
	// threadCounts = strCount;// 采用一个线程处理List中的一个元素
	// len = strCount / threadCounts;// 重新平均分割List
	// }
	// for (int i = 0; i < threadCounts; i++) {
	// //根据线程数量切割字符串为线程数量个子字符串
	// final String subContent;
	// if(0 == threadCounts -1){
	// subContent = contentCopy;
	// }else{
	// int startPos = i*total/threadCounts;
	// int endPos = (i+1)*total/threadCounts;
	// if(i!=0)
	// while(!contentCopy.substring(startPos-2,startPos).endsWith("\r\n")){
	// startPos++;
	// }
	// if(i!=threadCounts -1)
	// while(!contentCopy.substring(endPos-2,endPos).endsWith("\r\n")){
	// endPos++;
	// }
	// subContent = contentCopy.substring(startPos,endPos);
	// }
	// callList.add(new Callable<HashMap<String, Location>>() {
	// public HashMap<String, Location> call()
	// throws Exception {
	// String contentCopy = subContent;
	// HashMap<String, Location> map = new HashMap<String, Location>();
	// while (true) {
	// String splitStr = null;
	// int j = contentCopy.indexOf("\r\n");
	// if (j < 0) {
	// break;
	// }
	// splitStr = contentCopy.substring(0, j);
	// Location lc = new Location();
	// String[] arr = new String[4];
	// arr[0] = splitStr.substring(0,
	// splitStr.indexOf("\t")).trim();
	// splitStr = splitStr.substring(splitStr
	// .indexOf("\t") + 1);
	// arr[1] = splitStr.substring(0,
	// splitStr.indexOf("\t")).trim();
	// splitStr = splitStr.substring(splitStr
	// .indexOf("\t") + 1);
	// arr[2] = splitStr.substring(0,
	// splitStr.indexOf("\t")).trim();
	// splitStr = splitStr.substring(splitStr
	// .indexOf("\t") + 1);
	// arr[3] = splitStr.trim();
	// lc.setNum(arr[0]);
	// lc.setProvince(arr[1]);
	// lc.setCity(arr[2]);
	// lc.setOperator(arr[3]);
	// map.put(arr[0], lc);
	// contentCopy = contentCopy.substring(j + 1);
	// }
	// return map;
	// }
	// });
	// }
	//
	// List<Future<HashMap<String, Location>>> futureList = exec
	// .invokeAll(callList);
	// HashMap<String, Location> result = new HashMap<String, Location>();
	// for (Future<HashMap<String, Location>> future : futureList) {
	// result.putAll(future.get());
	// }
	// md5Data = DigestUtils.md5Hex(content);
	// locationMap=result;
	// System.out.println(System.nanoTime() - start);
	// exec.shutdown();
	// }
	// } catch (FileNotFoundException e) {
	// e.printStackTrace();
	// System.out.println("找不到文件" + fileName + "...");
	// } catch (IOException e) {
	// e.printStackTrace();
	// System.out.println("与文件" + fileName + "通信异常...");
	//
	// } catch (InterruptedException e) {
	// e.printStackTrace();
	// } catch (ExecutionException e) {
	// e.printStackTrace();
	// }
	// }
	/**
	 * Manual smoke test: starts the loader and prints the city and operator
	 * found for a sample number.
	 *
	 * init() must run first because the lookup methods wait until the loader
	 * has set {@code started}; without it they would never return. The
	 * scheduled reload keeps running afterwards, so the process does not exit
	 * on its own.
	 *
	 * @param args unused
	 * @throws IOException declared for compatibility; not thrown by this method
	 */
	public static void main(String args[]) throws IOException {
		init();
		System.out.println(getCity("13811014978"));
		System.out.println(getOperator("13811014978"));
	}

	/**
	 * Mutable value holder for one record of the data file: the number the
	 * record is keyed by, plus its province, city and operator.
	 */
	static class Location {

		private String num;
		private String province;
		private String city;
		private String operator;

		/** Creates an empty record; every property starts as null. */
		public Location() {
		}

		/** @return the number this record is keyed by */
		public String getNum() {
			return this.num;
		}

		/** @param num the number this record is keyed by */
		public void setNum(String num) {
			this.num = num;
		}

		/** @return the province name */
		public String getProvince() {
			return this.province;
		}

		/** @param province the province name */
		public void setProvince(String province) {
			this.province = province;
		}

		/** @return the city name */
		public String getCity() {
			return this.city;
		}

		/** @param city the city name */
		public void setCity(String city) {
			this.city = city;
		}

		/** @return the operator name */
		public String getOperator() {
			return this.operator;
		}

		/** @param operator the operator name */
		public void setOperator(String operator) {
			this.operator = operator;
		}
	}

	/**
	 * Looks up the record for a mobile number.
	 *
	 * The number is validated first, then the call waits until the loader has
	 * published a table.
	 *
	 * @param mobile an 11-digit mobile number starting with 1; surrounding whitespace is ignored
	 * @return the matching record, or null when the 7-digit prefix is unknown
	 * @throws IllegalArgumentException if the number is malformed
	 */
	public static Location getLocation(String mobile) {
		String prefix = getNum7(mobile);
		awaitStarted();
		// Single read of the map reference: the loader may swap it at any time.
		return locationMap.get(prefix);
	}

	/**
	 * Returns the city for a mobile number.
	 *
	 * @param mobile an 11-digit mobile number starting with 1
	 * @return the city, or null when the prefix is unknown
	 * @throws IllegalArgumentException if the number is malformed
	 */
	public static String getCity(String mobile) {
		Location location = getLocation(mobile);
		return location == null ? null : location.getCity();
	}

	/**
	 * Returns the province for a mobile number.
	 *
	 * @param mobile an 11-digit mobile number starting with 1
	 * @return the province, or null when the prefix is unknown
	 * @throws IllegalArgumentException if the number is malformed
	 */
	public static String getProvince(String mobile) {
		Location location = getLocation(mobile);
		return location == null ? null : location.getProvince();
	}

	/**
	 * Returns the operator for a mobile number.
	 *
	 * @param mobile an 11-digit mobile number starting with 1
	 * @return the operator, or null when the prefix is unknown
	 * @throws IllegalArgumentException if the number is malformed
	 */
	public static String getOperator(String mobile) {
		Location location = getLocation(mobile);
		return location == null ? null : location.getOperator();
	}

	/**
	 * Blocks until the loader has set {@code started}. Polls with a short
	 * sleep instead of spinning; an interrupt received while waiting does not
	 * end the wait, and the interrupt status is restored before returning.
	 */
	private static void awaitStarted() {
		boolean interrupted = false;
		try {
			while (!started) {
				try {
					Thread.sleep(10);
				} catch (InterruptedException e) {
					interrupted = true;
				}
			}
		} finally {
			if (interrupted) {
				Thread.currentThread().interrupt();
			}
		}
	}

	/**
	 * Validates a mobile number and returns its first seven digits.
	 *
	 * @param mobile the number to check; surrounding whitespace is ignored
	 * @return the first 7 characters of the trimmed number
	 * @throws IllegalArgumentException unless the trimmed value is exactly 11 digits and starts with 1
	 */
	private static String getNum7(String mobile) {
		final String digits = mobile.trim();
		final boolean wellFormed = digits.length() == 11
				&& digits.charAt(0) == '1'
				&& digits.matches("\\d+");
		if (wellFormed) {
			return digits.substring(0, 7);
		}
		throw new IllegalArgumentException("传入的手机号码" + digits
				+ "不正确,请使用正确的11位数字号码");
	}

	/**
	 * Fetches the response body of an HTTP URL with a POST request and
	 * returns it as text.
	 *
	 * The body is decoded as UTF-8 and every line is terminated with "\r\n".
	 * The reader is closed on both success and failure.
	 *
	 * @param uri an http/https URL
	 * @return the response body, or null if an I/O error occurs
	 */
	private static String getContentByUri(String uri) {
		BufferedReader br = null;
		try {
			URL url = new URL(uri);
			HttpURLConnection httpConnection = (HttpURLConnection) url
					.openConnection();
			httpConnection.setConnectTimeout(1000000);
			httpConnection.setReadTimeout(1000000);
			httpConnection.setRequestProperty("User-Agent", "new");
			httpConnection.setRequestMethod("POST");
			br = new BufferedReader(new InputStreamReader(
					httpConnection.getInputStream(), "UTF-8"));

			// Collect the body; the loop variable is null once the loop
			// ends, so it cannot serve as the result.
			StringBuilder sb = new StringBuilder();
			String line;
			while ((line = br.readLine()) != null) {
				sb.append(line).append("\r\n");
			}
			return sb.toString();
		} catch (IOException e) {
			e.printStackTrace();
			return null;
		} finally {
			if (br != null) {
				try {
					br.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if close fails.
				}
			}
		}
	}

	/**
	 * Counts how many times a regular expression matches in a string, using
	 * successive non-overlapping matches.
	 *
	 * @param des the text to search
	 * @param reg the regular expression
	 * @return the number of matches found
	 */
	private static int getNumber(String des, String reg) {
		final Matcher matcher = Pattern.compile(reg).matcher(des);
		int hits = 0;
		for (; matcher.find(); hits++) {
			// each successful find() is one occurrence
		}
		return hits;
	}

	/**
	 * Placeholder for fetching the content of a URL.
	 *
	 * No request is made: an earlier implementation based on Apache
	 * HttpClient (CloseableHttpClient / HttpGet / EntityUtils) is disabled,
	 * so the method currently ignores its argument.
	 *
	 * @param url the address to fetch (unused)
	 * @return always null
	 */
	public static String getUrlContent(String url) {
		return null;
	}
	
	/**
	 * Computes the CRC32 checksum of a string's UTF-8 bytes.
	 *
	 * The bytes are hashed as they are, so the result reflects the content
	 * of the string rather than only its length, and the encoding does not
	 * depend on the platform default charset.
	 *
	 * @param str the text to checksum
	 * @return the CRC32 value as an unsigned 32-bit number in a long
	 */
	private static long crc32(String str){
		CRC32 crc32 = new CRC32();
		// Charset.forName avoids the checked exception of getBytes(String).
		crc32.update(str.getBytes(java.nio.charset.Charset.forName("UTF-8")));
		return crc32.getValue();
	}
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值