支付宝笔试题

往日看到一道支付宝笔试题,自己做了一下,尽管效率不高,也是个人思考的结果。题目如下:

有一个100G大小的文件,里面存的全是数字,并且每个数字间用逗号隔开。现在要在这一大堆数字中找出100个最大的数。

 

做法:

假设数字为4字节整数,逗号为2字节unicode字符,100G文件本人电脑无法容纳,所以取2亿整数,文件大小1.2G

 

1. 生成二进制文件(使用DataOutputStream,使用缓冲区,耗时79秒):

 

                               File file = new File("E:\\test.dat");
		if (!file.exists()) {
			file.createNewFile();
		}
		long time = System.currentTimeMillis();
		DataOutputStream stream = new DataOutputStream(new BufferedOutputStream(new  FileOutputStream(file)));
		Random random = new Random();
		long count = 200000000;
		System.out.println(count+"is max long int in java");
		int temp;
		for (long i = 0; i < count; i++) {
			temp = random.nextInt();
			stream.writeInt(temp);
			stream.writeChar(',');			
		}
		System.out.println("循环完成");
		stream.flush();
		stream.close();
		time = System.currentTimeMillis() - time;
		System.out.println(time+"毫秒");

 

 

2. 分析文件(使用DataInputStream,使用缓冲区,耗时65秒)

     a. 读取前100个整数

     b. 排序,把排序后的数组看成堆,最小值在根节点

     c. 遍历整个文件,把读到的数和最小值比较,如果不比最小值大,则丢弃;如果比最小值大,则替换最小值并重建堆。

     d. 文件读取完毕,堆中的元素就是要找的100个最大值,再执行一次排序。

    

TestRead.java
public static void main(String[] args) throws IOException, InterruptedException {
		File file = new File("E:\\test.dat");		
		long time = System.currentTimeMillis();
		DataInputStream stream = new DataInputStream(new  BufferedInputStream(new FileInputStream(file)));
		int len = 100;
		
		
		long count = 100;
		int arr[] = new int[100];		
		for (int i = 0; i < len; i++) {				
			arr[i] = stream.readInt();
			stream.readChar();			
		}
		
		Arrays.sort(arr);		
		print(arr);		
		int temp = 0;
		while(true) {	
			try {					
			   temp = stream.readInt();
			   stream.readChar();
			   count++;
			   if(temp > arr[0]) {
			   		addToheap(arr,temp);		   		
			   } else {
			   		continue;
			   }
			 } catch(EOFException ioe) {
			 	  break;
		   }
		}
		stream.close();
		time = System.currentTimeMillis() - time;
		System.out.println(time+"毫秒"+":"+count+"个");
		Arrays.sort(arr);
		print(arr);
		
		
	}
	
   static void addToheap(int arr[], int temp){
	   arr[0] = temp;
	   int index = 0;
	   int left = 1; 
	   int right = 2;
	   int minIndex = index;
	   while (left < arr.length) {
		   if (arr[index] > arr[left]) {
			   minIndex = left;
		   }
		   if (right < arr.length && arr[minIndex] > arr[right]) {
			   minIndex = right;
		   }
		   if (minIndex == index) {
			   break;
		   } else {
			   temp = arr[minIndex];
			   arr[minIndex] = arr[index];
			   arr[index] = temp;
			   index = minIndex;
			   left = 2*index + 1;
			   right = 2*index + 2;
		   }
			
		}
			   
	}
static void print(int[] aa) {
  for (int i = 0; i < aa.length; i++) {
   System.out.print(aa[i] + ",");
   if ((i + 1) % 10 == 0) {
    System.out.println();
   }
  }
 }

 

3. 使用内存映射(nio)代替DataInputStream,用时12秒。只能使用MyEclipse6.5自带的jre;使用jdk1.5、jdk1.6时

    会报存储空间不足、堆空间不足。

4. 对文件进行分段映射(暂时分为10段),每个线程负责读取一段,找出该段最大的100个,

    再在找到的10X100个数中找最大的100个,用时10秒,性能没有显著改善。

package test;

import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;





public class MultiThreadReader {

	/**
	 * @param args
	 */
	public static void main(String[] args) {
		long time = System.currentTimeMillis();
		long len = 200000000 * 6;
		int reads = 200;
		LinkedList<RandomReader> randomReaders = new LinkedList<RandomReader>();
	    RandomReader randomReader  = null;
		for(int i = 0; i < reads; i++) {
			randomReader = new RandomReader(i*len/reads, len/reads/6);
			randomReaders.add(randomReader);
			new Thread(randomReader).start();
		}
		int numberNeedFound = 100;
		int firstArr[] = new int[numberNeedFound];
		boolean firstFound = false;
		HashSet<RandomReader> set = new HashSet<RandomReader>();
		try {
			Thread.sleep(100);
		} catch (InterruptedException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
	    while(set.size() < reads) {
			for(int i = 0; i < reads; i++) {
				randomReader = randomReaders.get(i);
				if (randomReader.done && !firstFound) {
					firstFound = true;
					firstArr = randomReader.arr;
					set.add(randomReader);
				} else if (firstFound && randomReader.done 
						&& !set.contains(randomReader)) {
					set.add(randomReader);
					for (int j = 0; j < randomReader.arr.length; j++) {
						if (randomReader.arr[j]>firstArr[0]) {
                                                                                                 TestRead.addToheap(firstArr, randomReader.arr[j]);
                                                                                                }		
			                                }
				}
			}
		}
	    time = System.currentTimeMillis() - time;
	    Arrays.sort(firstArr);
	    TestRead.print(firstArr);
	    System.out.printf("使用时间%d秒\n", time);
		

	}

}

class RandomReader implements Runnable {
	long offset = 0;
	long len = 10;
	RandomAccessFile file;
	boolean done = false;
	int numberNeedFound = 100;
	int arr[] = new int[numberNeedFound];
    MappedByteBuffer buffer;
    static int id = 0;
    int sid;
	public RandomReader(long offset, long len) {
		sid = id++;
		this.offset = offset;
		this.len = len;
		try {
			file = new RandomAccessFile("E:\\test.dat", "r");
			buffer = file.getChannel().map(FileChannel.MapMode.READ_ONLY, offset, len*6);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	public void run() {
		int count = 0;
		for (int i = 0; i < numberNeedFound; i++) {
			arr[i] = buffer.getInt();
			buffer.getChar();
			count++;
		}
		Arrays.sort(arr);
		try {
			int temp = 0;
			while (count < len) {
				temp = buffer.getInt();
				buffer.getChar();				
				count++;
				if (temp > arr[0]) {
					TestRead.addToheap(arr, temp);
				}
				if(count == len/2) {
					System.out.printf("reader %d completed 50 percent\n", sid);
				}
			}
			done = true;
			System.out.printf("reader %d completed 100 percent count = %d \n", sid,count);

		} catch (Exception e) {
			System.out.printf("reader %d is dead count = %d\n", sid,count);
			e.printStackTrace();
		}
		
	}
}

 

 

 

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值