import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
/**
 * Command-line utility that removes duplicate lines from a UTF-8 text file.
 *
 * <p>Usage: {@code java -Xmx1000m SpeedClear <source> <destination>}.
 *
 * <p>Every distinct line of the input is held in memory at the same time, so the JVM heap
 * must be large enough for the input; raise {@code -Xmx} or split the input for big files.
 */
public class SpeedClear {

    /** Size of the reader's buffer, in chars. */
    private static final int READ_BUFFER_CHARS = 20 * 1024 * 1024;

    /** A progress marker ("..") is printed once per this many input lines. */
    private static final int PROGRESS_INTERVAL = 30000;

    /**
     * Program entry point.
     *
     * @param args
     *            exactly two arguments: the source file path and the destination file path
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            print();
            System.exit(1);
        }
        if (args.length != 2) {
            System.out.println("Format error...");
            System.exit(1);
        }
        String pathname = args[0];
        String newPath = args[1];
        clear(pathname, newPath); // run the de-duplication
    }

    /**
     * Copies the lines of {@code pathname} to {@code newPath}, writing each distinct line
     * exactly once, in order of first occurrence, terminated by {@code "\r\n"}.
     *
     * <p>Both files are treated as UTF-8. The input is read completely before the output is
     * opened, so the two paths may refer to the same file. I/O failures are reported on
     * standard error; this method does not throw them.
     *
     * @param pathname
     *            path of the source file
     * @param newPath
     *            path of the file to write the de-duplicated lines to
     */
    public static void clear(String pathname, String newPath) {
        System.out.println("Start... ");
        try {
            Set<String> uniqueLines = readUniqueLines(pathname);
            writeLines(uniqueLines, newPath);
            System.out.println("");
            System.out.println("size = " + uniqueLines.size());
            System.out.println("End...");
        } catch (IOException e) {
            // Report the real cause (missing file, permission problem, ...) instead of
            // guessing that the file was too large.
            System.out.println("");
            System.err.println("Failed to process file: " + e);
        }
    }

    /** Reads every line of the file and returns the distinct ones in first-seen order. */
    private static Set<String> readUniqueLines(String pathname) throws IOException {
        Set<String> uniqueLines = new LinkedHashSet<String>();
        try (InputStream in = new FileInputStream(pathname);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8), READ_BUFFER_CHARS)) {
            long lineCount = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                uniqueLines.add(line);
                if (lineCount % PROGRESS_INTERVAL == 0) {
                    System.out.print("..");
                }
                lineCount++;
            }
        }
        return uniqueLines;
    }

    /** Writes each line followed by CRLF to the given file as UTF-8, replacing its content. */
    private static void writeLines(Set<String> lines, String newPath) throws IOException {
        try (OutputStream out = new FileOutputStream(newPath);
                Writer writer = new BufferedWriter(
                        new OutputStreamWriter(out, StandardCharsets.UTF_8))) {
            for (String line : lines) {
                writer.write(line);
                writer.write("\r\n");
            }
        }
    }

    /** Prints the usage banner to standard output. */
    public static void print() {
        System.out.println("*************************************************");
        System.out.println("\t\tTo repeat \t\t");
        System.out.println();
        System.out.println(" format: java -Xmx1000m SpeedClear c:\\old.txt c:\\new.txt\t\t");
        System.out.println();
        System.out.println("\t\tAuthor:xxserQQ:616100108");
        System.out.println("*************************************************");
    }
}