需求:一个文件夹中有若干个txt文件,其中每个文件的格式大概为:
做现场土壤和地下水调查
(848479566)
群主
海冬清
(1136655133)
管理员
管理员潋滟
(1951098843)
管理员
无心即乐
(1207685)
資源國際
(1816377)
土星哥
(2186944)
~海~
(2862282)
海洋世界
(3253989)
沧海一粟
(3520672)
东方
(5391796)
上海斐斯热脱附
(6699554)
各文件的编码并不统一。为了提取每个()中的号码并写入到另一个文件中,代码如下:
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the numbers written in parentheses (for example {@code (848479566)})
 * from every file of a folder and writes them, space separated, to a sibling
 * folder whose name is the source folder name followed by {@code 1}.
 *
 * <p>The input files do not share one encoding, so each file is probed for a
 * byte-order mark before it is decoded.
 */
public class QQNumberExtract {

    /** Folder that is scanned when no command-line argument is supplied. */
    private static final String DEFAULT_FOLDER = "F:" + File.separator + "土壤qq群好友";

    /** Charset assumed for files that carry no recognizable byte-order mark. */
    private static final String FALLBACK_CHARSET = "GBK";

    /** A run of digits enclosed in ASCII parentheses; group 1 is the number. */
    private static final Pattern NUMBER_IN_PARENS = Pattern.compile("\\((\\d+)\\)");

    /**
     * Processes every regular file found directly inside the input folder.
     *
     * @param args optional; {@code args[0]} overrides the default input folder
     */
    public static void main(String[] args) {
        String folderPath = (args != null && args.length > 0) ? args[0] : DEFAULT_FOLDER;
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] txtFiles = new File(folderPath).listFiles();
        if (txtFiles == null) {
            System.err.println("Not a readable directory: " + folderPath);
            return;
        }
        for (File f : txtFiles) {
            if (!f.isFile()) {
                continue; // sub-directories cannot be opened as text
            }
            extract(f, getCharset(f));
        }
    }

    /**
     * Reads {@code txtFile}, collects every parenthesized number, prints the
     * result to standard output and writes it to
     * {@code <parent folder>1/<file name without suffix>1.txt}, replacing any
     * previous content of that file.
     *
     * <p>I/O failures are reported on standard error and are not propagated.
     *
     * @param txtFile  file to read
     * @param encoding charset name used to decode the file; {@code null} selects GBK
     */
    public static void extract(File txtFile, String encoding) {
        String charsetName = (encoding != null) ? encoding : FALLBACK_CHARSET;
        try {
            String numbers = readNumbers(txtFile, charsetName);
            System.out.println(numbers);
            writeNumbers(txtFile, numbers);
        } catch (IOException e) {
            System.err.println("Failed to process " + txtFile + ": " + e);
            e.printStackTrace();
        }
    }

    /** Returns all parenthesized numbers of the file, each followed by one space. */
    private static String readNumbers(File txtFile, String charsetName) throws IOException {
        StringBuilder sb = new StringBuilder();
        // The stream is declared separately so it is closed even when the
        // charset name is rejected by the InputStreamReader constructor.
        try (FileInputStream in = new FileInputStream(txtFile);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, charsetName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher m = NUMBER_IN_PARENS.matcher(line);
                while (m.find()) {
                    sb.append(m.group(1)).append(' ');
                }
            }
        }
        return sb.toString();
    }

    /** Writes {@code numbers} to the output file derived from {@code txtFile}. */
    private static void writeNumbers(File txtFile, String numbers) throws IOException {
        // Resolve against the working directory so a bare file name still has a parent.
        File parent = txtFile.getAbsoluteFile().getParentFile();
        if (parent == null) {
            throw new IOException("Cannot determine the parent folder of " + txtFile);
        }
        File saveDir = new File(parent.getPath() + "1");
        if (!saveDir.mkdirs() && !saveDir.isDirectory()) {
            throw new IOException("Cannot create output folder " + saveDir);
        }
        File target = new File(saveDir, getNameWithOutSuffix(txtFile.getName()) + "1.txt");
        // append=false truncates an existing file, so no delete/create step is needed.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(target, false))) {
            bw.write(numbers);
        }
    }

    /**
     * Returns the file name without its suffix.
     *
     * @param name file name, e.g. {@code "a.b.txt"}
     * @return the text before the last {@code '.'} ({@code "a.b"}); the name
     *         itself when it contains no dot; {@code null} when {@code name} is null
     */
    public static String getNameWithOutSuffix(String name) {
        if (name == null) {
            return null;
        }
        int dot = name.lastIndexOf('.');
        return (dot < 0) ? name : name.substring(0, dot);
    }

    /**
     * Guesses the charset of a file from its byte-order mark.
     *
     * @param file file to probe
     * @return {@code "UTF-8"} for an EF BB BF mark, {@code "UTF-16"} for an FF FE
     *         or FE FF mark (that decoder reads the mark to pick the byte order),
     *         otherwise {@code "GBK"}; also {@code "GBK"} when the file cannot be read
     */
    private static String getCharset(File file) {
        try (BufferedInputStream bin = new BufferedInputStream(new FileInputStream(file))) {
            // read() yields -1 at end of file, which matches none of the marks below.
            int b0 = bin.read();
            int b1 = bin.read();
            int b2 = bin.read();
            if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
                return "UTF-8";
            }
            if ((b0 == 0xFF && b1 == 0xFE) || (b0 == 0xFE && b1 == 0xFF)) {
                return "UTF-16";
            }
            return FALLBACK_CHARSET;
        } catch (IOException e) {
            System.err.println("Cannot detect charset of " + file + ": " + e);
            return FALLBACK_CHARSET;
        }
    }
}
转载于:https://blog.51cto.com/zp1990/1372245