package High_frequency_word;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import javax.imageio.stream.FileImageInputStream;
/**
 * Counts how often each word occurs in a text file and writes the words, ordered from the most
 * to the least frequent, to a result file.
 *
 * <p>The no-argument entry points keep using the hard-coded default paths; the overloads that
 * take paths allow the same processing to run on any pair of files.
 */
public class SplitTextToWord {
    /** File read by {@link #readMyFile()}. */
    private static final String DEFAULT_INPUT_PATH = "D:\\Users\\lq\\Desktop\\fileTest\\Etest.txt";

    /** File written by {@link #selectAndSort(String[])} and {@link #readMyFile()}. */
    private static final String DEFAULT_OUTPUT_PATH =
            "D:\\Users\\lq\\Desktop\\fileTest\\result08.txt";

    /** Charset name used to decode the input and to encode the result file. */
    private static final String CHARSET_NAME = "utf-8";

    /** Orders entries by descending count; entries with equal counts compare as equal. */
    private static final Comparator<Map.Entry<String, Integer>> BY_COUNT_DESCENDING =
            new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Entry<String, Integer> left, Entry<String, Integer> right) {
                    return right.getValue().compareTo(left.getValue());
                }
            };

    // The three buffer fields below are not used by any method of this class. They are kept
    // because they are package-visible and may be referenced from elsewhere in the package.
    int memoryBufSize = 1024;
    byte[] sizedBuffer = new byte[memoryBufSize];
    ByteBuffer byteBuf = ByteBuffer.allocate(1024);

    /** Distinct words, in order of first appearance. */
    ArrayList<String> charHouse = new ArrayList<String>();

    /** Maps each registered word to its occurrence count. */
    HashMap<String, Integer> hashMap = new HashMap<String, Integer>();

    /**
     * Counts the given words and writes the ranking to the default result file.
     *
     * @param srcText the words to count; see {@link #selectAndSort(String[], String)}
     */
    public void selectAndSort(String[] srcText) {
        selectAndSort(srcText, DEFAULT_OUTPUT_PATH);
    }

    /**
     * Adds one to the count of every occurrence in {@code srcText} of a word that is already a
     * key of {@link #hashMap}, then writes all entries of the map to {@code outputPath} as
     * {@code word=count} pairs, each followed by a single space, highest count first.
     *
     * <p>Words that are not keys of the map are ignored. I/O failures are reported on the
     * standard error stream and are not propagated.
     *
     * @param srcText the words to count; {@code null} is treated as no words
     * @param outputPath path of the file to create or overwrite
     */
    public void selectAndSort(String[] srcText, String outputPath) {
        if (srcText != null) {
            // One map look-up per word instead of scanning the whole array for every key.
            for (String word : srcText) {
                Integer current = hashMap.get(word);
                if (current != null) {
                    hashMap.put(word, current + 1);
                }
            }
        }

        List<Map.Entry<String, Integer>> ranked =
                new ArrayList<Map.Entry<String, Integer>>(hashMap.entrySet());
        Collections.sort(ranked, BY_COUNT_DESCENDING);

        // The writer has to be closed for the buffered output to reach the file (an unclosed
        // writer leaves a 0 KB file); try-with-resources closes it even when a write fails.
        try (FileOutputStream fileOut = new FileOutputStream(outputPath);
                OutputStreamWriter writer = new OutputStreamWriter(fileOut, CHARSET_NAME)) {
            for (Map.Entry<String, Integer> entry : ranked) {
                writer.write(entry.getKey() + "=" + entry.getValue() + " ");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Processes the default input file and writes the ranking to the default result file. */
    public void readMyFile() {
        readMyFile(DEFAULT_INPUT_PATH, DEFAULT_OUTPUT_PATH);
    }

    /**
     * Reads {@code inputPath} as UTF-8 text, splits it into words, registers every distinct
     * word in {@link #charHouse} and {@link #hashMap} with a count of zero, and then delegates
     * to {@link #selectAndSort(String[], String)} to count the words and write the ranking.
     *
     * <p>If the input cannot be read, the failure is reported on the standard error stream and
     * nothing is written.
     *
     * @param inputPath path of the text file to analyse
     * @param outputPath path of the result file to create or overwrite
     */
    public void readMyFile(String inputPath, String outputPath) {
        StringBuilder text = new StringBuilder();
        try (FileInputStream fileIn = new FileInputStream(inputPath);
                BufferedReader reader =
                        new BufferedReader(new InputStreamReader(fileIn, CHARSET_NAME), 1024)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; re-insert a separator so the last
                // word of one line is not fused with the first word of the next.
                text.append(line).append(' ');
            }
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        String[] words = textSplit(text.toString());
        charHouse = collectElement(charHouse, words);
        for (String word : charHouse) {
            hashMap.put(word, 0);
        }
        selectAndSort(words, outputPath);
    }

    /**
     * Splits text into words.
     *
     * <p>The text is cut at runs of whitespace and every non-word character (anything matched
     * by the regex {@code \W}) is removed from each piece. This is "version one" of the
     * clean-up: it strips such characters anywhere in the piece, not only at its first and last
     * position. Pieces that end up empty are dropped.
     *
     * @param str the text to split; {@code null} yields an empty array
     * @return the cleaned words in their original order, never {@code null}
     */
    public String[] textSplit(String str) {
        if (str == null) {
            return new String[0];
        }
        List<String> words = new ArrayList<String>();
        for (String token : str.split("\\s+")) {
            String word = token.replaceAll("\\W", "");
            if (!word.isEmpty()) {
                words.add(word);
            }
        }
        return words.toArray(new String[words.size()]);
    }

    /**
     * Appends to {@code collection} every element of {@code testElement} that it does not
     * already contain, preserving the order of first appearance.
     *
     * @param collection the list to extend; it is modified in place
     * @param testElement the candidate elements; {@code null} leaves the list unchanged
     * @return the same {@code collection} instance
     */
    public ArrayList<String> collectElement(ArrayList<String> collection, String[] testElement) {
        if (testElement == null) {
            return collection;
        }
        for (String element : testElement) {
            if (!collection.contains(element)) {
                collection.add(element);
            }
        }
        return collection;
    }

    /**
     * Runs the word count on the default input file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new SplitTextToWord().readMyFile();
    }
}
这里高—— 频 哦—— 敏——感——词——汇太多了,我都找中 qiang 了 ,这个让我怎解释呢
软件工程作业01
最新推荐文章于 2024-09-11 17:43:04 发布