一、WordMap类,分析实现类,分析过程分为文件读取、句子分析、单词分析和输出格式
/**
 * Analyzes an English text: reports which share of the words has each length
 * and the average number of words per sentence.
 *
 * <p>The analysis runs in four steps: read the file, split the text into
 * sentences, split each sentence into words, and print the formatted result.
 * Counts accumulate across calls on the same instance, so use a fresh instance
 * per text when independent statistics are wanted.
 *
 * @author Administrator
 */
public class WordMap {
    /** Words of this length or longer share the last ("or larger") bucket. */
    private static final int MAX_TRACKED_LENGTH = 15;

    /** Characters that end a sentence; compiled once because it never changes. */
    private static final Pattern SENTENCE_END = Pattern.compile("[。?!?.!]");

    /** Every word seen so far, in reading order. */
    private final ArrayList<String> words = new ArrayList<>();

    /** lengthCounts[i] is the number of words of length i + 1 (last slot: 15 or more). */
    private final int[] lengthCounts = new int[MAX_TRACKED_LENGTH];

    /** Every sentence that contained at least one word. */
    private final ArrayList<String> sentence = new ArrayList<>();

    /** Word count of each entry in {@link #sentence}, index-aligned with it. */
    private final ArrayList<Integer> sentenceLength = new ArrayList<>();

    /**
     * Reads a text file line by line and returns its content as one string.
     *
     * <p>Lines are joined with a single space so that the last word of one line
     * and the first word of the next stay separate words. On an I/O error the
     * stack trace is printed and whatever was read up to that point is returned.
     *
     * @param fileName path of the file to read
     * @return the file content with line breaks replaced by spaces; empty if
     *         nothing could be read
     */
    public String readFileByLines(String fileName) {
        StringBuilder allWords = new StringBuilder();
        // try-with-resources closes the reader exactly once, on success or failure.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (allWords.length() > 0) {
                    allWords.append(' ');
                }
                allWords.append(line);
            }
        } catch (IOException e) {
            // Best effort: report the problem and fall through with the partial text.
            e.printStackTrace();
        }
        return allWords.toString();
    }

    /**
     * Prints the percentage of words of each length (1 to 14, then 15 or larger)
     * followed by the average number of words per sentence.
     *
     * <p>When no words or no sentences were parsed, the corresponding values are
     * printed as 0.00 rather than NaN.
     */
    public void outputResult() {
        for (int length = 1; length < MAX_TRACKED_LENGTH; length++) {
            System.out.printf("words of length %d: %4.2f%%\n",
                    length, percentOfWords(lengthCounts[length - 1]));
        }
        System.out.printf("words of length %d or larger: %4.2f%%\n",
                MAX_TRACKED_LENGTH, percentOfWords(lengthCounts[MAX_TRACKED_LENGTH - 1]));

        float average = sentence.isEmpty() ? 0f : (float) words.size() / sentence.size();
        System.out.printf("average sentence length: %4.2f\n", average);
    }

    /**
     * Splits each sentence into words and updates the word and sentence statistics.
     *
     * <p>Commas and colons are treated as word separators, as is any run of
     * whitespace. A standalone "--" is not counted as a word. Fragments that
     * contain no word at all (for example the gap between "!!") are not counted
     * as sentences.
     *
     * @param substrs the sentences to analyze; {@code null} and {@code null}
     *                elements are ignored
     */
    public void parseWords(String[] substrs) {
        if (substrs == null) {
            return;
        }
        for (String rawSentence : substrs) {
            if (rawSentence == null) {
                continue;
            }
            String cleaned = rawSentence.replaceAll(",|:", " ").trim();
            int sentenceWords = 0;
            if (!cleaned.isEmpty()) {
                // split() yields every token, including the last one in the sentence.
                for (String token : cleaned.split("\\s+")) {
                    if (token.isEmpty() || token.equals("--")) {
                        continue;
                    }
                    recordWord(token);
                    sentenceWords++;
                }
            }
            if (sentenceWords > 0) {
                sentence.add(rawSentence);
                sentenceLength.add(sentenceWords);
            }
        }
    }

    /**
     * Splits a text into sentences at sentence-ending punctuation.
     *
     * <p>Every terminator character splits, so abbreviations such as "Mr." and
     * decimal numbers are split as well.
     *
     * @param allWords the full text; {@code null} yields an empty array
     * @return the text fragments between terminators, terminators removed
     */
    public String[] parseSentence(String allWords) {
        if (allWords == null) {
            return new String[0];
        }
        return SENTENCE_END.split(allWords);
    }

    /**
     * Runs the whole analysis on one file: read, split into sentences, count
     * words, and print the result.
     *
     * @param fileName path of the text file to analyze
     */
    public void parseFile(String fileName) {
        String allWords = readFileByLines(fileName);
        String[] substrs = parseSentence(allWords);
        parseWords(substrs);
        outputResult();
    }

    /** Stores one word and bumps the counter of its length bucket. */
    private void recordWord(String word) {
        words.add(word);
        int bucket = Math.min(word.length(), MAX_TRACKED_LENGTH) - 1;
        lengthCounts[bucket]++;
    }

    /** Returns {@code count} as a percentage (0-100) of all words, or 0 when there are none. */
    private float percentOfWords(int count) {
        return words.isEmpty() ? 0f : 100f * count / words.size();
    }
}
二、WordMapDriver程序入口类
/**
 * Command-line entry point for the word analysis.
 *
 * @author Administrator
 */
public class WordMapDriver {
    /**
     * Analyzes the text file named by the single command-line argument and
     * prints the statistics to standard output.
     *
     * <p>With any other number of arguments a usage line is printed to standard
     * error and nothing is analyzed.
     *
     * @param args command-line arguments; exactly one is expected, the path of
     *             the text file
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            // Tell the user what went wrong instead of exiting without a word.
            System.err.println("Usage: java WordMapDriver <text-file>");
            return;
        }
        System.out.println("Analyzed text: " + args[0]);
        WordMap wordMap = new WordMap();
        wordMap.parseFile(args[0]);
    }
}