第1步:输出单个文件中的前 N 个最常出现的英语单词。
功能1:输出文件中所有不重复的单词,按照出现次数由多到少排列,出现次数同样多的,以字典序排列。
功能2: 指定文件目录,对目录下每一个文件执行统计的操作。
功能3:指定文件目录,递归遍历该目录下所有子目录中的文件并统计单词。
功能4:输出出现次数最多的前 n 个单词。
package test; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Scanner; import java.util.StringTokenizer; public class test2 { public static void main(String[] args)throws IOException { List<Integer> list=new ArrayList<>(); Scanner scan=new Scanner(System.in); File f = new File("D:\\飘c1.txt"); FileInputStream fip = new FileInputStream(f); InputStreamReader reader = new InputStreamReader(fip, "gbk"); StringBuffer sb = new StringBuffer(); while (reader.ready()) { sb.append((char) reader.read()); } reader.close(); fip.close(); int i; int option=10; while(option!=0) { System.out.println("1、统计字母的个数 2、统计单词个数 3、统计出现最多次数的几个单词 4、统计删除无用表后的单词 0、退出"); option=scan.nextInt(); if(option==1) tongjizimu(sb.toString()); if(option==2) tongjidanci(sb.toString()); if(option==3) { int sum1=0; System.out.println("显示前n个出现最多的单词,请输入n"); sum1=scan.nextInt(); tongjidanci1(sb.toString(),sum1); } if(option==4) { tongjidanci2(sb.toString()); } if(option==0) { System.out.println("已退出。"); } }} static char ch(char c) { if(!(c>=97&&c<=122)) c+=32; return c; } static String[] StatList(String str) { StringBuffer sb = new StringBuffer(); HashMap<String ,Integer> has = new HashMap<String ,Integer> (); // 打开一个哈希表 String[] slist = str.split("\\W+"); int sum=0; int sum1=0; for (int i = 0; i < slist.length; i++) { if (!has.containsKey(slist[i])) { // 若尚无此单词 has.put(slist[i], 1); sum++; sum1++; } else {//如果有,就在将次数加1 Integer nCounts = has.get(slist[i]); has.put(slist[i],nCounts+1 ); } } int temp=0; int temp1=0; String []a=new String[sum]; int []b=new int[sum1]; Iterator iterator = has.keySet().iterator(); while(iterator.hasNext()){ String word = (String) iterator.next(); a[temp]=word; temp++; } return a; } static int[] StatList1(String str) { StringBuffer sb = new 
StringBuffer(); HashMap<String ,Integer> has = new HashMap<String ,Integer> (); // 打开一个哈希表 String[] slist = str.split("\\W+"); int sum=0; int sum1=0; for (int i = 0; i < slist.length; i++) { if (!has.containsKey(slist[i])) { // 若尚无此单词 has.put(slist[i], 1); sum++; sum1++; } else {//如果有,就在将次数加1 Integer nCounts = has.get(slist[i]); has.put(slist[i],nCounts+1 ); } } int temp=0; int temp1=0; String []a=new String[sum]; int []b=new int[sum1]; Iterator iterator = has.keySet().iterator(); while(iterator.hasNext()){ String word = (String) iterator.next(); b[temp1]=has.get(word); temp1++; } return b; } public static void tongjizimu(String a) { DecimalFormat df=new DecimalFormat("######0.00"); int i; String A=a; String M="abcdefghijklmnopqrstuvwxyz"; String temp = ""; char NUM[]=new char[A.length()]; char Z[]=new char[26]; int X[]=new int[26]; int MAX=0; Z=M.toCharArray(); for(int k=0;k<26;k++) { X[k]=0; for(i=0;i<A.length();i++) { NUM[i]=A.charAt(i); if(Z[k]==NUM[i]||Z[k]==ch(NUM[i])) { X[k]++; } } } System.out.println("这篇文章中英文字母个数分别为:"); double sum=0; System.out.println("排序如下:"); for(i=0;i<25;i++) for(int k=0;k<25-i;k++) { if(X[k]<X[k+1]) { int temp2=X[k]; X[k]=X[k+1]; X[k+1]=temp2; char temp3=Z[k]; Z[k]=Z[k+1]; Z[k+1]=temp3; } } for(i=0;i<26;i++) { System.out.println(Z[i]+"字母个数为:"+X[i]); sum=sum+X[i]; } for(i=0;i<26;i++) { double jkl=(X[i])/sum*100; System.out.println(Z[i]+"字母频率为:"+df.format(jkl)+"%"); } } public static void tongjidanci(String a) { int i; StringTokenizer st = new StringTokenizer(a,",.! \n"); String []a1=StatList(a); int[]b1=StatList1(a); System.out.println("//"); for(i=0;i<a1.length-1;i++) for(int j=0;j<a1.length-1-i;j++) { if(b1[j]<b1[j+1]) { int temp6=b1[j]; b1[j]=b1[j+1]; b1[j+1]=temp6; String temp7=a1[j]; a1[j]=a1[j+1]; a1[j+1]=temp7; } } for(i=0;i<a1.length-1;i++) { System.out.println("单词:"+a1[i]+" 且出现的次数:"+b1[i]); } } public static void tongjidanci1(String a,int n) { int i; StringTokenizer st = new StringTokenizer(a,",.! 
\n"); String []a1=StatList(a); int[]b1=StatList1(a); System.out.println("//"); for(i=0;i<a1.length-1;i++) for(int j=0;j<a1.length-1-i;j++) { if(b1[j]<b1[j+1]) { int temp6=b1[j]; b1[j]=b1[j+1]; b1[j+1]=temp6; String temp7=a1[j]; a1[j]=a1[j+1]; a1[j+1]=temp7; } } for(i=0;i<n;i++) { System.out.println("单词:"+a1[i]+" 且出现的次数:"+b1[i]); } } public static void tongjidanci2(String a) { int i; StringTokenizer st = new StringTokenizer(a,""); String []a1=StatList(a); int[]b1=StatList1(a); System.out.println("//"); for(i=0;i<a1.length-1;i++) for(int j=0;j<a1.length-1-i;j++) { if(b1[j]<b1[j+1]) { int temp6=b1[j]; b1[j]=b1[j+1]; b1[j+1]=temp6; String temp7=a1[j]; a1[j]=a1[j+1]; a1[j+1]=temp7; } } for(i=0;i<a1.length-1;i++) { System.out.println("单词:"+a1[i]+" 且出现的次数:"+b1[i]); } } }