java输出结果到txt_java结果输出至txt

该Java程序实现从文本中统计词频,并将结果按词频降序排序,最后将排序后的结果输出到txt文件。程序包括分词、统计、排序和输出到文件等功能,使用TreeSet和ArrayList进行数据处理。
摘要由CSDN通过智能技术生成

展开全部

帮你修改了一下 你看看可以吗

package com.isoftstone.baidu;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileWriter;

import java.io.IOException;

import java.util.ArrayList;

import java.util.Collections;

import java.util.Comparator;

import java.util.Iterator;

import java.util.List;

import java.util.Set;

import java.util.TreeSet;

public class Article {

//保存文章的e68a84e8a2ad3231313335323631343130323136353331333332616438内容

String content;

//保存分割后的单词集合

String[] rawWords;

//保存统计后的单词集合

String[] words;

//保存单词对应的词频

int[] wordFreqs;

//构造函数,输入文章内容

//提高部分:从文件中读取

public Article() {

content =

"kolya is one of the richest films i've seen in some time . zdenek sverak plays a confirmed old bachelor ( who's likely to remain so ) , who finds his life as a czech cellist increasingly impacted by the five-year old boy that he's taking care of . though it ends rather abruptly-- and i'm whining , 'cause i wanted to spend more time with these characters-- the acting , writing , and production values are as high as , if not higher than , comparable american dramas . this father-and-son delight-- sverak also wrote the script , while his son , jan , directed-- won a golden globe for best foreign language film and , a couple days after i saw it , walked away an oscar . in czech and russian , with english subtitles . ";

}

//对文章根据分隔符进行分词,将结果保存到rawWords数组中

public void splitWord() {

//分词的时候,因为标点符号不参与,所以所有的符号全部替换为空格

final char SPACE = ' ';

content = content.replace('\'', SPACE).replace(',', SPACE).replace('.', SPACE);

content = content.replace('(', SPACE).replace(')', SPACE).replace('-', SPACE);

rawWords = content.split("\\s+");//凡是空格隔开的都算单词,上面替换了', 所以I've 被分成2个 //单词

}

//统计词,遍历数组

public void countWordFreq() {

//将所有出现的字符串放入唯一的set中,不用map,是因为map寻找效率太低了

Set set = new TreeSet();

for (String word : rawWords) {

set.add(word);

}

Iterator ite = set.iterator();

List wordsList = new ArrayList();

List freqList = new ArrayList();

//多少个字符串未知,所以用list来保存先

while (ite.hasNext()) {

String word = (String) ite.next();

int count = 0;//统计相同字符串的个数

for (String str : rawWords) {

if (str.equals(word)) {

count++;

}

}

wordsList.add(word);

freqList.add(count++);

}

//存入数组当中

words = wordsList.toArray(new String[0]);

wordFreqs = new int[freqList.size()];

for (int i = 0; i < freqList.size(); i++) {

wordFreqs[i] = freqList.get(i);

}

}

//根据词频,将词数组和词频数组进行降序排序

public void sort() {

class Word {

private String word;

private int freq;

public Word(String word, int freq) {

this.word = word;

this.freq = freq;

}

}

//注意:此处排序,1)首先按照词频降序排列, 2)如果词频相同,按照字母降序排列,

//如 'abc' > 'ab' >'aa'

class WordComparator implements Comparator {

public int compare(Object o1, Object o2) {

Word word1 = (Word) o1;

Word word2 = (Word) o2;

if (word1.freq < word2.freq) {

return 1;

} else if (word1.freq > word2.freq) {

return -1;

} else {

int len1 = word1.word.trim().length();

int len2 = word2.word.trim().length();

String min = len1 > len2 ? word2.word : word1.word;

String max = len1 > len2 ? word1.word : word2.word;

for (int i = 0; i < min.length(); i++) {

if (min.charAt(i) < max.charAt(i)) {

return 1;

}

}

return 1;

}

}

}

List wordList = new ArrayList();

for (int i = 0; i < words.length; i++) {

wordList.add(new Word(words[i], wordFreqs[i]));

}

Collections.sort(wordList, new WordComparator());

for (int i = 0; i < wordList.size(); i++) {

Word wor = (Word) wordList.get(i);

words[i] = wor.word;

wordFreqs[i] = wor.freq;

}

}

//将排序结果输出

public void printResult() {

System.out.println("Total " + words.length + " different words in the content!");

for (int i = 0; i < words.length; i++) {

System.out.println(wordFreqs[i] + "  " + words[i]);

}

}

// 输出到文本

private void outputResult() {

File file = new File("C:\\output.txt");

try {

if (file.exists()) file.delete();

BufferedWriter bw = new BufferedWriter(new FileWriter(file));

StringBuffer out = new StringBuffer();

for(int i = 0; i < words.length; i++) {

out.append(wordFreqs[i] + "  " + words[i] + "\n");

}

bw.write(out.toString());

bw.flush();

bw.close();

} catch (IOException e) {

e.printStackTrace();

}

}

//测试类的功能

public static void main(String[] args) {

Article a = new Article();

a.splitWord();

a.countWordFreq();

a.sort();

a.printResult();

a.outputResult();

}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值