短文词频

最新推荐文章于 2020-11-25 15:29:33 发布

hhys08120

最新推荐文章于 2020-11-25 15:29:33 发布

阅读量439

点赞数

分类专栏：算法文章标签：词频

本文链接：https://blog.csdn.net/hhys08120/article/details/10148285

版权

算法专栏收录该内容

16 篇文章 0 订阅

订阅专栏

本程序从文本文件 text.txt 读入一篇英文短文，统计该短文中不同的单词及其出现次数，并按字典序将单词及其出现次数输出到文本文件 word.txt 中。程序用一棵有序二叉树（二叉排序树）存储这些单词及其出现次数，一边读入一边建树；然后中序遍历该二叉树，将遍历经过的各节点的内容输出。

#include<iostream>
#include<string>
#include<fstream>

using namespace std;

// Input essay. Opened for reading when this global is constructed, i.e. before main() runs.
ifstream inFile("text.txt");
// Report file. Opened for writing when this global is constructed; existing contents are discarded.
ofstream outFile("word.txt",ios::out|ios::trunc);

// One node of the binary tree: a word together with its occurrence count.
struct treeNode
{
	string data;            // the word stored in this node
	int count;              // number of occurrences recorded for the word
	treeNode *leftChild;    // left subtree, NULL when absent
	treeNode *rightChild;   // right subtree, NULL when absent

	// Builds a leaf for a word seen for the first time:
	// the count starts at 1 and both child links are NULL.
	treeNode(string data)
		: data(data), count(1), leftChild(NULL), rightChild(NULL)
	{
	}
};

void biTree(treeNode *&node,string word)//建立有序二叉树
{
	if( node==NULL )
	{
		node = new treeNode(word);
	}
	else
	{
		int cmp;
		cmp = strcmp(node->data.c_str(),word.c_str());
		if(cmp == 0)
			++node->count;
		else if(cmp > 0)
			biTree(node->leftChild,word);
		else
			biTree(node->rightChild,word);
	}
}

// In-order traversal of the tree rooted at `node` (left subtree, node, right
// subtree). For every node visited, writes one line "<word> <count>" to both
// standard output and the global `outFile`. For a tree built by biTree this
// lists the words in ascending order. A NULL `node` writes nothing.
void midOrder(treeNode *node)
{
	if(node==NULL)
	{
		return;
	}
	midOrder(node->leftChild);
	// The separator is a plain space. The previous std::ends inserted a '\0'
	// byte, which left NUL characters in word.txt. '\n' is used instead of
	// endl so the streams are not flushed once per word.
	cout<<node->data<<' '<<node->count<<'\n';
	outFile<<node->data<<' '<<node->count<<'\n';
	midOrder(node->rightChild);
}

int main()
{
	treeNode *node=NULL;
	//char inBuffer[100];
	string inBuffer;
	string word;	
	if(!inFile.is_open())
	{
		cout<<"Error opening file"<<endl;
		exit(0);
	}
	if(!outFile)
	{
		cerr<<"Error opening outfile"<<endl;
		exit(0);
	}
	while(getline(inFile,inBuffer))  //读取一行
	{
		size_t i=0;
		inBuffer+='\0';
		//cout<<inBuffer<<endl;
		while(i<inBuffer.size())   //分解成单词
		{ 
			if((inBuffer[i]<='z'&&inBuffer[i]>='a') ||(inBuffer[i]<='Z'&&inBuffer[i]>='A')) 
				word += inBuffer[i];
			else
			{
				if(!word.empty())
					biTree(node,word);
					//cout<<word<<endl;
				word.clear();
			}
			++i;
		}
		inBuffer.clear();
	}
	midOrder(node);
	inFile.close();
	outFile.close();
	return 0;
}

hhys08120

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
短文词频

本程序从文本文件 text.txt 读入一篇英文短文，统计该短文中不同的单词及其出现次数，并按字典序将单词及其出现次数输出到文本文件 word.txt 中。程序用一棵有序二叉树（二叉排序树）存储这些单词及其出现次数，一边读入一边建树；然后中序遍历该二叉树，将遍历经过的各节点的内容输出。 #include <iostream> #include <string> #include <fstream> using namespace std; ifs…
复制链接

扫一扫

专栏目录

短文 词频

短文词频