/*
统计一个文件内,英文单词的个数,并按 word = count的格式顺序输出
统计输出总的英文单词个数
*/
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Returns `token` with every leading and trailing punctuation character
// removed. The result is empty when the token is made of punctuation only.
// Punctuation inside the token (e.g. the '-' in "self-taught") is kept.
static string stripPunctuation(const string& token){
    size_t first=0;
    // ispunct requires a value representable as unsigned char; passing a
    // negative plain char (non-ASCII byte) would be undefined behaviour.
    while(first<token.size() && ispunct(static_cast<unsigned char>(token[first])))
        ++first;
    size_t last=token.size();
    while(last>first && ispunct(static_cast<unsigned char>(token[last-1])))
        --last;
    return token.substr(first,last-first);
}

// Records one occurrence of `word` in the two parallel vectors:
// wordVec[i] is the i-th distinct word in order of first appearance and
// countVec[i] is how many times it has been seen. Both vectors grow together,
// so there is no fixed limit on the number of distinct words.
static void recordWord(const string& word,vector<string>& wordVec,vector<int>& countVec){
    const vector<string>::iterator iter=find(wordVec.begin(),wordVec.end(),word);
    if(iter==wordVec.end()){
        wordVec.push_back(word);
        countVec.push_back(1);
    }else{
        ++countVec[iter-wordVec.begin()];
    }
}

// Reads "transform.txt", splits every line on the space character, strips
// leading/trailing punctuation from each token and counts the remaining words.
// Writes the total word count followed by one "word = count" line per distinct
// word (in order of first appearance) to "result.txt".
// Returns 0 on success, 1 if either file cannot be opened.
int main(){
    // Open the input file; report failure instead of silently producing an empty report.
    ifstream input("transform.txt");
    if(!input){
        cerr<<"cannot open transform.txt"<<endl;
        return 1;
    }
    // Open the output file.
    ofstream output("result.txt");
    if(!output){
        cerr<<"cannot open result.txt"<<endl;
        return 1;
    }
    // Two parallel vectors act as an insertion-ordered map (word -> count).
    vector<string> wordVec;
    vector<int> countVec;
    // Total number of words, duplicates included.
    int wordCount=0;
    string line;
    // Process the input one line at a time.
    while(getline(input,line)){
        size_t pos=0;
        while(pos<line.size()){
            // Skip separator characters between tokens.
            if(line[pos]==' '){
                ++pos;
                continue;
            }
            // The token runs up to the next space or the end of the line.
            size_t tokenEnd=line.find(' ',pos);
            if(tokenEnd==string::npos)
                tokenEnd=line.size();
            const string pureWord=stripPunctuation(line.substr(pos,tokenEnd-pos));
            pos=tokenEnd;
            // Tokens consisting only of punctuation are not words.
            if(pureWord.empty())
                continue;
            ++wordCount;
            // Look up the stripped word, not the raw token, so that "band."
            // and "band" are counted as the same word.
            recordWord(pureWord,wordVec,countVec);
        }
    }
    // Close the input file.
    input.close();
    // Emit the total first, then the per-word statistics.
    output<<"单词总数 :"<<wordCount<<'\n';
    for(size_t i=0;i<wordVec.size();++i)
        output<<wordVec[i]<<" = "<<countVec[i]<<'\n';
    // Close the output file.
    output.close();
    return 0;
}
输入文件transform.txt内容:
My father was a self-taught mandolin player. He was one of the best string instrument players in our town.
He could't read music, but if he heard a tune a few times, he could play it. When he was younger, he was a member of a small country music band.
They would play at local dances and on a few occasions would play for the local radio station.
He often told us how he had auditioned and earned a position in a band that featured Patsy Cline as their lead singer.
输出文件result.txt内容:
单词总数 :91
My = 1
father = 1
was = 4
a = 8
self-taught = 1
mandolin = 1
player = 1
He = 3
one = 1
of = 2
the = 2
best = 1
string = 1
instrument = 1
players = 1
in = 2
our = 1
town = 1
could't = 1
read = 1
music = 2
but = 1
if = 1
he = 5
heard = 1
tune = 1
few = 2
times = 1
could = 1
play = 3
it = 1
When = 1
younger = 1
member = 1
small = 1
country = 1
band = 2
They = 1
would = 2
at = 1
local = 2
dances = 1
and = 2
on = 1
occasions = 1
for = 1
radio = 1
station = 1
often = 1
told = 1
us = 1
how = 1
had = 1
auditioned = 1
earned = 1
position = 1
that = 1
featured = 1
Patsy = 1
Cline = 1
as = 1
their = 1
lead = 1
singer = 1