C++统计英文文章中的单词和词频,以字典序写入新文件(三种方法)

1. 纯vector实现(慢)

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
using std::cerr;
using std::cout;
using std::endl;
using std::string;
using std::vector;

// One dictionary entry: a word together with its occurrence count.
struct Record {
    string _word;    // the word text
    int _frequency;  // how many times _word has been counted

    // Creates an entry for `word` whose count starts at `fre`.
    Record(const string &word, int fre) : _word(word), _frequency(fre) {}
};
class Dictionary{
public:
    Dictionary(int capa){
        _dict.reserve(capa);
    }
    void read(const string &filename){
        std:: ifstream ifs(filename);
        if(!ifs){
            cerr << "ifstream open file failed" <<endl;
            return ;
        }

        string line;
        while(getline(ifs,line)){
            std::istringstream iss(line);
            string word;
            while(iss >> word){
                dealWord(word);
                insert(word);
            }
        }
        //排序
        sort(_dict.begin(),_dict.end(),[](const Record &a,const Record &b){   
          return a._word < b._word;
          });
        ifs.close();
}
    void dealWord(string & word){
        auto it = word.begin();
        while(it != word.end()){
            if(!isalpha(*it)){
                it = word.erase(it);
            }else{
                ++it;
            }
        }
    }
    void insert(const string & word){
        if(word == string()){
            return;
        }
        size_t idx = 0;
        for(;idx < _dict.size();++idx){
            if(word == _dict[idx]._word){
                ++_dict[idx]._frequency;
                break;
            }
        }
        if(idx == _dict.size()){
            _dict.push_back(Record(word,1));
        }
    }
void store(const string &filename){
    std::ofstream ofs(filename);
    if(!ofs){
        cerr << "ofstream open file failed" << endl;
        return;
    }
   // for(size_t idx = 0; idx < _dict.size();++idx){
   //     ofs << _dict[idx]._word << " " << _dict[idx]._frequency << endl;
   // }
    for(const auto &record : _dict){
        ofs << record._word << " " << record._frequency << endl;
    }
    ofs.close();
}
private:
    vector<Record> _dict;
};
void test0(){
    Dictionary dict(10000);
    dict.read("The_Holy_Bible.txt");
    dict.store("dictVector.dat");
}

// Program entry point: all the work is delegated to test0().
int main(void){
    test0();
    // Falling off the end of main is equivalent to `return 0;`.
}

2. map实现

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <map>
#include <algorithm>
#include <cctype>
using std::cout;
using std::endl;
using std::string;
using std::cerr;

// Word-frequency counter backed by std::map. The map keeps its keys
// ordered, so iterating it yields the words in dictionary order.
class Dictionary{
    // word -> number of occurrences
    std::map<string,int> wordFrequency;

public:
    // Counts the words found in the file `filename`.
    void read(const string &filename);

    // Writes the counted words and their frequencies to `filename`.
    void store(const string &filename);
};

// Counts the words of `filename` into wordFrequency.
//
// Each whitespace-separated token is lower-cased; a token that contains any
// non-letter character is skipped entirely. Counts accumulate across calls.
// Prints a message to cerr and returns when the file cannot be opened.
void Dictionary::read(const string &filename){
    std::ifstream file(filename);
    if(!file){
        cerr << "ifstream open file failed" << endl;
        return;
    }

    string line,word;

    while(std::getline(file,line)){
        std::istringstream iss(line);
        while(iss >> word){
            // Lower-case the token. The lambdas take unsigned char because
            // the <cctype> functions have undefined behaviour for negative
            // values, which a plain char holds for any non-ASCII byte.
            std::transform(word.begin(),word.end(),word.begin(),
                           [](unsigned char ch){ return static_cast<char>(std::tolower(ch)); });

            // Skip the token when it contains a non-letter character.
            const bool hasNonLetter = std::any_of(word.begin(),word.end(),
                                                  [](unsigned char ch){ return !std::isalpha(ch); });
            if(hasNonLetter){
                continue;
            }

            // operator>> never yields an empty token, so no emptiness check.
            ++wordFrequency[word];
        }
    }
    // file is closed by its destructor.
}

// Writes one "<word> <frequency>" line per entry of wordFrequency to
// `filename`, in the map's key order. Prints a message to cerr and returns
// when the file cannot be opened.
void Dictionary::store(const string &filename){
    std::ofstream outFile(filename);
    if(!outFile){
        cerr << "ofstream open file failed" << endl;
        return;
    }

    for(const auto &wordfre : wordFrequency){
        // '\n' instead of endl: no need to flush after every line.
        outFile << wordfre.first << " " << wordfre.second << '\n';
    }
    // outFile is flushed and closed by its destructor.
}

int main(){
    Dictionary dict;
    string inputFilePath  = "The_Holy_Bible.txt";
    string outputFilePath = "dictMap.dat";

    dict.read(inputFilePath);
    dict.store(outputFilePath);

    cout << "success" << endl;
    return 0;
}

3. unordered_map+vector实现 

#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>
#include <unordered_map>
#include <algorithm>
#include <cctype>
using std::cout;
using std::endl;
using std::vector;
using std::string;
using std::cerr;

// Plain aggregate pairing a word with its occurrence count.
struct Record
{
    std::string _word;  // the word text
    int _frequency;     // how many times _word was counted
};
// Word-frequency dictionary whose result is kept as a vector of records.
class Dictionary{
    // the word/frequency records produced by read()
    vector<Record> _dict;

public:
    // Counts the words found in the file `filename`.
    void read(const string &filename);

    // Writes the records and their frequencies to `filename`.
    void store(const string &filename);
};

// Counts the words of `filename` and stores them in _dict, sorted by word.
//
// Each whitespace-separated token is lower-cased; a token that contains any
// non-letter character is skipped entirely. Records already held in _dict
// are merged with the new counts, so calling read() more than once
// accumulates instead of appending duplicate records.
// Prints a message to cerr and leaves _dict untouched when the file cannot
// be opened.
void Dictionary::read(const string &filename){
    std::ifstream file(filename);
    if(!file){
        cerr << "ifstream open file failed" << endl;
        return;
    }

    // Seed the counter with what earlier read() calls collected.
    std::unordered_map<string,int> wordFrequency;
    for(const auto &record : _dict){
        wordFrequency[record._word] += record._frequency;
    }

    string line,word;

    while(std::getline(file,line)){
        std::istringstream iss(line);
        while(iss >> word){
            // Lower-case the token. The lambdas take unsigned char because
            // the <cctype> functions have undefined behaviour for negative
            // values, which a plain char holds for any non-ASCII byte.
            std::transform(word.begin(),word.end(),word.begin(),
                           [](unsigned char ch){ return static_cast<char>(std::tolower(ch)); });

            // Skip the token when it contains a non-letter character.
            const bool hasNonLetter = std::any_of(word.begin(),word.end(),
                                                  [](unsigned char ch){ return !std::isalpha(ch); });
            if(hasNonLetter){
                continue;
            }

            // operator>> never yields an empty token, so no emptiness check.
            ++wordFrequency[word];
        }
    }

    // Rebuild the record list from the merged counts.
    _dict.clear();
    _dict.reserve(wordFrequency.size());
    for(const auto &entry : wordFrequency){
        _dict.push_back({entry.first,entry.second});
    }

    // Sort in dictionary (byte-wise string) order.
    std::sort(_dict.begin(),_dict.end(),[](const Record &lhs,const Record &rhs){
        return lhs._word < rhs._word;
    });
    // file is closed by its destructor.
}

// Writes one "<word> <frequency>" line per record of _dict to `filename`,
// in the current order of the records. Prints a message to cerr and returns
// when the file cannot be opened.
void Dictionary::store(const string &filename){
    std::ofstream outFile(filename);
    if(!outFile){
        cerr << "ofstream open file failed" << endl;
        return;
    }

    for(const auto &record: _dict){
        // '\n' instead of endl: no need to flush after every line.
        outFile << record._word << " " << record._frequency << '\n';
    }
    // outFile is flushed and closed by its destructor.
}

int main(){
    Dictionary dict;
    string inputFilePath  = "The_Holy_Bible.txt";
    string outputFilePath = "dictUnorder_map.dat";

    dict.read(inputFilePath);
    dict.store(outputFilePath);

    cout << "success" << endl;
    return 0;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值