1. 纯vector实现(慢)
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
using std::cerr;
using std::cout;
using std::endl;
using std::string;
using std::vector;
// One dictionary entry: a word together with an integer count.
struct Record {
    std::string _word;  // the word text
    int _frequency;     // count stored alongside the word

    // Builds an entry from a word and its initial count.
    Record(const std::string &word, int fre) : _word(word), _frequency(fre) {}
};
class Dictionary{
public:
Dictionary(int capa){
_dict.reserve(capa);
}
void read(const string &filename){
std:: ifstream ifs(filename);
if(!ifs){
cerr << "ifstream open file failed" <<endl;
return ;
}
string line;
while(getline(ifs,line)){
std::istringstream iss(line);
string word;
while(iss >> word){
dealWord(word);
insert(word);
}
}
//排序
sort(_dict.begin(),_dict.end(),[](const Record &a,const Record &b){
return a._word < b._word;
});
ifs.close();
}
void dealWord(string & word){
auto it = word.begin();
while(it != word.end()){
if(!isalpha(*it)){
it = word.erase(it);
}else{
++it;
}
}
}
void insert(const string & word){
if(word == string()){
return;
}
size_t idx = 0;
for(;idx < _dict.size();++idx){
if(word == _dict[idx]._word){
++_dict[idx]._frequency;
break;
}
}
if(idx == _dict.size()){
_dict.push_back(Record(word,1));
}
}
void store(const string &filename){
std::ofstream ofs(filename);
if(!ofs){
cerr << "ofstream open file failed" << endl;
return;
}
// for(size_t idx = 0; idx < _dict.size();++idx){
// ofs << _dict[idx]._word << " " << _dict[idx]._frequency << endl;
// }
for(const auto &record : _dict){
ofs << record._word << " " << record._frequency << endl;
}
ofs.close();
}
private:
vector<Record> _dict;
};
void test0(){
Dictionary dict(10000);
dict.read("The_Holy_Bible.txt");
dict.store("dictVector.dat");
}
// Program entry point: runs test0() and reports success to the caller.
int main() {
    test0();
    return 0;
}
2. map实现
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <map>
#include <algorithm>
#include <cctype>
using std::cout;
using std::endl;
using std::string;
using std::cerr;
// Word-frequency counter backed by std::map.
class Dictionary {
    // Maps each counted word to its count; std::map iterates in key order.
    std::map<std::string, int> wordFrequency;

public:
    // Counts the words found in the file named `filename`.
    void read(const std::string &filename);

    // Writes the collected counts to the file named `filename`.
    void store(const std::string &filename);
};
// Reads `filename` line by line and counts every whitespace-delimited token
// that consists solely of letters, after converting it to lower case.
// Tokens containing any non-letter character are skipped entirely.
// If the file cannot be opened, an error is printed to std::cerr and the
// frequency table is left unchanged.
void Dictionary::read(const std::string &filename) {
    std::ifstream file(filename);
    if (!file) {
        std::cerr << "ifstream open file failed" << std::endl;
        return;
    }

    // The <cctype> functions have undefined behaviour for negative char
    // values, so every character is converted to unsigned char first.
    const auto isLetter = [](unsigned char ch) { return std::isalpha(ch) != 0; };
    const auto toLower = [](unsigned char ch) {
        return static_cast<char>(std::tolower(ch));
    };

    std::string line;
    while (std::getline(file, line)) {
        std::istringstream iss(line);
        std::string word;
        // A successful >> extraction never yields an empty token, so no
        // separate emptiness check is needed.
        while (iss >> word) {
            // Skip tokens that contain anything other than letters.
            if (!std::all_of(word.begin(), word.end(), isLetter)) {
                continue;
            }
            // Normalise to lower case before counting.
            std::transform(word.begin(), word.end(), word.begin(), toLower);
            ++wordFrequency[word];
        }
    }
    // The stream is closed by its destructor.
}
// Writes one "<word> <count>" line per entry of the frequency table to
// `filename`, in the map's iteration order.
// If the file cannot be opened, an error is printed to std::cerr and nothing
// is written.
void Dictionary::store(const std::string &filename) {
    std::ofstream outFile(filename);
    if (!outFile) {
        std::cerr << "ofstream open file failed" << std::endl;
        return;
    }
    for (const auto &entry : wordFrequency) {
        // '\n' instead of std::endl: no need to flush after every entry.
        outFile << entry.first << " " << entry.second << '\n';
    }
    // The stream is flushed and closed by its destructor.
}
// Program entry point: counts the words of "The_Holy_Bible.txt", writes the
// result to "dictMap.dat", then prints "success".
int main() {
    const std::string inputFilePath = "The_Holy_Bible.txt";
    const std::string outputFilePath = "dictMap.dat";

    Dictionary dict;
    dict.read(inputFilePath);
    dict.store(outputFilePath);

    std::cout << "success" << std::endl;
    return 0;
}
3. unordered_map+vector实现
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>
#include <unordered_map>
#include <algorithm>
#include <cctype>
using std::cout;
using std::endl;
using std::vector;
using std::string;
using std::cerr;
// A word paired with an integer count. Plain aggregate: no constructors, so
// it can be brace-initialised as {word, count}.
struct Record {
    std::string _word;  // the word text
    int _frequency;     // count stored alongside the word
};
// Word-frequency dictionary whose results are kept as a vector of records.
class Dictionary {
    // Records produced by read(); store() writes them out in this order.
    std::vector<Record> _dict;

public:
    // Counts the words found in the file named `filename`.
    void read(const std::string &filename);

    // Writes the collected records to the file named `filename`.
    void store(const std::string &filename);
};
// Reads `filename` line by line and counts every whitespace-delimited token
// that consists solely of letters, after converting it to lower case.
// Tokens containing any non-letter character are skipped entirely.
// Counting is done in a temporary hash map; afterwards `_dict` is rebuilt
// from that map and sorted by word.
// Counts from earlier read() calls are merged rather than duplicated.
// If the file cannot be opened, an error is printed to std::cerr and `_dict`
// is left unchanged.
void Dictionary::read(const std::string &filename) {
    std::ifstream file(filename);
    if (!file) {
        std::cerr << "ifstream open file failed" << std::endl;
        return;
    }

    // Seed the map with the records already held, so that a second read()
    // adds to the existing counts instead of appending duplicate records.
    std::unordered_map<std::string, int> wordFrequency;
    wordFrequency.reserve(_dict.size());
    for (const auto &record : _dict) {
        wordFrequency[record._word] += record._frequency;
    }

    // The <cctype> functions have undefined behaviour for negative char
    // values, so every character is converted to unsigned char first.
    const auto isLetter = [](unsigned char ch) { return std::isalpha(ch) != 0; };
    const auto toLower = [](unsigned char ch) {
        return static_cast<char>(std::tolower(ch));
    };

    std::string line;
    while (std::getline(file, line)) {
        std::istringstream iss(line);
        std::string word;
        // A successful >> extraction never yields an empty token, so no
        // separate emptiness check is needed.
        while (iss >> word) {
            // Skip tokens that contain anything other than letters.
            if (!std::all_of(word.begin(), word.end(), isLetter)) {
                continue;
            }
            // Normalise to lower case before counting.
            std::transform(word.begin(), word.end(), word.begin(), toLower);
            ++wordFrequency[word];
        }
    }

    // Rebuild the record vector from the merged counts.
    _dict.clear();
    _dict.reserve(wordFrequency.size());
    for (const auto &entry : wordFrequency) {
        _dict.push_back({entry.first, entry.second});
    }

    // Sort in dictionary (lexicographic) order.
    std::sort(_dict.begin(), _dict.end(),
              [](const Record &a, const Record &b) { return a._word < b._word; });
}
// Writes one "<word> <count>" line per record of `_dict` to `filename`, in
// the vector's current order.
// If the file cannot be opened, an error is printed to std::cerr and nothing
// is written.
void Dictionary::store(const std::string &filename) {
    std::ofstream outFile(filename);
    if (!outFile) {
        std::cerr << "ofstream open file failed" << std::endl;
        return;
    }
    for (const auto &record : _dict) {
        // '\n' instead of std::endl: no need to flush after every record.
        outFile << record._word << " " << record._frequency << '\n';
    }
    // The stream is flushed and closed by its destructor.
}
// Program entry point: counts the words of "The_Holy_Bible.txt", writes the
// result to "dictUnorder_map.dat", then prints "success".
int main() {
    const std::string inputFilePath = "The_Holy_Bible.txt";
    const std::string outputFilePath = "dictUnorder_map.dat";

    Dictionary dict;
    dict.read(inputFilePath);
    dict.store(outputFilePath);

    std::cout << "success" << std::endl;
    return 0;
}