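// Text query: the user enters a word and the program prints how many times
// that word occurs in the file, together with the matching line numbers.
// Multiple occurrences on the same line are counted only once.
// Words are matched case-insensitively.
// For now the only punctuation recognised in the text is . , !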
#include <memory>
#include <stdio.h>
#include <string>
#include <iostream>
#include <fstream>
#include <string.h>
#include <algorithm>
#include <map>
#include <iterator>
#include <sstream>
#include <unordered_map>
#include <set>
using namespace std;
// Word index: each key is a word, each value is the set of line numbers on
// which that word occurs. Because a set holds a line number at most once, the
// size of the set is the word's occurrence count (one per line).
using word_db = unordered_map<string, set<int>>;
// Normalises a token in place so it can be used as a lookup key, and returns
// a reference to that same string.
//   - Every ASCII upper-case letter ('A'-'Z') is converted to lower case, so
//     "HELLO", "Hello" and "hello" all produce the same key.
//   - All trailing '.', ',' and '!' characters are removed ("wow!!" -> "wow").
// An empty string is returned unchanged; a token consisting only of the
// punctuation characters above becomes empty.
std::string & word_convert(std::string & word)
{
    // Fold ASCII letters only; every other byte is left untouched.
    for (char & ch : word) {
        if (ch >= 'A' && ch <= 'Z')
            ch = static_cast<char>(ch - 'A' + 'a');
    }
    // The emptiness check comes first so back() is never called on an
    // empty string.
    while (!word.empty() &&
           (word.back() == ',' || word.back() == '.' || word.back() == '!')) {
        word.pop_back();
    }
    return word;
}
// Builds the word index for the text file `filename`.
//
// The file is read line by line (line numbers start at 1). Each line is split
// on whitespace, every token is normalised with word_convert(), and the
// current line number is added to that word's set in the index.
//
// Returns a shared_ptr to the populated index.
// If the file cannot be opened, a message is printed to stdout and the
// process terminates via exit(-1).
std::shared_ptr<word_db> init_file(const char* filename)
{
    auto pret = std::make_shared<word_db>();

    std::ifstream fin(filename);
    if (!fin) {
        std::cout << "file" << filename << "not exsit\n";
        exit(-1);
    }

    int lnum = 0;
    std::string line;
    std::string word;
    // std::getline into a std::string has no fixed line-length limit; a
    // fixed-size char buffer would set failbit on a long line and silently
    // stop indexing the rest of the file.
    while (std::getline(fin, line)) {
        ++lnum;
        std::istringstream ss(line);
        while (ss >> word) {
            word_convert(word);
            // A token made only of punctuation normalises to "", which is
            // not a word and must not become a key.
            if (word.empty())
                continue;
            // operator[] creates the set on first sight of the word; the set
            // itself ignores a line number it already holds.
            (*pret)[word].insert(lnum);
        }
    }
    return pret;
}
// Prints line number `num` (1-based) of the file `filename` to stdout in the
// form "(Line <num> ) <text>".
//
// Nothing is printed when the file has fewer than `num` lines or `num` is
// less than 1. If the file cannot be opened, a message is printed to stdout
// and the process terminates via exit(-1).
void cout_by_line(int num, const char* filename)
{
    std::ifstream fin(filename);
    if (!fin) {
        std::cout << "file" << filename << "not exsit\n";
        exit(-1);
    }

    std::string line;
    // std::getline into a std::string copes with lines of any length; with a
    // fixed-size buffer a long earlier line would put the stream into a
    // failed state and the requested line would never be reached.
    for (int lnum = 1; std::getline(fin, line); ++lnum) {
        if (lnum == num) {
            std::cout << "(Line " << num << " ) " << line << '\n';
            return;  // the requested line was printed; no need to read on
        }
    }
}
// Entry point: `argv[1]` names the text file to index.
//
// After building the index, the program repeatedly prompts for a word, prints
// how many lines contain it, and prints each of those lines. The loop ends
// when the normalised input equals "quit" or when standard input is
// exhausted.
//
// Returns 0 on normal termination and -1 when no file name was supplied or
// the file cannot be opened.
int main(int argc, char* argv[])
{
    // Without this guard argv[1] would be a null pointer when no file name
    // is given.
    if (argc < 2) {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "program") << " <file>\n";
        return -1;
    }
    const char* filename = argv[1];

    // Probe the file up front so a missing file is reported before anything
    // else is printed; the stream is scoped so it is not held open afterwards.
    {
        std::ifstream fin(filename);
        if (!fin) {
            std::cout << "file" << filename << "not exsit\n";
            return -1;
        }
    }

    // Leftover self-check of word_convert(); its output is kept so the
    // program's visible behaviour is unchanged.
    std::string a("Cool.");
    std::string b("Sool!");
    std::cout << word_convert(a) << word_convert(b) << std::endl;

    auto database = init_file(filename);

    std::string input;
    while (true) {
        std::cout << "input a word:\n";
        // Stop on EOF or a read error; otherwise the loop would spin forever
        // re-using the previous input.
        if (!(std::cin >> input))
            break;
        word_convert(input);
        if (input == "quit")
            break;

        auto it = database->find(input);
        if (it != database->end()) {
            std::cout << "Found word " << input << " " << it->second.size() << " times.\n";
            for (int line_no : it->second) {
                cout_by_line(line_no, filename);
            }
        } else {
            std::cout << "Not find anything.\n";
        }
    }
    return 0;
}