#ifndef TEXTQUERY_H
#define TEXTQUERY_H
#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
#include <stdexcept>
using std::istringstream;
using std::set;
using std::string;
using std::getline;
using std::map;
using std::vector;
using std::cerr;
using std::cout;
using std::cin;
using std::ifstream;
using std::endl;
using std::ispunct;
using std::tolower;
using std::strlen;
using std::out_of_range;
/**
 * Word index over the lines of a text file.
 *
 * read_file() stores every input line and then builds a map from each
 * normalised word to the set of line numbers on which it occurs.
 * run_query() looks a word up in that map.
 */
class TextQuery {
public:
    // Typedefs to make declarations easier.

    // Alias for the type returned by std::string::size().
    typedef std::string::size_type str_size;
    // Alias for the index type of the line vector; used as a line number.
    // Lines are addressed with the subscript operator, like array elements.
    typedef std::vector<std::string>::size_type line_no;

    // Stores every line read from `is`, then indexes the words of the
    // stored lines.
    void read_file(std::ifstream &is)
    { store_file(is); build_map(); }

    // Returns the set of line numbers recorded for the given word
    // (an empty set when the word is not in the index).
    std::set<line_no> run_query(const std::string&) const;

    // Declared here; the definition is not part of this section of the file.
    std::string text_line(line_no) const;

    // Number of lines currently stored.
    str_size size() const { return lines_of_text.size(); }

    // Debugging aid: print the map.
    void display_map();

private:
    // Utility functions used by read_file.
    void store_file(std::ifstream&); // store the input file line by line
    void build_map();                // associate each word with a set of line numbers

    // Remembers the whole input file, one element per line.
    std::vector<std::string> lines_of_text;

    // Maps a word to the set of lines on which it occurs. A map works like
    // an associative array: here the key is a word and the value is the set
    // of line numbers for that word.
    std::map< std::string, std::set<line_no> > word_map;

    // Characters that constitute whitespace.
    static std::string whitespace_chars;

    // Canonicalizes text: removes punctuation and makes everything lower case.
    static std::string cleanup_str(const std::string&);
};
#endif
// Reads `is` line by line until extraction fails and appends each line,
// in input order, to lines_of_text.
void TextQuery::store_file(ifstream &is)
{
    // The buffer is scoped to the loop; getline() overwrites it each pass.
    for (string current; getline(is, current); )
        lines_of_text.push_back(current);
}
void TextQuery::build_map()
{
// process each line from the input vector
for (line_no line_num = 0;
line_num != lines_of_text.size();
++line_num)
{ //lines_of_text[line_num] vector也可按下标,即数组形式访问。
// we'll use line to read the text a word at a time
istringstream line(lines_of_text[line_num]);
string word;
while (line >> word)
// add this line number to the set;
// subscript will add word to the map if it's not already there
word_map[cleanup_str(word)].insert(line_num); //使用下标访问map对象
} // 例如: map<string,int> word_count;
// word_count["anna"]=1;
}
// Returns a canonical copy of `word`: punctuation characters are dropped and
// every remaining character is passed through tolower().
string TextQuery::cleanup_str(const string &word)
{
    string ret;
    ret.reserve(word.size()); // the result is never longer than the input
    // A const_iterator is enough: the input is only read, never modified.
    for (string::const_iterator it = word.begin(); it != word.end(); ++it) {
        // The <cctype> functions require a value representable as
        // unsigned char (or EOF). Plain char may be negative for non-ASCII
        // bytes, which would be undefined behaviour, so convert first.
        const unsigned char ch = static_cast<unsigned char>(*it);
        if (!ispunct(ch))
            ret += static_cast<char>(tolower(ch));
    }
    return ret;
}
// Returns the set of line numbers recorded for `query_word`, after
// normalising it with cleanup_str(). Returns an empty set when the
// normalised word is not in the index.
set<TextQuery::line_no> TextQuery::run_query(const string &query_word) const
{
    // Note: must use find() and not subscript the map directly, to avoid
    // adding words to word_map. find() returns an iterator to the entry,
    // or end() when the key is absent.
    map<string, set<line_no> >::const_iterator loc =
        word_map.find(cleanup_str(query_word));

    if (loc == word_map.end())
        return set<line_no>(); // not found: return an empty set

    // `second` is the mapped value: the set of line numbers for this word.
    return loc->second;
}
void TextQuery::display_map()
{
map< string, set<line_no> >::iterator iter = word_map.begin(),
iter_end = word_map.end();
// for each word in the map
for ( ; iter != iter_end; ++iter) {
cout << "word: " << iter->first << " {";
// fetch location vector as a const reference to avoid copying it
const set<line_no> &text_locs = iter->second;
set<line_no>::const_iterator loc_iter = text_locs.begin(),
loc_iter_end = text_locs.end();
// print all line numbers for this word
while (loc_iter != loc_iter_end)//输出行
{
cout << *loc_iter;
if (++loc_iter != loc_iter_end)
cout << ", ";
}
cout << "}\n"; // end list of output this word
}
cout << endl; // finished printing entire map
}