描述
使用 map reduce 来计算单词频率
https://hadoop.apache.org/docs/r1.2.1/mapred_tutorial.html#Example%3A+WordCount+v1.0
您在真实的面试中是否遇到过这个题?
是
样例
chunk1: "Google Bye GoodBye Hadoop code"
chunk2: "lintcode code Bye"
Get MapReduce result:
Bye: 2
GoodBye: 1
Google: 1
Hadoop: 1
code: 2
lintcode: 1
思路简单，看懂英文提示就明白了：Map 阶段把每一行按空格切分成单词，对每个单词输出 (单词, 1)；Reduce 阶段把同一个单词对应的所有计数累加，输出 (单词, 总次数)。
/**
* Definition of Input:
* template<class T>
* class Input {
* public:
* bool done();
* // Returns true when the iteration is exhausted (no more elements), false otherwise.
* void next();
* // Move to the next element in the iteration
* // Runtime error if the iteration has no more elements
* T value();
* // Get the current element, Runtime error if
* // the iteration has no more elements
* }
*/
class WordCountMapper: public Mapper {
public:
void Map(Input<string>* input) {
// Write your code here
// Please directly use func 'output' to
// output the results into output buffer.
// void output(string &key, int value);
vector<string> m_vec;
while(!input->done()){
string m_str=input->value();
int i=0;
for(int j=0;j<=m_str.size();j++){
if(m_str[j]==' '||j==m_str.size()){
string temp=m_str.substr(i,j-i);
output(temp,1);
i=j+1;
}
}
input->next();
}
}
};
class WordCountReducer: public Reducer {
public:
    /**
     * Reduce step of word count: adds up every value supplied for one key
     * and emits a single (key, total) pair.
     *
     * @param key   the key being reduced; forwarded unchanged to output().
     * @param input iterator over the integer values for `key`; it is
     *              advanced until done() reports true.
     *
     * The result is written through the framework-provided
     * `void output(string &key, int value)`.
     */
    void Reduce(string &key, Input<int>* input) {
        int total = 0;
        for (; !input->done(); input->next()) {
            total += input->value();
        }
        output(key, total);
    }
};