1、程序执行命令:
hadoop pipes -D hadoop.pipes.java.recordreader=true -D hadoop.pipes.java.recordwriter=true -input /input/wordcount/sample.txt -output /output/wordcount -program /bin/wordcount
2、具体代码:
wordcount.h
#include <algorithm>
#include <stdint.h>
#include <string>
#include <vector>
#include "Pipes.hh"
#include "TemplateFactory.hh"
#include "StringUtils.hh"
#include <iostream>
using namespace std;
// Mapper for the word-count job: tokenizes each input line and emits one
// <word, "1"> pair per token (Hadoop Pipes passes all keys/values as strings).
class WordcountMapper : public HadoopPipes::Mapper
{
public:
// Required by TemplateFactory; the context is not used here.
WordcountMapper(HadoopPipes::TaskContext& context);
// Split `src` into tokens separated by any character found in `separator`.
vector<string> split(const string& src, const string& separator);
// Overrides the Pipes map() hook; called once per input record.
void map(HadoopPipes::MapContext& context);
};
// Reducer for the word-count job: sums the "1" values emitted by the mappers
// for each word and writes one <word, total> record.
class WordcountReducer : public HadoopPipes::Reducer
{
public:
// Required by TemplateFactory; the context is not used here.
WordcountReducer(HadoopPipes::TaskContext& context);
// Overrides the Pipes reduce() hook; called once per distinct key.
void reduce(HadoopPipes::ReduceContext& context);
};
wordcount.cpp
#include "wordcount.h"
// Nothing to initialize; the TaskContext parameter is required by the
// Pipes factory contract but is unused here.
WordcountMapper::WordcountMapper(HadoopPipes::TaskContext& context)
{
}
// Pipes map() hook: emit <word, "1"> for every space-separated token of the
// current input line. Aggregation is left entirely to the reducer.
void WordcountMapper::map(HadoopPipes::MapContext& context)
{
int count = 1;
// Bind by const reference instead of copying the record value for every line.
const string& line = context.getInputValue();
vector<string> wordVec = split(line, " ");
for(unsigned i=0; i<wordVec.size(); i++)
{
// Skip empty tokens: split() can yield one for an empty line or a line
// starting with the separator, and "" would otherwise be emitted as a key.
if (!wordVec[i].empty())
{
context.emit(wordVec[i], HadoopUtils::toString(count));
}
}
}
// Split `src` into the tokens delimited by any character of `separator`.
// Fixes two defects of the previous version: a leading separator no longer
// produces a spurious empty token (" a" -> {"a"}, not {"", "a"}), and an
// empty input now returns an empty vector instead of {""}. Runs of adjacent
// separators are collapsed, as before.
vector<string> WordcountMapper::split(const string& src, const string& separator)
{
vector<string> dest;
// Start at the first non-separator character; npos means no tokens at all.
string::size_type start = src.find_first_not_of(separator, 0);
while (start != string::npos)
{
// End of the current token: next separator character (or end of string).
string::size_type end = src.find_first_of(separator, start);
if (end == string::npos)
{
dest.push_back(src.substr(start));
break;
}
dest.push_back(src.substr(start, end - start));
// Skip the whole separator run before the next token.
start = src.find_first_not_of(separator, end);
}
return dest;
}
// Nothing to initialize; the TaskContext parameter is required by the
// Pipes factory contract but is unused here.
WordcountReducer::WordcountReducer(HadoopPipes::TaskContext& context)
{
}
// Pipes reduce() hook: sum every count grouped under the current key and
// emit a single <word, total> record.
void WordcountReducer::reduce(HadoopPipes::ReduceContext& context)
{
int total = 0;
// nextValue() advances through all values that share this key.
while (context.nextValue())
{
total += HadoopUtils::toInt(context.getInputValue());
}
// Pipes transports only strings, so convert the integer back before emitting.
context.emit(context.getInputKey(), HadoopUtils::toString(total));
}
main.cpp
/*
** Hadoop's MapReduce framework: map prepares the data and reduce aggregates it;
** this client program communicates with the Hadoop cluster.
** Java programs typically talk via the streaming standard streams; C++ uses sockets.
** map runs on the data nodes (data-local, no network traffic); reduce is fed by
** the map output (map output = reduce input), so its network usage is higher.
** Java task parameters can be int or string, but C++ only gets strings; this
** simplifies the interface but often requires manual conversion.
***********************************
** Job: word count on the Hadoop cluster
** Task entry point: HadoopPipes::runTask
** Job launch script: mapredjob.sh
** HDFS input file: /input/wordcount/sample.txt
** HDFS output dir: /output/wordcount
*/
#include "wordcount.h"
// Hand control to the Pipes runtime; the TemplateFactory instantiates the
// mapper/reducer classes on demand for each task.
int main(int argc, char *argv[])
{
return HadoopPipes::runTask(HadoopPipes::TemplateFactory<WordcountMapper, WordcountReducer>());
}
makefile程序:
.SUFFIXES:.h .c .cpp .o
CC=g++
CPPFLAGS = -m64
# -f: do not fail `make clean` when the files are already gone
RM = rm -f
SRCS = wordcount.cpp main.cpp
PROGRAM = wordcount
OBJS=$(SRCS:.cpp=.o)
INC_PATH = -I$(HADOOP_DEV_HOME)/include
LIB_PATH = -L$(HADOOP_DEV_HOME)/lib/native
LIBS = -lhadooppipes -lcrypto -lhadooputils -lpthread
# Automatic variables: $^ = all prerequisites, $< = first prerequisite,
# $@ = target. ($? is only the prerequisites NEWER than the target, which
# broke incremental relinking in the previous version.)
$(PROGRAM):$(OBJS)
	$(CC) $^ -Wall $(LIB_PATH) $(LIBS) -g -O2 -o $@
# Pattern rule: compile each .cpp into its own .o. The old rule
# `$(OBJS):$(SRCS)` recompiled every source for every object file.
%.o: %.cpp
	$(CC) $(CPPFLAGS) $(INC_PATH) -c $< -o $@
.PHONY:clean
clean:
	$(RM) $(PROGRAM) $(OBJS)
源数据:
Happiness is not about being immortal nor having food or rights in one's hand. It??s about having each tiny wish come true, or having something to eat when you are hungry or having someone's love when you need love
Happiness is not about being immortal nor having food or rights in one's hand. It??s about having each tiny wish come true, or having something to eat when you are hungry or having someone's love when you need love
Happiness is not about being immortal nor having food or rights in one's hand. It??s about having each tiny wish come true, or having something to eat when you are hungry or having someone's love when you need love
Happiness 3
It??s 3
about 6
are 3
being 3
come 3
each 3
eat 3
food 3
hand. 3
having 12
hungry 3
immortal 3
in 3
is 3
love 6
need 3
nor 3
not 3
one's 3
or 9
rights 3
someone's 3
something 3
tiny 3
to 3
true, 3
when 6
wish 3
you 6