（转）hadoop上运行c++程序步骤

最新推荐文章于 2021-11-14 22:58:15 发布

掌勺者

最新推荐文章于 2021-11-14 22:58:15 发布

阅读量467

点赞数

分类专栏：大数据

大数据专栏收录该内容

5 篇文章 0 订阅

订阅专栏

一。写一个程序保存为wordcount-simple.cpp：

#include "hadoop/Pipes.hh"
#include "hadoop/TemplateFactory.hh"
#include "hadoop/StringUtils.hh"

// Names handed to TaskContext::getCounter() by the mapper and reducer below:
// WORDCOUNT is the first argument, INPUT_WORDS / OUTPUT_WORDS the second.
static const std::string WORDCOUNT("WORDCOUNT");
static const std::string INPUT_WORDS("INPUT_WORDS");
static const std::string OUTPUT_WORDS("OUTPUT_WORDS");

/**
 * Mapper of the word-count job.
 *
 * Splits the input value on spaces, emits a (word, "1") pair for every token
 * and adds the number of tokens to the WORDCOUNT / INPUT_WORDS counter.
 */
class WordCountMap: public HadoopPipes::Mapper {
public:
  // Counter handle obtained from the task context in the constructor.
  HadoopPipes::TaskContext::Counter* inputWords;

  WordCountMap(HadoopPipes::TaskContext& context)
    : inputWords(context.getCounter(WORDCOUNT, INPUT_WORDS)) {
  }

  // Tokenizes context.getInputValue() and emits one pair per token.
  void map(HadoopPipes::MapContext& context) {
    typedef std::vector<std::string> TokenList;

    // Split the input value into words using a space as the separator.
    const TokenList tokens =
      HadoopUtils::splitString(context.getInputValue(), " ");

    // The word is the key; the value is always the string "1".
    for (TokenList::const_iterator it = tokens.begin();
         it != tokens.end(); ++it) {
      context.emit(*it, "1");
    }

    // Record how many words this call produced in the INPUT_WORDS counter.
    context.incrementCounter(inputWords, tokens.size());
  }
};

/**
 * Reducer of the word-count job.
 *
 * Adds up the integer values received for the current key, emits
 * (key, total) and bumps the WORDCOUNT / OUTPUT_WORDS counter by one.
 */
class WordCountReduce: public HadoopPipes::Reducer {
public:
  // Counter handle obtained from the task context in the constructor.
  HadoopPipes::TaskContext::Counter* outputWords;

  WordCountReduce(HadoopPipes::TaskContext& context)
    : outputWords(context.getCounter(WORDCOUNT, OUTPUT_WORDS)) {
  }

  // Sums every value available through nextValue() and emits the total.
  void reduce(HadoopPipes::ReduceContext& context) {
    int total = 0;
    while (context.nextValue()) {
      // Each value is parsed as an integer occurrence count.
      const int occurrences = HadoopUtils::toInt(context.getInputValue());
      total += occurrences;
    }

    // Output the word together with its total count, rendered as a string.
    const std::string totalText = HadoopUtils::toString(total);
    context.emit(context.getInputKey(), totalText);

    // One more output record has been produced.
    context.incrementCounter(outputWords, 1);
  }
};

int main(int argc, char *argv[]) {
return HadoopPipes::runTask(HadoopPipes::TemplateFactory<WordCountMap, 
WordCountReduce>()); // 运行任务
}

二。Makefile：

# Builds the Hadoop Pipes word-count example as a 32-bit binary.
# HADOOP_HOME must point at the Hadoop installation that ships the Pipes
# headers and libraries under c++/$(PLATFORM).
CC = g++
HADOOP_INSTALL = $(HADOOP_HOME)
PLATFORM = Linux-i386-32
CPPFLAGS = -O2 -m32 -I$(HADOOP_INSTALL)/c++/$(PLATFORM)/include
# -lcrypto is listed after the Hadoop libraries: the linker resolves archive
# symbols left to right, so a library must follow the ones that use it.
LDFLAGS += -lhadooppipes -lhadooputils -lpthread -lcrypto

# The recipe line below must start with a TAB character.
wordcount-simple: wordcount-simple.cpp
	$(CC) $(CPPFLAGS) $< -Wall -L$(HADOOP_INSTALL)/c++/$(PLATFORM)/lib $(LDFLAGS) -o $@

三。make生成可执行文件

四。将编译生成的可执行文件上传到HDFS：

# Copy the locally built ./wordcount-simple binary to /home on DFS.
# NOTE(review): presumably /home already exists as a directory on DFS, so the
# binary ends up at /home/wordcount-simple - confirm before running.
hadoop dfs -copyFromLocal ./wordcount-simple /home

五。创建配置文件：word.xml

<?xml version="1.0"?>
<configuration>
  <property>
    <!-- Path of the Pipes executable on DFS. It must match the file name
         used when the binary was uploaded (wordcount-simple under /home). -->
    <name>hadoop.pipes.executable</name>
    <value>/home/wordcount-simple</value>
  </property>
  <property>
    <!-- Use the Java record reader for the job input. -->
    <name>hadoop.pipes.java.recordreader</name>
    <value>true</value>
  </property>
  <property>
    <!-- Use the Java record writer for the job output. -->
    <name>hadoop.pipes.java.recordwriter</name>
    <value>true</value>
  </property>
</configuration>

六。创建本地文件hello.txt，内容为：hello world

七。将hello.txt上传到dfs

# Copy the local sample input ./hello.txt to /home on DFS.
hadoop dfs -copyFromLocal ./hello.txt /home

八。运行程序

# Run the Pipes job described by word.xml. The -input path must name the file
# that was uploaded to DFS (hello.txt under /home), not "/home/hello".
hadoop pipes -conf ./word.xml -input /home/hello.txt -output /home/result

（运行前result目录不能已经存在；Hadoop会自动创建result目录并把结果保存在其中）

掌勺者

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录