- 安装xshell、xftp、VMware,VMware的安装序列号为:5A02H-AU243-TZJ49-GTC7K-3C61N
- 在VMware中修改最大内存上限并启动HDP,待完成后根据提示使用xshell进行ssh连接,第一次进入需要修改密码,用户名:root 旧密码:hadoop
- 使用xftp替换/etc/hadoop/2.3.2.0-2950/0目录下的hdfs-site.xml文件,完成后在xshell中输入“shutdown -r 0”重启系统
- 解压cloudMR.zip,按照要求编写TitleCount.java中的TODO部分。完成后使用xftp将cloudMR整个文件夹传输到HDP系统中
- 在xshell中依次输入以下命令启动程序:
  cd cloudMR/
  chmod 777 submit.sh
  ./submit.sh
- 待运行结束后若看到以下结果则证明TitleCount成功执行
  (若想详细了解向hadoop提交任务的方式,请查看submit.sh)
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
// >>> Don't Change
public class TitleCount extends Configured implements Tool {
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new TitleCount(), args);
System.exit(res);
} -
@Override
public int run(String[] args) throws Exception {
Job job = Job.getInstance(this.getConf(), "Title Count");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class); -
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class); -
job.setMapperClass(TitleCountMap.class);
job.setReducerClass(TitleCountReduce.class); -
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1])); -
job.setJarByClass(TitleCount.class);
return job.waitForCompletion(true) ? 0 : 1;
} -
public static String readHDFSFile(String path, Configuration conf) throws IOException{
Path pt=new Path(path);
FileSystem fs = FileSystem.get(pt.toUri(), conf);
FSDataInputStream file = fs.open(pt);
BufferedReader buffIn=new BufferedReader(new InputStreamReader(file)); -
StringBuilder everything = new StringBuilder();
String line;
while( (line = buffIn.readLine()) != null) {
everything.append(line);
everything.append("\n");
}
return everything.toString();
} -
// <<< Don't Change
-
public static class TitleCountMap extends Mapper<Object, Text, Text, IntWritable> {
List<String> stopWords;
String delimiters; -
@Override
protected void setup(Context context) throws IOException,InterruptedException { -
Configuration conf = context.getConfiguration();
-
String stopWordsPath = conf.get("stopwords");
String delimitersPath = conf.get("delimiters"); -
this.stopWords = Arrays.asList(readHDFSFile(stopWordsPath, conf).split("\n"));
this.delimiters = readHDFSFile(delimitersPath, conf);
} -
@Override
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
StringTokenizer stk = new StringTokenizer(value.toString(),delimiters);
while(stk.hasMoreTokens()){
String e = stk.nextToken().trim().toLowerCase();
if(stopWords.contains(e) == false){
context.write(new Text(e),new IntWritable(1));
}
} -
}
} -
public static class TitleCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = 0;
for(IntWritable e : values){
sum += e.get();
}
context.write(key, new IntWritable(sum)); -
}
}