目录
使用java操作hdfs的api
我们来写几个demo,测试一下api:
package zl.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
public class HDFSApp {
private Configuration configuration = null;
private FileSystem fileSystem = null;
/**
* 配置
*
* @throws IOException
* @throws URISyntaxException
*/
@Before
public void config() throws IOException, URISyntaxException {
System.out.println("hdfs config before");
configuration = new Configuration();
//这里可以设置副本系数
configuration.set("dfs.replication", "1");
//这里针对外网链接测试,设置datanode的hostname通信
configuration.set("dfs.client.use.datanode.hostname", "true");
//端口号在 /hadoop/etc/hadoop/core-site.xml中配置
URI uri = new URI("hdfs://47.96.94.82:9000");
fileSystem = FileSystem.get(uri, configuration);
}
@Test
public void mkdir() throws IOException {
Path path = new Path("/hdfsapi/test");
boolean result = fileSystem.mkdirs(path);
System.out.println(result);
}
/**
* 删除文件
*
* @throws IOException
*/
@Test
public void removeFile() throws IOException {
Path path = new Path("/hdfsapi");
//递归删除 -r
boolean result = fileSystem.delete(path, true);
System.out.println(result);
}
/**
* 查看文件
*
* @throws IOException
*/
@Test
public void textFile() throws IOException {
Path path = new Path("/hdfsapi/a.txt");
FSDataInputStream fsDataInputStream = fileSystem.open(path);
IOUtils.copyBytes(fsDataInputStream, System.out, 1024);
}
/**
* 写入文件
*
* @throws Exception
*/
@Test
public void writeFile() throws Exception {
Path path = new Path("/hdfsapi/a.txt");
FSDataOutputStream out = fileSystem.create(path);
out.writeUTF("Hello wahaha ");
out.flush();
out.close();
}
/**
* 重命名
*
* @throws Exception
*/
@Test
public void renameFile() throws Exception {
Path oldPath = new Path("/hdfsapi/a.txt");
Path newPath = new Path("/hdfsapi/rename.txt");
boolean result = fileSystem.rename(oldPath, newPath);
System.out.println(result);
}
/**
* 上传本地文件到hdfs,带进度条
*
* @throws Exception
*/
@Test
public void copyLocalFileToHadoop() throws Exception {
//copy本地一个文件到hdfs,简单的
// Path oldPath = new Path("/etc/demo/a.txt");
// Path newPath = new Path("/hdfsapi/rename.txt");
// fileSystem.copyFromLocalFile(oldPath, newPath);
// 下面上传文件有进度条
final int[] i = {0};
FSDataOutputStream out = fileSystem.create(new Path("/hdfsapi/wuxi-app.zip"), new Progressable() {
public void progress() {
i[0]++;
System.out.println("上传中: " + i[0]);
}
});
InputStream in = new BufferedInputStream(new FileInputStream(new File("/Users/fuck/git/wuxi-app.zip")));
IOUtils.copyBytes(in, out, 4096);
}
@After
public void hdfsDown() {
configuration = null;
fileSystem = null;
}
}
博主用的是阿里云,而博主一直在本地idea上进行单元测试,所以这里可能在读文件的时候报错:Could not obtain block,或者在写文件的时候报:could only be written to 0 of the 1 minReplication nodes
特别注意下面这行配置,因为博主环境搭建在阿里云,通信使用阿里云内网ip,而博主现在使用本地的idea跑测试,所以这样的情况下,是无法跑通的,在读取和写入文件时,会各种报错(这里坑了博主半天的时间排错)。最终博主发现,在写入或者读取的时候,namenode返回的是datanode的内网ip,你当然无法使用内网ip读取datanode的数据,所以加上下面这行代码,并且你要在你的client端(博主是mac电脑)hosts中配置节点信息,放上外网ip映射,这样,就可以跑通了(在生产环境当然不可能有外网ip啦),这里千万注意,本行配置虽然说的是hostname,但是你在本地hosts里面不是真的配置各个机器的hostname,而是你hadoop集群相互通信的域名,比如core-site.xml里面配置的"master",又比如workers里面配置的master、node1、node2(在hosts里面也配置了这三个相对应的ip),参考博主之前的搭建环境的文章。 这里博主在本地hosts中设置为:
ip master
ip node1
ip node2
configuration.set("dfs.client.use.datanode.hostname", "true");
hdfs写入数据的流程
上面讲到外网链接hadoop会报错,这一个坑博主在网上找到了不少这样的踩坑记录,大部分都是云上搭建hadoop集群,然后本地链接上去,调用namenode,还可以实现(安全组把端口开出去),但是通过namenode来和datanode交互就变得极其困难,因为集群内,NN与DN都是内网ip通信,我们先来看一下写文件的过程:
client =>namenode=>client(block请求) =>namenode =>client(得知可写的block) =>datanode => datanode(s) =>client =>datanode
1.首先客户端请求namenode确立通信机制,然后NN返回给客户端,ok,没问题
2.客户端向NN发出写文件请求,NN获取Block以及需要写入的DN节点信息,返回给客户端
3.客户端找到相应的DN,建立通信机制(根据NN返回的DN信息,返回的是DN的内网IP,所以客户端如果在外网,通过内网IP去找DN,肯定是凉凉斯密达),然后返回给客户端,说OJBK了,来吧。
4.客户端传输数据
So,各位老爷们,一定记住啦。外网通信到内网,记得使用hadoop集群通信所配置的域名,如果做测试,加入(没必要在配置文件,在代码里临时加一下):
configuration.set("dfs.client.use.datanode.hostname", "true");
然后你本地的hosts里面加入域名所对应的ip映射关系即可(如果还不明白,就直接加你hadoop服务器上hosts里面配置的域名与ip映射关系)
利用hdfs来实现词频统计
我们来写一段比较原始的,使用hdfs的api来统计wordcount(这里硬干,就不做任何封装了):
这里有个文件wordcount,内容是:
test hadoop
hello hadoop
hi hdfs
hadoop hdfs
test yarn
hi yarn
我们开始统计每个单词出现的次数:
package codemperor.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
/**
 * Word count implemented directly on the HDFS client API (no MapReduce).
 *
 * <p>Reads every file under {@code /hdfsapi/wordcount}, counts whitespace-separated words
 * in memory, writes one {@code word:count} line per word to
 * {@code /hdfsapi/output/wordcount_result}, then prints that file.
 */
public class HDFSWordCount {
    /**
     * Runs the word count against the configured cluster.
     *
     * @param args unused
     * @throws Exception if the cluster cannot be reached or any read/write fails
     */
    public static void main(String[] args) throws Exception {
        /********** configuration ********/
        Configuration configuration = new Configuration();
        // Replication factor for the result file.
        configuration.set("dfs.replication", "1");
        // For tests run from outside the cluster network: talk to DataNodes by hostname.
        configuration.set("dfs.client.use.datanode.hostname", "true");
        URI uri = new URI("hdfs://47.96.94.82:9000");

        // try-with-resources closes the client and every stream even when a step fails.
        try (FileSystem fs = FileSystem.get(uri, configuration)) {
            /********** in-memory counters ********/
            Map<String, Integer> map = new HashMap<>();

            /********** read input ********/
            Path input = new Path("/hdfsapi/wordcount");
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(input, true);
            while (ri.hasNext()) {
                LocatedFileStatus lf = ri.next();
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                        fs.open(lf.getPath()), java.nio.charset.StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        // Split on runs of whitespace. Blank lines and leading whitespace
                        // produce an empty token, which must not be counted as a word.
                        for (String v : line.split("\\s+")) {
                            if (v.isEmpty()) {
                                continue;
                            }
                            Integer count = map.get(v);
                            map.put(v, count == null ? 1 : count + 1);
                        }
                    }
                }
            }

            /********** write result ********/
            Path output = new Path("/hdfsapi/output/wordcount_result");
            try (FSDataOutputStream out = fs.create(output)) {
                for (Map.Entry<String, Integer> entry : map.entrySet()) {
                    // Plain UTF-8 text. writeUTF would put a 2-byte length before every line.
                    String row = entry.getKey() + ":" + entry.getValue() + "\n";
                    out.write(row.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                }
            }
            System.out.println("统计成功");

            /********** print the result file ********/
            try (FSDataInputStream result = fs.open(output)) {
                // copyBytes(in, out, bufferSize) does not close the streams itself.
                IOUtils.copyBytes(result, System.out, 1024);
            }
        }
    }
}
使用MapReduce
使用MR来做wordcount
首先我们来了解一下数据到map,然后再到reduce的过程(我们以wordcount为例):
1.提交数据后,数据经过splitting进行切分,切分成多个数据块(当然是分布式分散在不同节点啦)
2.数据块提交mapping,做KV处理,然后继续下一步
3.Mapping的数据经过Shuffling,将所有相同的key的数据放在同一个块上
4.Reducing将所有相同的key的数据进行整合计算,最终得到结果
那么可以简化为:
现在有hello、hello、hello、world、world、codemperor,经过map之后变为:
(hello,1)(hello,1)(hello,1)(world,1)(world,1)(codemperor,1)
map将数据输出到reduce端,按照相同的key,分发到同一个reduce上,数据变为:
reduce1:(hello,1) (hello,1) (hello,1) => (hello,[1,1,1])
reduce1:(world,1) (world,1) => (world,[1,1])
reduce1:(codemperor,1) => (codemperor,[1])
然后计算后得到最终结果
我们先创建:WordCountMapper:
package codemperor.mapreduce.wc;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper of the MapReduce word count: emits {@code (word, 1)} for every word of a line.
 *
 * <ul>
 *   <li>KEYIN: offset of the line within the input (e.g. 0 for the first line)</li>
 *   <li>VALUEIN: the whole line of text</li>
 *   <li>KEYOUT: the output key, here a single word</li>
 *   <li>VALUEOUT: the output value, here the count 1</li>
 * </ul>
 *
 * <p>The four type parameters must be Hadoop's serializable types ({@code Writable}s),
 * not plain Java types.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /** Constant count emitted for every word. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Reused output key; avoids allocating a new {@code Text} per word. */
    private final Text outputWord = new Text();

    /**
     * Splits the line on whitespace and writes {@code (word, 1)} for each non-empty token.
     *
     * @param key     offset of the line in the input (unused)
     * @param value   the line of text
     * @param context job context that receives the output pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] values = value.toString().split("\\s+");
        for (String word : values) {
            // Blank lines and leading whitespace yield an empty token; it is not a word.
            if (word.isEmpty()) {
                continue;
            }
            outputWord.set(word);
            context.write(outputWord, ONE);
        }
    }
}
然后创建:WordCountReduce:
package codemperor.mapreduce.wc;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Iterator;
/**
 * Reducer of the MapReduce word count: sums the counts received for one word.
 */
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * Adds up all values of {@code key} and writes {@code (key, total)}.
     *
     * @param key     the word
     * @param values  the counts emitted for this word
     * @param context job context that receives the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable occurrence : values) {
            total += occurrence.get();
        }
        context.write(key, new IntWritable(total));
    }
}
最后创建WordCountDriver来执行map和reduce:
package codemperor.mapreduce.wc;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver that wires the word-count mapper and reducer into a job and runs it.
 */
@Slf4j
public class WordCountDriver {
    /**
     * Configures the job, submits it, waits for it to finish and prints whether it succeeded.
     *
     * @param args unused
     * @throws Exception if the job cannot be created, submitted or monitored
     */
    public static void main(String[] args) throws Exception {
        final String hdfsUri = "hdfs://你的IP:9000";

        /********** configuration ********/
        Configuration conf = new Configuration();
        // For tests run from outside the cluster network: talk to DataNodes by hostname.
        conf.set("dfs.client.use.datanode.hostname", "true");

        // Define the job.
        Job wordCountJob = Job.getInstance(conf, "WordCount");
        log.info("asdasd");
        wordCountJob.setJarByClass(WordCountDriver.class);
        wordCountJob.setMapperClass(WordCountMapper.class);
        wordCountJob.setReducerClass(WordCountReduce.class);

        // Types emitted by the mapper.
        wordCountJob.setMapOutputKeyClass(Text.class);
        wordCountJob.setMapOutputValueClass(IntWritable.class);

        // Types emitted by the reducer.
        wordCountJob.setOutputKeyClass(Text.class);
        wordCountJob.setOutputValueClass(IntWritable.class);

        // Input and output locations; note that these are full hdfs:// paths.
        Path inputPath = new Path(hdfsUri + "/hdfsapi/wordcount");
        FileInputFormat.setInputPaths(wordCountJob, inputPath);
        Path outputPath = new Path(hdfsUri + "/wordcount/output");
        FileOutputFormat.setOutputPath(wordCountJob, outputPath);

        boolean succeeded = wordCountJob.waitForCompletion(true);
        System.out.println(succeeded);
    }
}
最终查看一下文件结果:
结果是:
hadoop 3
hdfs 2
hello 1
hi 2
test 2
yarn 2
使用MR进行本地计算
根据上面代码可以得知,我们用java提交了一个作业,发给hadoop,hadoop根据作业指令开始计算,那么MapReduce必须要链接hdfs进行计算吗(Spark瞬间教你做人),当然不是,我们来稍微改一下代码,让它进行本地文件计算(当然是你机器的单节点), 我们将WordCountDriver里面的:
//作业输入和输出的参数
FileInputFormat.setInputPaths(job, new Path(HDFSURI + "/hdfsapi/wordcount")); //这里注意直接使用hdfs协议全路径
FileOutputFormat.setOutputPath(job, new Path(HDFSURI + "/wordcount/output"));//这里注意直接使用hdfs协议全路径
修改为
//下面是可以本地执行mr计算本地文件
FileInputFormat.setInputPaths(job, new Path("hdfs-demo/input/wordcount"));
FileOutputFormat.setOutputPath(job, new Path("hdfs-demo/output/result"));
再来执行一下,发现mr计算的是你本地的文件,然后输出在本地的文件夹内。
使用MR来统计日志
我们先来看一段日志样例:
我们来统计一个简单的:每天请求的成功次数、失败次数
首先,先定义我们自定义bean(这里不使用自带的基本类型):
这里需要注意:
我们类里面使用了三个int字段,因为是计算默认值,我们来赋给一个初始数据0,博主因为没有初始值,而在mapper里面也没有给所有字段初始化,导致reduce不执行,并且没有日志,所以提醒大家一点
package codemperor.mapreduce.accesslog.dailycount;
import lombok.Getter;
import lombok.Setter;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Writable value holding the request counters of one time bucket:
 * successful, failed and unparseable requests.
 */
@Getter
@Setter
public class DailyCountEntity implements Writable {
    /** Number of successful requests. */
    private Integer success = 0;

    /** Number of failed requests. */
    private Integer failed = 0;

    /** Number of requests whose status could not be parsed. */
    private Integer undefined = 0;

    /** No-arg constructor; all counters start at 0. */
    public DailyCountEntity() {
    }

    /**
     * Serializes the three counters as ints. Every field must hold a value when this runs,
     * because the fields are unboxed and written straight to the stream.
     *
     * @param out stream to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(this.success);
        out.writeInt(this.failed);
        out.writeInt(this.undefined);
    }

    /**
     * Restores the three counters; the read order must match the order used in
     * {@link #write(DataOutput)}.
     *
     * @param in stream to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        success = in.readInt();
        failed = in.readInt();
        undefined = in.readInt();
    }

    /**
     * Renders the counters as a JSON object string.
     *
     * @return text of the form {@code {"success":1,"failed":0,"undefined":0}}
     */
    @Override
    public String toString() {
        return "{"
                + "\"success\":" + success
                + ",\"failed\":" + failed
                + ",\"undefined\":" + undefined
                + '}';
    }
}
继续上我们自定义mapper:
package codemperor.mapreduce.accesslog.dailycount;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
/**
 * Mapper for the access-log statistics: for every log line it emits the line's time bucket
 * as key and a {@link DailyCountEntity} with exactly one counter set to 1 as value.
 *
 * <p>Expected line format:
 * <pre>
 * 27.19.74.143 - - [30/May/2013:17:38:20 +0800] "GET /static/image/common/faq.gif HTTP/1.1" 200 1127
 * </pre>
 */
public class DailyCountMapper extends Mapper<LongWritable, Text, Text, DailyCountEntity> {
    // Bound to this class so that log output is attributed to the mapper, not the driver.
    public Logger log = Logger.getLogger(DailyCountMapper.class);

    /**
     * Classifies one log line by its status code and writes {@code (time bucket, counters)}.
     * Status "0" (unparseable) counts as undefined, "200" as success, anything else as failed.
     *
     * @param key     offset of the line in the input (unused)
     * @param value   the raw log line
     * @param context job context that receives the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String val = value.toString();
        log.info("[map] value: " + val);
        String reduceKey = dateFormat(val);
        String methodCode = getResult(val).trim();
        DailyCountEntity dailyCountEntity = new DailyCountEntity();
        if ("0".equals(methodCode)) {
            dailyCountEntity.setUndefined(1);
        } else if ("200".equals(methodCode)) {
            dailyCountEntity.setSuccess(1);
        } else {
            dailyCountEntity.setFailed(1);
        }
        context.write(new Text(reduceKey), dailyCountEntity);
    }

    /**
     * Extracts the timestamp between {@code [} and {@code ]} and truncates it to the hour,
     * formatted as {@code yyyy-MM-dd HH:00:00}.
     *
     * @param val the raw log line
     * @return the formatted time bucket, or {@code val} unchanged when the line has no
     *         bracketed timestamp or the timestamp cannot be parsed
     */
    public String dateFormat(String val) {
        int open = val.indexOf("[");
        int close = val.indexOf("]");
        // A malformed line must not fail the whole task with StringIndexOutOfBoundsException;
        // fall back to the same value that is returned for an unparseable timestamp.
        if (open < 0 || close <= open) {
            log.error("[dateFormat] no bracketed timestamp in: " + val);
            return val;
        }
        String date = val.substring(open + 1, close);
        // SimpleDateFormat is not thread-safe, so the instances stay local to the call.
        SimpleDateFormat in = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss ZZZZZ", Locale.US);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:00:00");
        try {
            String result = sdf.format(in.parse(date));
            log.info("[dateFormat] date: " + result);
            return result;
        } catch (ParseException e) {
            log.error("", e);
        }
        return val;
    }

    /**
     * Extracts the status code, i.e. the first token after the closing quote of the request.
     *
     * @param val the raw log line
     * @return the status code, or {@code "0"} when the line has no quote or nothing follows it
     */
    public String getResult(String val) {
        int lastQuote = val.lastIndexOf("\"");
        // The code starts two characters after the closing quote: the quote plus one space.
        int start = lastQuote + 2;
        if (lastQuote < 0 || start >= val.length()) {
            return "0";
        }
        String[] valArr = val.substring(start).split(" ");
        if (valArr.length == 0 || valArr[0].isEmpty()) {
            return "0";
        }
        log.info("[getResult] method code: " + valArr[0]);
        return valArr[0];
    }
}
然后是我们自定义的reduce:
package codemperor.mapreduce.accesslog.dailycount;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for the access-log statistics: adds up the counters of all entities that share
 * one key and writes a single entity holding the totals.
 */
public class DailyCountReduce extends Reducer<Text, DailyCountEntity, Text, DailyCountEntity> {
    /**
     * Sums success, failed and undefined over {@code values} and writes {@code (key, totals)}.
     *
     * @param key     the time bucket
     * @param values  the per-line counters emitted for this bucket
     * @param context job context that receives the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<DailyCountEntity> values, Context context) throws IOException, InterruptedException {
        System.out.println("begin reduce");

        // Accumulate directly into the output entity; its counters start at 0.
        DailyCountEntity totals = new DailyCountEntity();
        for (DailyCountEntity each : values) {
            totals.setSuccess(totals.getSuccess() + each.getSuccess());
            totals.setFailed(totals.getFailed() + each.getFailed());
            totals.setUndefined(totals.getUndefined() + each.getUndefined());
        }

        context.write(key, totals);
    }
}
最终我们编写自定义driver,来启动这次作业(这里博主就在本地执行了,如果需要走服务器,请参考上面的demo):
package codemperor.mapreduce.accesslog.dailycount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.Logger;
/**
 * Driver that wires the access-log mapper and reducer into a job and runs it on local files.
 */
public class DailyCountDriver {
    public static Logger log = Logger.getLogger(DailyCountDriver.class);

    /**
     * Configures the job, runs it, waits for it to finish and prints whether it succeeded.
     *
     * @param args unused
     * @throws Exception if the job cannot be created, submitted or monitored
     */
    public static void main(String[] args) throws Exception {
        /********** configuration ********/
        Configuration conf = new Configuration();
        log.info("daily count go go go");

        // Define the job.
        Job dailyCount = Job.getInstance(conf, "daily count");
        dailyCount.setJarByClass(DailyCountDriver.class);
        dailyCount.setMapperClass(DailyCountMapper.class);
        dailyCount.setReducerClass(DailyCountReduce.class);

        // Types emitted by the mapper.
        dailyCount.setMapOutputKeyClass(Text.class);
        dailyCount.setMapOutputValueClass(DailyCountEntity.class);

        // Types emitted by the reducer.
        dailyCount.setOutputKeyClass(Text.class);
        dailyCount.setOutputValueClass(DailyCountEntity.class);

        // Relative local paths: the job reads and writes local files.
        Path logFile = new Path("hdfs-demo/src/main/java/codemperor/mapreduce/accesslog/access_2013_05_30.log");
        FileInputFormat.setInputPaths(dailyCount, logFile);
        Path resultDir = new Path("hdfs-demo/output/daily_count_result");
        FileOutputFormat.setOutputPath(dailyCount, resultDir);

        boolean succeeded = dailyCount.waitForCompletion(true);
        System.out.println(succeeded);
    }
}
执行之后,我们可以看到最终结果:
扩展之使用NullWritable来忽略key
上面的结果看的好不爽啊,想要存到数据库,可能又要解析了,有没有一种办法,输出的结果直接就是一个json,这样读取之后解析炒鸡方便,当然可以啦,我们在DailyCountReduce中,稍微修改一下输出:
将reduce输出改为NullWritable
context.write(NullWritable.get(), dailyCountEntity);
然后修改一下我们的bean:
/**
 * Date label of this record.
 * NOTE(review): presumably the time bucket that used to be the reduce key — confirm
 * against the reducer that fills it in.
 */
private String date = "";
/**
 * Number of successful requests.
 */
private Integer success = 0;
/**
 * Number of failed requests.
 */
private Integer failed = 0;
/**
 * Number of requests whose status could not be parsed.
 */
private Integer undefined = 0;
==记得readFields和write也要加哦。==
然后再跑一次,看到结果:
ok斯密达~
扩展之设置分区来写入不同文件
比如现在有个需求,要求我们成功调用的输出一个文件,失败调用的输出一个文件,剩下的输出到其他文件。那么如何做呢? 这就需要我们来手动设置分区啦
创建一个DailyCountPartitioner
package codemperor.mapreduce.accesslog.dailycount;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Routes map output to reducers by the kind of counter set in the value rather than by key:
 * failed requests, successful requests and everything else each get their own partition.
 * The type parameters are the map output key and value types.
 */
public class DailyCountPartitioner extends Partitioner<Text, DailyCountEntity> {
    /**
     * Chooses the partition for one map output record.
     *
     * @param text             the map output key (not used for the decision)
     * @param dailyCountEntity the map output value; the counter that equals 1 selects the bucket
     * @param numPartitions    number of reduce tasks of the job, which is also the number of
     *                         reduce output files
     * @return 0 for failed, 1 for success, 2 for the rest when at least three partitions
     *         exist; with fewer partitions the bucket is folded with modulo so the result
     *         always lies in {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(Text text, DailyCountEntity dailyCountEntity, int numPartitions) {
        int bucket;
        if (dailyCountEntity.getFailed() == 1) {
            // Failed requests go to one file.
            bucket = 0;
        } else if (dailyCountEntity.getSuccess() == 1) {
            // Successful requests go to one file.
            bucket = 1;
        } else {
            // Everything else goes to another file.
            bucket = 2;
        }
        // A partition number >= numPartitions fails the job, so fold the bucket into range
        // when the job has fewer than three reduce tasks.
        return bucket % numPartitions;
    }
}
然后在我们的DailyCountDriver中加入配置:
//设置分区和分区数量
job.setPartitionerClass(DailyCountPartitioner.class);
job.setNumReduceTasks(3);
DailyCountDriver代码如下:
package codemperor.mapreduce.accesslog.dailycount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.Logger;
/**
 * Driver for the partitioned access-log job: three reduce tasks, custom partitioner,
 * reducer output keyed by {@code NullWritable}; runs on local files.
 */
public class DailyCountDriver {
    public static Logger log = Logger.getLogger(DailyCountDriver.class);

    /**
     * Configures the job, runs it, waits for it to finish and prints whether it succeeded.
     *
     * @param args unused
     * @throws Exception if the job cannot be created, submitted or monitored
     */
    public static void main(String[] args) throws Exception {
        /********** configuration ********/
        Configuration conf = new Configuration();
        log.info("daily count go go go");

        // Define the job.
        Job dailyCount = Job.getInstance(conf, "daily count");
        dailyCount.setJarByClass(DailyCountDriver.class);
        dailyCount.setMapperClass(DailyCountMapper.class);
        dailyCount.setReducerClass(DailyCountReduce.class);

        // Custom partitioner and the number of partitions (reduce tasks).
        dailyCount.setPartitionerClass(DailyCountPartitioner.class);
        dailyCount.setNumReduceTasks(3);

        // Types emitted by the mapper.
        dailyCount.setMapOutputKeyClass(Text.class);
        dailyCount.setMapOutputValueClass(DailyCountEntity.class);

        // Types emitted by the reducer.
        dailyCount.setOutputKeyClass(NullWritable.class);
        dailyCount.setOutputValueClass(DailyCountEntity.class);

        // Relative local paths: the job reads and writes local files.
        Path logFile = new Path("hdfs-demo/src/main/java/codemperor/mapreduce/accesslog/access_2013_05_31.log");
        FileInputFormat.setInputPaths(dailyCount, logFile);
        Path resultDir = new Path("hdfs-demo/output/daily_count_result");
        FileOutputFormat.setOutputPath(dailyCount, resultDir);

        boolean succeeded = dailyCount.waitForCompletion(true);
        System.out.println(succeeded);
    }
}
我们手动设置了3个分区,然后最终运行之后,我们可以看到有三份文件输出:
里面就是最终结果啦~~啦啦啦~~~
附上代码仓库
所有相关例子都在里面:https://gitee.com/_madi/hadoop-demo.git