MapReduce examples: wordcount, action counts, map join, combiner, partitioner, reduce join
Maven dependencies:
<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
        <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.6.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-common -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-common</artifactId>
        <version>2.6.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.0</version>
    </dependency>
</dependencies>
Helper class: if the output directory already exists, delete it and everything in it (so reruns don't fail on an existing output directory).
package com.myh.common;

import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;

// Check whether the output directory exists; if it does, delete it
public class DelLocalDriectory {
    public static void del(String path) {
        try {
            File file = new File(path);
            if (file.exists()) {
                FileUtils.forceDelete(file); // deletes the directory recursively
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
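The helper above only handles local paths. For jobs whose output goes to HDFS, here is a hedged sketch of the equivalent using Hadoop's FileSystem API (the class name DelHdfsDirectory is hypothetical, not from the original code):

package com.myh.common;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

// a sketch: delete an HDFS output directory if it exists (hypothetical helper)
public class DelHdfsDirectory {
    public static void del(Configuration cfg, String path) throws IOException {
        FileSystem fs = FileSystem.get(cfg); // shared, cached instance; do not close it mid-job
        Path p = new Path(path);
        if (fs.exists(p)) {
            fs.delete(p, true); // true = recursive
        }
    }
}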
Example 1: the simplest wordcount. Take a piece of text, split each line on " ", and count how many times each string appears.
Pitfall encountered: leaving one slash out of the file:/// URI makes the job report a strange error and produce no output.
The main method:
package com.myh.wc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WcDemo {
    public static void main(String[] args) throws Exception {
        Configuration cfg = new Configuration();
        // cfg.set("mapreduce.framework.name", "yarn");
        // Prepare an empty job
        Job job = Job.getInstance(cfg);
        // The job's entry class
        job.setJarByClass(WcDemo.class);
        // Give the job a name
        job.setJobName("wc");
        // Input file path
        FileInputFormat.addInputPath(job, new Path("file:///e:/word.txt"));
        // Output directory path
        FileOutputFormat.setOutputPath(job, new Path("file:///e:/res1"));
        // Plug your mapper into the MapReduce pipeline
        job.setMapperClass(WcMapper.class);
        // Types the mapper emits
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Plug your reducer into the MapReduce pipeline
        job.setReducerClass(WcReduce.class);
        // Types the reducer emits
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Run the whole pipeline
        job.waitForCompletion(true);
    }
}
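An optional tweak, not in the original driver: since adding counts is associative, the same WcReduce class can also be registered as a combiner to pre-aggregate on the map side (combiners are covered in Example 5):

// optional: pre-aggregate per mapper before the shuffle; safe here because addition is associative
job.setCombinerClass(WcReduce.class);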
package com.myh.wc;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Processes one input split.
 * Mapper<LongWritable, Text, Text, IntWritable>
 * The first two types are the input (from a local or HDFS file): 1. the line's byte offset 2. the line's text
 * The last two are the output (written to the circular buffer): 1. key 2. value
 */
public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    IntWritable one = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the line into words
        String[] words = value.toString().split(" ");
        // Write each word with a count of 1 to the circular buffer
        for (String word : words) {
            context.write(new Text(word), one);
        }
    }
}
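A small optional optimization: map() runs once per input line, so allocating a new Text per word creates garbage. A sketch of the same mapper reusing one output key (the class name WcMapperReusing is hypothetical):

package com.myh.wc;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

// a sketch: same logic as WcMapper above, but reusing one Text instance across calls
public class WcMapperReusing extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final IntWritable one = new IntWritable(1);
    private final Text outKey = new Text(); // reused for every word

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String word : value.toString().split(" ")) {
            outKey.set(word); // overwrite the buffer instead of allocating a new Text
            context.write(outKey, one); // safe: the framework serializes the pair immediately
        }
    }
}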
package com.myh.wc;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/16 0:39
 */
public class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Count how many 1s this group contains
        int num = 0;
        for (IntWritable i : values) {
            num += i.get();
        }
        // Write the total to the output file
        context.write(key, new IntWritable(num));
    }
}
Contents of the word.txt used:
i hadoop
sasa dsa
i haddoop
i haddoop
i haddoop
i haddoopi haddoopi
i haddoop
i haddoop
i haddoop
i haddoop
i haddoopi haddoopi
i haddoopi haddoop
i haddoop
i haddoop
i haddoopi haddoopi
i haddoopi haddoop
i haddoop
i haddoop
i haddoopi haddoopi
i haddoop
Output:
Example 2: count actions per user, keyed by user id plus action name.
package com.njbdqn.actcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AcDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(AcDemo.class);
        job.setJobName("ac");
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.100.195:9000/logs/202001"));
        FileOutputFormat.setOutputPath(job, new Path("file:///e:/res1"));
        job.setMapperClass(AcMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserAction.class);
        job.setReducerClass(AcReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.waitForCompletion(true);
    }
}
package com.njbdqn.actcount;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class AcMapper extends Mapper<LongWritable, Text, Text, UserAction> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Read a log line, e.g. amd,16G,2.7GHz,2020-01-02#00:06:24,COLLECT,400283,goods.html,228778,FIREFOX,90
        String[] infos = value.toString().split(",");
        // Build the output key: user id (infos[5]) plus action type (infos[4])
        String outKey = infos[5] + infos[4];
        // Build the output value
        UserAction ua = new UserAction(outKey, 1);
        // Hand the pair to the circular buffer
        context.write(new Text(outKey), ua);
    }
}
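A hedged aside: infos[5] + infos[4] concatenates the id and the action with no delimiter. It works here because ids are numeric and action names are alphabetic, but an explicit separator is easier to read and to split apart later:

// a sketch: an explicit separator makes the composite key self-describing
String outKey = infos[5] + "_" + infos[4]; // e.g. 400283_COLLECT instead of 400283COLLECT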
package com.njbdqn.actcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class AcReduce extends Reducer<Text, UserAction, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<UserAction> values, Context context)
            throws IOException, InterruptedException {
        // Groups arrive already keyed by user id + action type (e.g. 400283CLICK, 400283COLLECT);
        // count the actions in each group
        int num = 0;
        for (UserAction ua : values) {
            num += ua.getCnt();
        }
        context.write(key, new IntWritable(num));
    }
}
package com.njbdqn.actcount;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Custom input/output type
 */
public class UserAction implements WritableComparable<UserAction> {
    private String useridAndAct;
    private int cnt;

    public UserAction() {
    }

    public UserAction(String useridAndAct, int cnt) {
        this.useridAndAct = useridAndAct;
        this.cnt = cnt;
    }

    @Override
    public int compareTo(UserAction o) {
        return 0;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(useridAndAct);
        out.writeInt(cnt);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.useridAndAct = in.readUTF();
        this.cnt = in.readInt();
    }

    public String getUseridAndAct() {
        return useridAndAct;
    }

    public void setUseridAndAct(String useridAndAct) {
        this.useridAndAct = useridAndAct;
    }

    public int getCnt() {
        return cnt;
    }

    public void setCnt(int cnt) {
        this.cnt = cnt;
    }
}
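A note on the interface choice: UserAction only ever travels as a map output value (the key is a Text), so strictly it needs only Writable; WritableComparable, with its stubbed compareTo, is required only for keys. A minimal sketch under that assumption (the class name UserActionValue is hypothetical):

package com.njbdqn.actcount;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// a sketch: a value-only type needs just Writable; no compareTo stub required
public class UserActionValue implements Writable {
    private String useridAndAct;
    private int cnt;

    public UserActionValue() { } // Hadoop needs a no-arg constructor for deserialization

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(useridAndAct);
        out.writeInt(cnt);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.useridAndAct = in.readUTF();
        this.cnt = in.readInt();
    }

    public int getCnt() {
        return cnt;
    }
}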
Result:
Example 3: show the results horizontally, one line per user: userid,browse,click,collect,buy.
package com.myh.actcount2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class Ac2Demo {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(Ac2Demo.class);
        job.setJobName("ac2");
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.100.195:9000/logs/202001"));
        FileOutputFormat.setOutputPath(job, new Path("file:///e:/res2"));
        job.setMapperClass(Ac2Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserAction.class);
        job.setReducerClass(Ac2Reduce.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(UserAction.class);
        job.waitForCompletion(true);
    }
}
package com.myh.actcount2;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17 0:15
 */
public class Ac2Mapper extends Mapper<LongWritable, Text, Text, UserAction> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Read a log line, e.g. amd,16G,2.7GHz,2020-01-02#00:06:24,COLLECT,400283,goods.html,228778,FIREFOX,90
        String[] infos = value.toString().split(",");
        UserAction ua = new UserAction();
        ua.setUserid(infos[5]);
        // Flag the one action this record represents
        switch (infos[4]) {
            case "BROWSE":
                ua.setBrowse(1);
                break;
            case "CLICK":
                ua.setClick(1);
                break;
            case "COLLECT":
                ua.setCollect(1);
                break;
            default: // BUY
                ua.setBuy(1);
        }
        context.write(new Text(infos[5]), ua);
    }
}
package com.myh.actcount2;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17 0:29
 */
public class Ac2Reduce extends Reducer<Text, UserAction, NullWritable, UserAction> {
    @Override
    protected void reduce(Text key, Iterable<UserAction> values, Context context)
            throws IOException, InterruptedException {
        // Add up the four counters across every UserAction in the group
        int browse = 0, click = 0, buy = 0, collect = 0;
        for (UserAction ua : values) {
            browse += ua.getBrowse();
            click += ua.getClick();
            buy += ua.getBuy();
            collect += ua.getCollect();
        }
        // Mind the constructor's parameter order: (userid, browse, click, collect, buy)
        UserAction out = new UserAction(key.toString(), browse, click, collect, buy);
        context.write(NullWritable.get(), out);
    }
}
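The positional constructor is easy to get wrong (collect and buy sit next to each other and are both ints). A hedged alternative that uses the setters the class already provides, so each field assignment is explicit:

// a sketch: setters sidestep the positional-argument trap entirely
UserAction out = new UserAction();
out.setUserid(key.toString());
out.setBrowse(browse);
out.setClick(click);
out.setCollect(collect);
out.setBuy(buy);
context.write(NullWritable.get(), out);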
package com.myh.actcount2;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17 0:13
 */
public class UserAction implements WritableComparable<UserAction> {
    private String userid;
    private int browse;
    private int click;
    private int collect;
    private int buy;

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public int getBrowse() {
        return browse;
    }

    public void setBrowse(int browse) {
        this.browse = browse;
    }

    public int getClick() {
        return click;
    }

    public void setClick(int click) {
        this.click = click;
    }

    public int getCollect() {
        return collect;
    }

    public void setCollect(int collect) {
        this.collect = collect;
    }

    public int getBuy() {
        return buy;
    }

    public void setBuy(int buy) {
        this.buy = buy;
    }

    public UserAction(String userid, int browse, int click, int collect, int buy) {
        this.userid = userid;
        this.browse = browse;
        this.click = click;
        this.collect = collect;
        this.buy = buy;
    }

    public UserAction() {
    }

    @Override
    public int compareTo(UserAction o) {
        return 0;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(userid);
        out.writeInt(browse);
        out.writeInt(click);
        out.writeInt(collect);
        out.writeInt(buy);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.userid = in.readUTF();
        this.browse = in.readInt();
        this.click = in.readInt();
        this.collect = in.readInt();
        this.buy = in.readInt();
    }

    @Override
    public String toString() {
        return userid + "," + browse + "," + click + "," + collect + "," + buy;
    }
}
Result:
Example 4: map join.
package com.myh.mapjoin;

import com.myh.common.DelLocalDriectory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class UoMapDemo {
    public static void main(String[] args) throws Exception {
        DelLocalDriectory.del("e:/res4");
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(UoMapDemo.class);
        job.setJobName("uo");
        FileInputFormat.addInputPath(job, new Path("e:/data")); // contains order.txt
        FileOutputFormat.setOutputPath(job, new Path("e:/res4"));
        // Ship the small users.txt file to every mapper node (keep it small, well under ~25 MB)
        job.addCacheFile(new Path("e:/users.txt").toUri());
        job.setMapperClass(UoMapMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserOrder.class); // the map output value type, not the job output value type
        job.setReducerClass(UoMapReduce.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(UserOrder.class);
        job.setNumReduceTasks(2);
        job.waitForCompletion(true);
    }
}
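Worth noting: the join itself happens entirely on the map side. If the per-user money aggregation done by UoMapReduce were not needed, a hedged tweak would make the job map-only:

// a sketch: drop the reduce phase when no aggregation is needed;
// each mapper's joined records are then written directly to the output directory
job.setNumReduceTasks(0);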
package com.myh.mapjoin;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17 1:12
 */
public class UoMapMapper extends Mapper<LongWritable, Text, Text, UserOrder> {
    Map<String, String> users = new HashMap<>();

    // Runs exactly once per map task, before any map() call; roughly analogous to static initialization
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Fetch the cache files
        URI[] files = context.getCacheFiles();
        if (null != files && files.length > 0) {
            // Path of the cached file
            String path = files[0].getPath();
            // Read the cached file with a BufferedReader and load userid -> username into the users map
            BufferedReader br = new BufferedReader(new FileReader(path));
            String line = "";
            while ((line = br.readLine()) != null) { // e.g. 1,zhangsan,40
                String[] us = line.split(",");
                users.put(us[0], us[1]); // {1=>zhangsan, 2=>lisi, 3=>wangwu}
            }
            br.close();
        }
    }

    // Runs once per input record
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Join the order record against the users map to build the UserOrder you want
        String[] split = value.toString().split(","); // e.g. 1,1,50
        UserOrder uo = new UserOrder();
        uo.setUserid(split[1]);
        uo.setMoney(Integer.parseInt(split[2]));
        uo.setUsername(users.get(split[1]));
        context.write(new Text(split[1]), uo); // {userid:1, username:zhangsan, money:50}
    }
}
package com.myh.mapjoin;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class UoMapReduce extends Reducer<Text, UserOrder, NullWritable, UserOrder> {
    @Override
    protected void reduce(Text key, Iterable<UserOrder> values, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        UserOrder uoout = null;
        for (UserOrder uo : values) {
            count += uo.getMoney();
            if (uoout == null) {
                // Copy the fields: Hadoop reuses one UserOrder instance across the whole iteration,
                // so keeping a reference to uo would silently pick up later records' values
                uoout = new UserOrder(uo.getUserid(), uo.getUsername(), 0);
            }
        }
        uoout.setMoney(count);
        context.write(NullWritable.get(), uoout);
    }
}
package com.myh.mapjoin;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17 1:09
 */
public class UserOrder implements WritableComparable<UserOrder> {
    private String userid;
    private String username;
    private int money;

    public UserOrder() {
    }

    public UserOrder(String userid, String username, int money) {
        this.userid = userid;
        this.username = username;
        this.money = money;
    }

    @Override
    public int compareTo(UserOrder o) {
        return 0;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(userid);
        dataOutput.writeUTF(username);
        dataOutput.writeInt(money);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userid = dataInput.readUTF();
        this.username = dataInput.readUTF();
        this.money = dataInput.readInt();
    }

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public int getMoney() {
        return money;
    }

    public void setMoney(int money) {
        this.money = money;
    }

    @Override
    public String toString() {
        return String.format("%s,%s,%d", userid, username, money);
    }
}
Input files:
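Reconstructed from the code comments (hedged; only the sample lines that actually appear in the comments are shown, the third users.txt field is unused by the code):

users.txt (userid,username,<unused field>), e.g.:
1,zhangsan,40

order.txt (likely orderid,userid,money), e.g.:
1,1,50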
Result:
Example 5: combiner.
Code:
package com.njbdqn.mpcombiner;

import com.njbdqn.common.DelLocalDriectory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class UoMapDemo {
    public static void main(String[] args) throws Exception {
        DelLocalDriectory.del("e:/res6");
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(UoMapDemo.class);
        job.setJobName("uo1");
        FileInputFormat.addInputPath(job, new Path("e:/data")); // contains order.txt
        FileOutputFormat.setOutputPath(job, new Path("e:/res6"));
        // Ship the small users.txt file to every mapper node (keep it small, well under ~25 MB)
        job.addCacheFile(new Path("e:/users.txt").toUri());
        job.setMapperClass(UoMapMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserOrder1.class);
        // job.setReducerClass(UoMapReduce.class);
        // job.setOutputKeyClass(NullWritable.class);
        // job.setOutputValueClass(UserOrder.class);
        // Aggregate once on each mapper node, so later MapReduce stages
        // move far less data across the network
        job.setCombinerClass(UoMapReduce.class);
        // No reducer class is set, so the identity reducer runs and passes (Text, UserOrder1) through
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(UserOrder1.class);
        // How many reduce tasks run in parallel (default is 1)
        job.setNumReduceTasks(2);
        job.waitForCompletion(true);
    }
}
package com.njbdqn.mpcombiner;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;

public class UoMapMapper extends Mapper<LongWritable, Text, Text, UserOrder1> {
    Map<String, String> users = new HashMap<String, String>();

    // Runs exactly once per map task, before any map() call; roughly analogous to static initialization
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Fetch the cache files
        URI[] files = context.getCacheFiles();
        if (null != files && files.length > 0) {
            // Path of the cached file
            String path = files[0].getPath();
            // Read the cached file with a BufferedReader and load userid -> username into the users map
            BufferedReader br = new BufferedReader(new FileReader(path));
            String line = "";
            while ((line = br.readLine()) != null) { // e.g. 1,zhangsan,40
                String[] us = line.split(",");
                users.put(us[0], us[1]); // {1=>zhangsan, 2=>lisi, 3=>wangwu}
            }
            br.close();
        }
    }

    // Runs once per input record
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Join the order record against the users map to build the UserOrder1 you want
        String[] split = value.toString().split(","); // e.g. 1,1,50
        UserOrder1 uo = new UserOrder1();
        uo.setUserid(split[1]);
        uo.setMoney(Integer.parseInt(split[2]));
        uo.setUsername(users.get(split[1]));
        context.write(new Text(split[1]), uo); // {userid:1, username:zhangsan, money:50}
    }
}
package com.njbdqn.mpcombiner;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.Iterator;

// Reuse the reducer as a combiner. A combiner runs on the map side, so its input and
// output types must both match the map output types (Text, UserOrder1); summing money
// is associative, which makes it safe to apply any number of times.
public class UoMapReduce extends Reducer<Text, UserOrder1, Text, UserOrder1> {
    @Override
    protected void reduce(Text key, Iterable<UserOrder1> values, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        UserOrder1 uoout = new UserOrder1();
        for (Iterator<UserOrder1> it = values.iterator(); it.hasNext(); ) {
            UserOrder1 uo = it.next();
            count += uo.getMoney();
            if (uoout.getUserid() == null) {
                // Copy the id and name from the first record of the group
                uoout.setUserid(uo.getUserid());
                uoout.setUsername(uo.getUsername());
            }
        }
        uoout.setMoney(count);
        System.out.println(key.toString() + "=========>" + uoout);
        context.write(key, uoout);
    }
}
package com.njbdqn.mpcombiner;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class UserOrder1 implements WritableComparable<UserOrder1> {
    private String userid;
    private String username;
    private int money;

    public UserOrder1() {
    }

    public UserOrder1(String userid, String username, int money) {
        this.userid = userid;
        this.username = username;
        this.money = money;
    }

    @Override
    public int compareTo(UserOrder1 o) {
        return 0;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(userid);
        dataOutput.writeUTF(username);
        dataOutput.writeInt(money);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userid = dataInput.readUTF();
        this.username = dataInput.readUTF();
        this.money = dataInput.readInt();
    }

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public int getMoney() {
        return money;
    }

    public void setMoney(int money) {
        this.money = money;
    }

    @Override
    public String toString() {
        return String.format("%s,%s,%d", userid, username, money);
    }
}
Result:
Example 6: partitioner.
Code:
package com.myh.part;

import com.myh.common.DelLocalDriectory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class OrderDemo {
    public static void main(String[] args) throws Exception {
        DelLocalDriectory.del("e:/res7");
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(OrderDemo.class);
        job.setJobName("partition");
        FileInputFormat.addInputPath(job, new Path("e:/data"));
        FileOutputFormat.setOutputPath(job, new Path("e:/res7"));
        job.setMapperClass(OrderMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Orders.class);
        // Route each key to a reduce task through the custom partitioner
        job.setPartitionerClass(OrderPartitioner.class);
        // Two reduce tasks, matching the two partitions (0 and 1) the partitioner can return
        job.setNumReduceTasks(2);
        job.waitForCompletion(true);
    }
}
package com.myh.part;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class OrderMapper extends Mapper<LongWritable, Text, Text, Orders> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Order line, e.g. 1,1,50: spt[1] is the user id, spt[2] the money
        String[] spt = value.toString().split(",");
        Orders od = new Orders(spt[1], Integer.parseInt(spt[2]));
        context.write(new Text(spt[1]), od);
    }
}
package com.myh.part;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17 1:56
 */
public class OrderPartitioner extends Partitioner<Text, Orders> {
    @Override
    public int getPartition(Text text, Orders orders, int numReduceTasks) {
        // Send user ids 1 and 2 to reducer 0 and everything else to reducer 1
        int num = Integer.parseInt(text.toString());
        if (num <= 2) {
            return 0;
        } else {
            return 1;
        }
    }
}
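For comparison, when no partitioner is set, Hadoop falls back to HashPartitioner, which spreads keys by hash code. A sketch of essentially what it does (the class name HashLikePartitioner is hypothetical; the real one is org.apache.hadoop.mapreduce.lib.partition.HashPartitioner):

package com.myh.part;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// a sketch of the default hash-based routing, for reference
public class HashLikePartitioner extends Partitioner<Text, Orders> {
    @Override
    public int getPartition(Text key, Orders value, int numReduceTasks) {
        // mask off the sign bit so the modulo result is never negative
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}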
package com.myh.part;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17 1:53
 */
public class Orders implements WritableComparable<Orders> {
    private String userid;
    private int money;

    public Orders() {
    }

    public Orders(String userid, int money) {
        this.userid = userid;
        this.money = money;
    }

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public int getMoney() {
        return money;
    }

    public void setMoney(int money) {
        this.money = money;
    }

    @Override
    public String toString() {
        return "Orders{" +
                "userid='" + userid + '\'' +
                ", money=" + money +
                '}';
    }

    @Override
    public int compareTo(Orders o) {
        return 0;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(userid);
        dataOutput.writeInt(money);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userid = dataInput.readUTF();
        this.money = dataInput.readInt();
    }
}
Result:
Example 7: reduce join with aggregation.
Code:
package com.myh.reducejoin;

import com.myh.common.DelLocalDriectory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17 2:09
 */
public class UoDemo {
    public static void main(String[] args) throws Exception {
        DelLocalDriectory.del("e:/res3");
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(UoDemo.class);
        job.setJobName("uo");
        FileInputFormat.addInputPath(job, new Path("e:/data"));
        FileOutputFormat.setOutputPath(job, new Path("e:/res3"));
        job.setMapperClass(UoMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserOrder.class);
        job.setReducerClass(UoReduce.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(UserOrder.class);
        job.waitForCompletion(true);
    }
}
package com.myh.reducejoin;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

public class UoMapper extends Mapper<LongWritable, Text, Text, UserOrder> {
    // context is the job's housekeeper: it connects the Mapper stage to the Reducer stage
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Which fields get filled depends on which input file the record came from:
        // users.txt lines look like 1,zhangsan,40; order.txt lines look like 1,1,50
        String[] infos = value.toString().split(",");
        // Find out which file this split belongs to
        String fileName = ((FileSplit) context.getInputSplit()).getPath().toString();
        UserOrder uo = new UserOrder();
        // Unlike the map join, both files sit in the same input directory:
        // e:/data/users.txt and e:/data/order.txt
        if (fileName.indexOf("order.txt") != -1) {
            uo.setUserid(infos[1]);
            uo.setMoney(Integer.parseInt(infos[2]));
            uo.setUsername("");
        } else {
            uo.setUserid(infos[0]);
            uo.setUsername(infos[1]);
        }
        context.write(new Text(uo.getUserid()), uo);
    }
}
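A hedged, tidier variant of the file check: Path.getName() returns only the final path component, which avoids running indexOf over the whole URI string:

// a sketch: compare just the file name component of the split's path
String name = ((FileSplit) context.getInputSplit()).getPath().getName();
boolean isOrder = "order.txt".equals(name);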
package com.myh.reducejoin;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class UoReduce extends Reducer<Text, UserOrder, NullWritable, UserOrder> {
    @Override
    protected void reduce(Text key, Iterable<UserOrder> values, Context context)
            throws IOException, InterruptedException {
        // Sum the money in each group and pick up the username from the users.txt record
        String username = "";
        int count = 0;
        for (UserOrder uo : values) {
            count += uo.getMoney();
            // compare string contents, not references
            if (username.isEmpty() && !uo.getUsername().isEmpty()) {
                username = uo.getUsername();
            }
        }
        UserOrder od = new UserOrder(key.toString(), username, count);
        context.write(NullWritable.get(), od);
    }
}
package com.myh.reducejoin;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class UserOrder implements WritableComparable<UserOrder> {
    private String userid;
    private String username;
    private int money;

    public UserOrder() {
    }

    public UserOrder(String userid, String username, int money) {
        this.userid = userid;
        this.username = username;
        this.money = money;
    }

    @Override
    public int compareTo(UserOrder o) {
        return 0;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(userid);
        dataOutput.writeUTF(username);
        dataOutput.writeInt(money);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userid = dataInput.readUTF();
        this.username = dataInput.readUTF();
        this.money = dataInput.readInt();
    }

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public int getMoney() {
        return money;
    }

    public void setMoney(int money) {
        this.money = money;
    }

    @Override
    public String toString() {
        return String.format("%s,%s,%d", userid, username, money);
    }
}
File placement:
Result:
Aggregated totals: