注:技术交流可以加我VX:k-loop,昵称:默读者。
需求:
有一个order.txt文件,存放位置(D:\test\order.txt)
里面存放的内容如下。第一列orderId 订单id,第二列orderName 商品名称,第三列price 价格。
1 小米10pro 5999
1 华为mate30 4999
2 味全乳酸菌 4.5
2 优酸乳 10.9
4 大碗宽面 12
2 娃哈哈 2.5
3 小浣熊干脆面 0.9
4 康师傅香辣牛肉面 23.3
4 今麦郎老坛酸菜牛肉面 22.88
3 小当家干脆面 0.5
4 汤达人酸汤牛肉面 29.9
1 华为p40 6999
1 魅族16s 2999
1,取每个订单里价格最高的一条记录
2,取每个订单里前两条价格最高的记录
3,取每个订单里价格最低的记录
第一步,idea中新建一个maven项目
pom.xml配置如下
<build>
    <!-- Jar name matches this project (was "wordcount", a leftover from another demo) -->
    <finalName>order</finalName>
    <plugins>
        <!-- Tomcat plugin (not needed for a plain MapReduce job, kept for parity with the course setup) -->
        <plugin>
            <groupId>org.apache.tomcat.maven</groupId>
            <artifactId>tomcat7-maven-plugin</artifactId>
            <version>2.2</version>
            <configuration>
                <!-- HTTP port -->
                <port>8080</port>
                <!-- context path -->
                <path>/day28_hdfs</path>
                <!-- request encoding -->
                <uriEncoding>UTF-8</uriEncoding>
                <useBodyEncodingForURI>true</useBodyEncodingForURI>
            </configuration>
        </plugin>
        <!-- Java compiler plugin -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.2</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
                <encoding>UTF-8</encoding>
            </configuration>
        </plugin>
        <!-- jar packaging plugin -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.4</version>
            <configuration>
                <archive>
                    <manifest>
                        <!-- add Class-Path entries to the manifest -->
                        <addClasspath>true</addClasspath>
                        <!-- classpath prefix -->
                        <classpathPrefix>lib/</classpathPrefix>
                        <!-- Main-Class: the driver of THIS project.
                             Was com.heima.mr.wordcount.WordCountClient (wrong project). -->
                        <mainClass>com.heima.test02.order.OrderMaster</mainClass>
                    </manifest>
                </archive>
            </configuration>
        </plugin>
    </plugins>
</build>
<dependencies>
<!-- junit: unit testing -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
<!-- Hadoop common utilities (Writable, Configuration, ...) -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.4</version>
</dependency>
<!-- HDFS client classes -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.4</version>
</dependency>
<!-- MapReduce client API (Mapper/Reducer/Job) -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.4</version>
</dependency>
<!-- lombok: generates getters/setters/constructors for OrderBean (@Data etc.) -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.16.10</version>
</dependency>
</dependencies>
第二步,编写OrderBean类
package com.heima.test02.order;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
@Data
@AllArgsConstructor
@NoArgsConstructor
public class OrderBean implements WritableComparable<OrderBean> {

    private long orderId;     // column 1: order id
    private String orderName; // column 2: product name
    private double price;     // column 3: price

    /** Bulk setter so one bean instance can be reused across records (Hadoop object-reuse pattern). */
    public void set(long orderId, String orderName, double price) {
        this.orderId = orderId;
        this.orderName = orderName;
        this.price = price;
    }

    /**
     * Sort key: orderId ascending, then price descending.
     * Guard-clause form of the original nested ternary; the comparison
     * operators are unchanged, so the ordering is exactly the same.
     */
    @Override
    public int compareTo(OrderBean other) {
        if (this.orderId > other.orderId) {
            return 1;
        }
        if (this.orderId < other.orderId) {
            return -1;
        }
        if (this.price > other.price) {
            return -1; // higher price sorts first within an order
        }
        return this.price < other.price ? 1 : 0;
    }

    /** Serialization: field order here must mirror readFields() exactly. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(orderId);
        out.writeUTF(orderName);
        out.writeDouble(price);
    }

    /** Deserialization: reads fields in the same order write() emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        orderId = in.readLong();
        orderName = in.readUTF();
        price = in.readDouble();
    }
}
第四步,编写OrderMapper类
package com.heima.test02.order;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {

    // Reused across map() calls to avoid one allocation per record (standard Hadoop pattern).
    private final OrderBean outKey = new OrderBean();

    /**
     * Parses one input line "orderId orderName price" into an OrderBean key.
     * Generalized from split(" "): any whitespace run now separates fields,
     * and blank or malformed lines are skipped instead of crashing the task.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return; // skip blank lines
        }
        String[] fields = line.split("\\s+");
        if (fields.length < 3) {
            return; // skip records missing a column rather than throwing AIOOBE
        }
        outKey.set(Long.parseLong(fields[0]), fields[1], Double.parseDouble(fields[2]));
        context.write(outKey, NullWritable.get());
    }
}
第五步,编写OrderGroup
package com.heima.test02.order;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class OrderGroup extends WritableComparator {

    protected OrderGroup() {
        // true => let WritableComparator create OrderBean instances for deserialization
        super(OrderBean.class, true);
    }

    /**
     * Reduce-phase grouping: beans with the same orderId go into one reduce() call.
     * Fixed to use Long.compare — the original {@code == ? 0 : 1} violated the
     * comparator contract (compare(a,b)==1 AND compare(b,a)==1 for unequal ids,
     * i.e. not symmetric), which can break grouping on sorted input.
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        OrderBean left = (OrderBean) a;
        OrderBean right = (OrderBean) b;
        return Long.compare(left.getOrderId(), right.getOrderId());
    }
}
第六步,编写OrderReducer
package com.heima.test02.order;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class OrderReducer extends Reducer<OrderBean,NullWritable,OrderBean,NullWritable> {
// NOTE(review): this reducer relies on Hadoop's key-object reuse — advancing the
// values iterator deserializes the NEXT record of the group into `key` in place.
// Records arrive sorted orderId asc / price desc (OrderBean.compareTo) and are
// grouped by orderId (OrderGroup), so within one reduce() call:
//   - before iterating, `key` holds the group's highest-priced record;
//   - after exhausting the iterator, `key` holds the lowest-priced record.
@Override
protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
// Requirement 1: emit the single highest-priced record of each order
// (key already holds it on entry — no iteration needed):
//context.write(key,NullWritable.get());
// Requirement 2: emit the two highest-priced records of each order
// (each loop step mutates `key` to the next record, so writing inside
// the loop emits the records in descending-price order):
/*int nums = 0;
for (NullWritable value : values) {
if(nums<2){
context.write(key, NullWritable.get());
nums++;
}else{
break;
}
}*/
// Requirement 3 (active): emit the lowest-priced record of each order.
// The loop body is intentionally empty — draining the iterator leaves
// `key` holding the last (cheapest) record of the group.
for (NullWritable value : values) {
}
context.write(key,NullWritable.get());
}
}
第七步,编写OrderMaster
package com.heima.test02.order;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class OrderMaster {

    /**
     * Job driver: wires the mapper, reducer and grouping comparator together
     * and runs the job against the local order.txt input.
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(OrderMaster.class);

        // Mapper / reducer wiring.
        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReducer.class);

        // Key/value types for the map output and the final output.
        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // Group records sharing an orderId into a single reduce() call.
        job.setGroupingComparatorClass(OrderGroup.class);

        // Local filesystem paths; the output directory must not already exist.
        FileInputFormat.setInputPaths(job, new Path("D:\\test\\order.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\test\\order"));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}