1.自定义数据类型实现接口WritableComparable
2.输入数据格式:第一列为班级号,第二列为姓名,第三列为学号
1 小明 10 3 大明 11
1 小王 12 4 大王 13
1 小张 14 6 大张 15
3.输出数据格式:同一个班级的学生记录输出在同一行
1 学号:14姓名小张 学号:12姓名小王 学号:10姓名小明
3 学号:11姓名大明
4 学号:13姓名大王
6 学号:15姓名大张
4.定义User类
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
* @author DELL_pc
* @date 2017年3月15日
*/
/**
 * Custom Hadoop value type representing one student record.
 *
 * <p>Serialized field order is id, name, classid — {@link #write} and
 * {@link #readFields} must stay in lockstep or deserialization corrupts.
 *
 * <p>The ordering imposed by {@link #compareTo} is consistent with
 * {@link #equals}: classid first, then id, then name.
 */
public class User implements WritableComparable<User> {
    int id;       // student id (学号)
    String name;  // student name (姓名)
    int classid;  // class id (班级号), also used as the map output key

    /** No-arg constructor required by Hadoop's Writable reflection. */
    public User() {
    }

    /**
     * @param classid class id
     * @param name    student name
     * @param id      student id
     */
    public User(int classid, String name, int id) {
        super();
        this.id = id;
        this.name = name;
        this.classid = classid;
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getClassid() {
        return classid;
    }

    public void setClassid(int classid) {
        this.classid = classid;
    }

    /** Serializes fields in the fixed order id, name, classid. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(id);
        out.writeUTF(name);
        out.writeInt(classid);
    }

    /** Deserializes fields in the same order they were written. */
    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readInt();
        name = in.readUTF();
        classid = in.readInt();
    }

    /**
     * Total ordering consistent with equals: classid, then id, then name.
     *
     * <p>Fix: the previous implementation returned a constant 1, which
     * violates the Comparable contract (sgn(a.compareTo(b)) must equal
     * -sgn(b.compareTo(a))) and can make sorting/merging misbehave.
     * Integer.compare avoids the subtraction-overflow pitfall.
     */
    @Override
    public int compareTo(User o) {
        int c = Integer.compare(this.classid, o.classid);
        if (c != 0) {
            return c;
        }
        c = Integer.compare(this.id, o.id);
        if (c != 0) {
            return c;
        }
        // Null-safe name comparison: null sorts before any non-null name.
        if (this.name == null) {
            return (o.name == null) ? 0 : -1;
        }
        if (o.name == null) {
            return 1;
        }
        return this.name.compareTo(o.name);
    }

    @Override
    public String toString() {
        return "User [id=" + id + ", name=" + name + ", classid=" + classid + "]";
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + classid;
        result = prime * result + id;
        result = prime * result + ((name == null) ? 0 : name.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        User other = (User) obj;
        if (classid != other.classid)
            return false;
        if (id != other.id)
            return false;
        if (name == null) {
            if (other.name != null)
                return false;
        } else if (!name.equals(other.name))
            return false;
        return true;
    }
}
5.定义map
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
* @author DELL_pc
* @date 2017年3月15日
*/
/**
 * Mapper that parses student records and keys them by class id.
 *
 * <p>Each input line may contain several records separated by tabs;
 * each record is a whitespace-separated triple: classid, name, id.
 * Emits (classid, User) so the shuffle groups students by class.
 */
public class MyMapper extends Mapper<LongWritable, Text, IntWritable, User> {
    // Reused output key — avoids allocating one IntWritable per record.
    IntWritable cla = new IntWritable();

    /**
     * @param key     byte offset of the line (unused)
     * @param value   one input line holding tab-separated record groups
     * @param context emits (IntWritable classid, User record)
     * @throws NumberFormatException if classid/id fields are not integers
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, IntWritable, User>.Context context)
            throws IOException, InterruptedException {
        // Outer pass: split the line into tab-separated record groups.
        StringTokenizer groups = new StringTokenizer(value.toString(), "\t");
        while (groups.hasMoreTokens()) {
            // Inner pass: split one group on whitespace into field triples.
            StringTokenizer fields = new StringTokenizer(groups.nextToken());
            while (fields.hasMoreTokens()) {
                // parseInt returns a primitive directly; Integer.valueOf
                // would box an Integer only to unbox it immediately.
                int classid = Integer.parseInt(fields.nextToken());
                String name = fields.nextToken();
                int id = Integer.parseInt(fields.nextToken());
                cla.set(classid);
                context.write(cla, new User(classid, name, id));
            }
        }
    }
}
6.定义reducer
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
* @author DELL_pc
* @date 2017年3月15日
*/
/**
 * Reducer that concatenates all students of one class onto a single line.
 *
 * <p>Output value format per student: "学号:&lt;id&gt;姓名&lt;name&gt;\t".
 * (Class name keeps the original spelling "MyRedecer" because the driver
 * references it.)
 */
public class MyRedecer extends Reducer<IntWritable, User, IntWritable, Text> {
    // Reused output value — one Text instance for the reducer's lifetime.
    Text text = new Text();

    /**
     * @param key     the class id this group belongs to
     * @param user    all User records sharing that class id
     * @param context emits (class id, concatenated student descriptions)
     */
    @Override
    protected void reduce(IntWritable key, Iterable<User> user,
            Reducer<IntWritable, User, IntWritable, Text>.Context context) throws IOException, InterruptedException {
        // StringBuilder: this method is single-threaded, so the
        // synchronized StringBuffer buys nothing.
        StringBuilder sb = new StringBuilder();
        for (User u : user) {
            sb.append("学号:" + u.getId() + "姓名" + u.getName() + "\t");
        }
        text.set(sb.toString());
        // The reduce key already IS the class id — no need to track it
        // through the loop and copy it into a fresh IntWritable.
        context.write(key, text);
    }
}
7.定义主函数
/**
* @author DELL_pc
* @date 2017年3月15日
*/
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Driver for the class-grouping job: reads student triples from HDFS,
 * groups them by class id, and writes one line per class.
 *
 * <p>Input and output paths are fixed to /user/lei/wordcount/ and
 * /user/lei/wordcount/out; command-line args are currently ignored.
 */
public class UserDemo implements Tool {
    private Configuration conf = null;

    /** @return the configuration supplied by ToolRunner. */
    @Override
    public Configuration getConf() {
        return this.conf;
    }

    /** Stores the configuration and points it at the remote namenode. */
    @Override
    public void setConf(Configuration that) {
        this.conf = that;
        this.conf.set("fs.defaultFS", "hdfs://192.168.59.130:8020");
    }

    /**
     * Configures and submits the job, blocking until completion.
     *
     * @param arg0 command-line arguments (unused; paths are hard-coded)
     * @return 0 on success, 1 on failure
     */
    @Override
    public int run(String[] arg0) throws Exception {
        Job job = Job.getInstance(this.conf, "usercount");
        // Fix: without setJarByClass the submitted job cannot locate
        // MyMapper/MyRedecer on remote task nodes (ClassNotFoundException).
        job.setJarByClass(UserDemo.class);
        // Input
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.addInputPath(job, new Path("/user/lei/wordcount/"));
        // Map phase
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(User.class);
        // Reduce phase
        job.setReducerClass(MyRedecer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path("/user/lei/wordcount/out"));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        long startdate = System.currentTimeMillis();
        // Fix: keep ToolRunner's status instead of discarding it, so the
        // process exit code reflects job success/failure for scripts.
        int status = ToolRunner.run(new UserDemo(), args);
        long costdate = System.currentTimeMillis() - startdate;
        System.out.println("花费时间" + costdate + "ms");
        System.exit(status);
    }
}
8.计数器结果
INFO - Counters: 38
File System Counters
FILE: Number of bytes read=656
FILE: Number of bytes written=470578
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=162
HDFS: Number of bytes written=144
HDFS: Number of read operations=15
HDFS: Number of large read operations=0
HDFS: Number of write operations=4
Map-Reduce Framework
Map input records=3
Map output records=6
Map output bytes=120
Map output materialized bytes=138
Input split bytes=116
Combine input records=0
Combine output records=0
Reduce input groups=4
Reduce shuffle bytes=138
Reduce input records=6
Reduce output records=4
Spilled Records=12
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=45
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=473956352
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=81
File Output Format Counters
Bytes Written=144