目录:
一.四个Java项目代码:
1.FriendMapper.java代码:
package com.aliyun.odps.friend;
import java.io.IOException;
import java.util.Arrays;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.mapred.MapperBase;
/**
 * Map stage of the friend-recommendation job.
 *
 * <p>Each input record carries a user in column 0 and that user's friends, separated by
 * spaces, in column 1. For every unordered pair taken from {user, friends...} the mapper
 * emits the key {@code "nameA nameB"} (names in sorted order) together with a flag:
 * {@code 0} when the pair contains the user itself (the two are already direct friends),
 * {@code 1} otherwise (the two share this user as a common friend).
 */
public class FriendMapper extends MapperBase {
    /** Reusable map-output key record; holds the "nameA nameB" pair string. */
    private Record key;
    /** Reusable map-output value record; holds the 0/1 flag. */
    private Record value;

    /**
     * Allocates the reusable output records and logs the task id.
     *
     * @param context task context supplied by the framework
     * @throws IOException declared by the overridden method
     */
    @Override
    public void setup(TaskContext context) throws IOException {
        key = context.createMapOutputKeyRecord();
        value = context.createMapOutputValueRecord();
        System.out.println("TaskID:" + context.getTaskID().toString());
    }

    /**
     * Emits one (pair, flag) record for every pair of names found in the input record.
     *
     * <p>Records whose user or friends column is NULL, or whose user is blank, are skipped
     * instead of failing the task.
     *
     * @param recordNum sequence number of the record (unused)
     * @param record input record: column 0 = user, column 1 = space-separated friends
     * @param context task context used to write the map output
     * @throws IOException if writing the output fails
     */
    @Override
    public void map(long recordNum, Record record, TaskContext context) throws IOException {
        Object userField = record.get(0);
        Object friendsField = record.get(1);
        if (userField == null || friendsField == null) {
            // NULL columns would otherwise raise a NullPointerException on toString().
            return;
        }

        // Trim so that stray surrounding whitespace can neither create empty tokens nor
        // prevent the user from matching its own token below.
        String user = userField.toString().trim();
        if (user.isEmpty()) {
            return;
        }
        String friends = friendsField.toString().trim();

        // Split on runs of spaces: a plain split(" ") yields empty-string "names" whenever
        // two spaces are adjacent.
        String[] arr = (user + " " + friends).split(" +");
        // Sorting gives every pair a canonical order, so (A, B) and (B, A) share one key.
        Arrays.sort(arr);
        int len = arr.length;

        for (int i = 0; i < len - 1; ++i) {
            for (int j = i + 1; j < len; ++j) {
                key.set(new Object[] {arr[i] + " " + arr[j]});
                boolean involvesUser = arr[i].equals(user) || arr[j].equals(user);
                // 0 = already direct friends, 1 = one shared friend (this user).
                value.set(new Object[] {involvesUser ? 0L : 1L});
                context.write(key, value);
            }
        }
    }

    /**
     * No per-task cleanup is required.
     *
     * @param context task context supplied by the framework
     * @throws IOException declared by the overridden method
     */
    @Override
    public void cleanup(TaskContext context) throws IOException {
    }
}
2.好友推荐的combiner阶段FriendCombiner.java代码:
package com.aliyun.odps.friend;
import java.io.IOException;
import java.util.Iterator;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.mapred.ReducerBase;
/**
 * Combiner stage of the friend-recommendation job.
 *
 * <p>Pre-aggregates the flags emitted for one user pair: if any flag is 0 the combined value
 * is 0, otherwise it is the sum of the flags seen.
 */
public class FriendCombiner extends ReducerBase {
    /** Reusable output record, created with the map-output value schema. */
    private Record result;

    /**
     * Allocates the reusable output record.
     *
     * @param context task context supplied by the framework
     * @throws IOException declared by the overridden method
     */
    @Override
    public void setup(TaskContext context) throws IOException {
        result = context.createMapOutputValueRecord();
    }

    /**
     * Collapses all values of one key into a single value and writes it under the same key.
     *
     * @param key the user pair
     * @param values flags / partial counts emitted for this pair
     * @param context task context used to write the combined record
     * @throws IOException if writing the output fails
     */
    @Override
    public void reduce(Record key, Iterator<Record> values, TaskContext context) throws IOException {
        long total = 0L;
        boolean sawZero = false;
        // Stop consuming values as soon as a 0 flag shows up; the result is then fixed at 0.
        while (!sawZero && values.hasNext()) {
            long current = (Long) values.next().get(0);
            if (current == 0L) {
                sawZero = true;
            } else {
                total += current;
            }
        }
        result.set(new Object[] { sawZero ? 0L : total });
        context.write(key, result);
    }

    /**
     * No per-task cleanup is required.
     *
     * @param context task context supplied by the framework
     * @throws IOException declared by the overridden method
     */
    @Override
    public void cleanup(TaskContext context) throws IOException {
    }
}
3.好友推荐的reducer阶段FriendReducer.java代码:
package com.aliyun.odps.friend;
import java.io.IOException;
import java.util.Iterator;
import com.aliyun.odps.io.LongWritable;
import com.aliyun.odps.io.Text;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.mapred.ReducerBase;
/**
 * Reduce stage of the friend-recommendation job.
 *
 * <p>For each user pair, sums the incoming values unless one of them is 0, in which case the
 * pair is dropped. Pairs with a positive total are written as (user1, user2, total).
 */
public class FriendReducer extends ReducerBase {
    private LongWritable sum = new LongWritable();
    private Text user1 = new Text();
    private Text user2 = new Text();
    /** Reusable output record, created in {@link #setup}. */
    private Record result = null;

    /**
     * Allocates the reusable output record.
     *
     * @param context task context supplied by the framework
     * @throws IOException declared by the overridden method
     */
    @Override
    public void setup(TaskContext context) throws IOException {
        result = context.createOutputRecord();
    }

    /**
     * Aggregates the values of one user pair and writes the pair when the total is positive.
     *
     * @param key record whose column 0 is the pair string "nameA nameB"
     * @param values flags / partial counts for this pair
     * @param context task context used to write the output record
     * @throws IOException if writing the output fails
     */
    @Override
    public void reduce(Record key, Iterator<Record> values, TaskContext context) throws IOException {
        System.out.println(key.get(0));

        // Accumulate in a long: the values are Long, and "int += Long" would silently
        // narrow the running total.
        long count = 0L;
        while (values.hasNext()) {
            Record val = values.next();
            long current = (Long) val.get(0);
            if (current == 0L) {
                // A 0 value discards the whole pair.
                count = 0L;
                break;
            }
            count += current;
        }

        // Only pairs with a non-zero total are written out.
        if (count > 0) {
            sum.set(count);
            String user = key.get(0).toString();
            String[] users = user.split(" ");
            user1.set(users[0]);
            user2.set(users[1]);
            // NOTE(review): Text/LongWritable wrappers are handed to Record.set here; confirm
            // the targeted ODPS SDK version accepts them for STRING/BIGINT columns.
            result.set(0, user1);
            result.set(1, user2);
            result.set(2, sum);
            context.write(result);
        }
    }

    /**
     * No per-task cleanup is required.
     *
     * @param context task context supplied by the framework
     * @throws IOException declared by the overridden method
     */
    @Override
    public void cleanup(TaskContext context) throws IOException {
    }
}
4.运行ODPS的MapReduce项目FriendDriver.java代码
package com.aliyun.odps.friend;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.mapred.JobClient;
import com.aliyun.odps.mapred.RunningJob;
import com.aliyun.odps.mapred.conf.JobConf;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;
/**
 * Driver that configures and runs the friend-recommendation MapReduce job.
 */
public class FriendDriver {
    /**
     * Configures the job and waits for it to complete.
     *
     * @param args {@code args[0]} = input table name, {@code args[1]} = output table name;
     *     any further arguments are ignored
     * @throws OdpsException if the job cannot be run
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     */
    public static void main(String[] args) throws OdpsException {
        if (args == null || args.length < 2) {
            // Fail with an explanatory message rather than a bare index error.
            throw new IllegalArgumentException(
                "Usage: FriendDriver <input_table> <output_table>");
        }

        JobConf job = new JobConf();

        // Schema of the intermediate (map output) key and value.
        job.setMapOutputKeySchema(SchemaUtils.fromString("friends:string"));
        job.setMapOutputValueSchema(SchemaUtils.fromString("cnt:bigint"));

        // Input and output tables.
        InputUtils.addTable(TableInfo.builder().tableName(args[0]).build(), job);
        OutputUtils.addTable(TableInfo.builder().tableName(args[1]).build(), job);

        // Mapper, combiner and reducer implementations.
        job.setMapperClass(FriendMapper.class);
        job.setCombinerClass(FriendCombiner.class);
        job.setReducerClass(FriendReducer.class);

        RunningJob rj = JobClient.runJob(job);
        rj.waitForCompletion();
    }
}
二.创建的两个文件(注意文件两边是双下划线__):
1.friend_out\__schema__
project=example_project
table=friend_out
columns=userA:STRING,userB:STRING,cnt:BIGINT
friend_in\data(输入表friend_in的数据,每行格式为 uid,friends):
A,B C D
B,A C
C,A B D
D,A C E
E,D
2.friend_in\__schema__
project=example_project
table=friend_in
columns=uid:STRING,friends:STRING
三.odps实验室里面创建云端文件代码
create_table
-- Recreate the pseudo table "dual".
DROP TABLE IF EXISTS dual;
-- Create the pseudo table, which must exist in the project and be initialized.
CREATE TABLE dual (id BIGINT);
-- Initialize the pseudo table with the row produced by COUNT(*).
INSERT OVERWRITE TABLE dual SELECT COUNT(*) FROM dual;
-- Input table of the friend-recommendation MapReduce job:
-- uid is a user, friends holds that user's friends.
CREATE TABLE friend_in (uid STRING, friends STRING);
-- Output table of the friend-recommendation MapReduce job:
-- userA is a user, userB is a different user, cnt is the number of friends they have in common.
CREATE TABLE friend_out (userA STRING, userB STRING, cnt BIGINT);
friend_mapreduce
--@resource_reference{"friend.jar"}
-- Run the MapReduce job packaged in friend.jar: FriendDriver takes the input table (friend_in)
-- as its first argument and the output table (friend_out) as its second.
jar -resources friend.jar -classpath ./friend.jar com.aliyun.odps.friend.FriendDriver friend_in friend_out
-- Finally, query up to 100 user pairs that have at least two friends in common,
-- highest count first. (The predicate is ">= 2": a strict "> 2" excludes pairs with
-- exactly two common friends.)
SELECT * FROM friend_out WHERE cnt >= 2 ORDER BY cnt DESC LIMIT 100;