Required jars: mongo-java-driver-2.11.2.jar and mongo-hadoop-core_1.0.4-1.1.0.jar
Part 1: Reading data from MongoDB, running it through MapReduce, and storing the results on HDFS.
1. The Job configuration and launcher class:
package com.test.similarity.dataimport;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.log4j.Logger;
import com.test.similarity.util.Constants;
import com.test.similarity.util.HdfsUtil;
import com.mongodb.hadoop.MongoInputFormat;
import com.mongodb.hadoop.util.MongoConfigUtil;
/**
 * Reads data from MongoDB and imports it to the given HDFS path.
 * @author 907897
 */
public class ImportJob {

    private static final Logger logger = Logger.getLogger(ImportJob.class);

    public boolean run(String import_source, String import_data) {
        try {
            Configuration conf = new Configuration();
            // Point the job's input at the source MongoDB collection.
            MongoConfigUtil.setInputURI(conf, import_source);
            conf.set("mapred.job.tracker", Constants.MAPRED_JOB_TRACKER);
            conf.set("fs.default.name", Constants.FS_DEFAULT_NAME);
            Job job = new Job(conf, "MongoDBJob job");
            // Job handler classes
            job.setJarByClass(ImportJob.class);
            job.setMapperClass(ImportMapper.class);
            job.setReducerClass(ImportReducer.class);
            FileSystem fileSys = FileSystem.get(conf);
            // Job type configuration
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setInputFormatClass(MongoInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            // If the output path import_data already exists, delete it first.
            if (HdfsUtil.checkFileExist(fileSys, import_data)) {
                logger.info("Output path " + import_data + " already exists; deleting it.");
                HdfsUtil.deleteFileOrPath(fileSys, import_data);
            }
            // Job output path (the input comes from MongoDB via MongoInputFormat)
            FileOutputFormat.setOutputPath(job, new Path(import_data));
            // Run the job
            return job.waitForCompletion(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }
}
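A job class like this is typically kicked off from a small driver. The sketch below is not from the original project; the MongoDB URI and HDFS path are placeholder assumptions to adapt to your own cluster. The mapper and reducer it relies on are shown in steps 2 and 3 below.
package com.test.similarity.dataimport;

/**
 * Hypothetical driver for ImportJob (not part of the original code).
 */
public class ImportJobDriver {
    public static void main(String[] args) {
        // Placeholder values: adjust host, database, collection, and HDFS path.
        String importSource = "mongodb://127.0.0.1:27017/test.assets";
        String importData = "/user/hadoop/import_data";
        boolean ok = new ImportJob().run(importSource, importData);
        System.out.println("Import job " + (ok ? "succeeded" : "failed"));
    }
}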
2. The corresponding Mapper:
package com.test.similarity.dataimport;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.bson.BSONObject;
import com.test.similarity.asset.AssetUtil;
import com.test.similarity.asset.AssetVO;
public class ImportMapper extends Mapper<Object, BSONObject, Text, Text> {

    @Override
    public void map(Object key, BSONObject value, Context context)
            throws IOException, InterruptedException {
        // Convert the BSON document into an AssetVO and emit it keyed by asset id.
        AssetVO assetVO = AssetUtil.trunBSONObjectToAssetVO(value);
        context.write(new Text(assetVO.getAssetId()), new Text(assetVO.toString()));
    }
}
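AssetVO and AssetUtil are project-specific classes that this post does not show. For readers following along, here is a rough, hypothetical sketch of what they might look like; the field names and the method body are assumptions inferred from how the mapper uses them and from the key=value#@#... layout that ExportReducer parses in Part 2.
package com.test.similarity.asset;

import org.bson.BSONObject;

// Hypothetical sketch only: the real AssetVO is not shown in the original post.
public class AssetVO {
    private String assetId;
    private String name;

    public String getAssetId() { return assetId; }
    public void setAssetId(String assetId) { this.assetId = assetId; }
    public void setName(String name) { this.name = name; }

    @Override
    public String toString() {
        // Serialize in the "field=value#@#field=value" layout that ExportReducer parses.
        return "assetId=" + assetId + "#@#name=" + name;
    }
}

// Hypothetical sketch of the converter used by ImportMapper (public and in its own file in practice).
class AssetUtil {
    public static AssetVO trunBSONObjectToAssetVO(BSONObject obj) {
        AssetVO vo = new AssetVO();
        vo.setAssetId(String.valueOf(obj.get("_id")));
        vo.setName(String.valueOf(obj.get("name")));
        return vo;
    }
}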
3. The corresponding Reducer:
package com.test.similarity.dataimport;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class ImportReducer extends Reducer<Text, Text, Text, Text> {

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Write each value out on its own line; passing null as the output value
        // makes TextOutputFormat emit the text alone, without a trailing separator.
        for (Text value : values) {
            context.write(value, null);
        }
    }
}
Part 2: Reading data from HDFS, processing it with MapReduce, and storing the results in MongoDB.
1. The Job configuration and launcher class:
package com.test.similarity.dataexport;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import com.test.similarity.util.Constants;
import com.mongodb.hadoop.MongoOutputFormat;
import com.mongodb.hadoop.io.BSONWritable;
import com.mongodb.hadoop.util.MongoConfigUtil;
public class ExportJob {

    public boolean run(String relation_data, String export_dest) {
        try {
            Configuration conf = new Configuration();
            // Point the job's output at the destination MongoDB collection.
            MongoConfigUtil.setOutputURI(conf, export_dest);
            conf.set("mapred.job.tracker", Constants.MAPRED_JOB_TRACKER);
            conf.set("fs.default.name", Constants.FS_DEFAULT_NAME);
            Job job = new Job(conf, "MongoDBJob job");
            // Job handler classes
            job.setJarByClass(ExportJob.class);
            job.setMapperClass(ExportMapper.class);
            job.setReducerClass(ExportReducer.class);
            // The mapper emits <Text, Text> while the reducer emits <Text, BSONWritable>,
            // so the map output types and the final output types are declared separately.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);           // final output key type
            job.setOutputValueClass(BSONWritable.class); // final output value type
            job.setInputFormatClass(KeyValueTextInputFormat.class);
            job.setOutputFormatClass(MongoOutputFormat.class);
            // Job input path (the output goes to MongoDB via MongoOutputFormat)
            FileInputFormat.setInputPaths(job, new Path(relation_data));
            // Run the job
            return job.waitForCompletion(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }
}
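For symmetry with the import side, here is a minimal driver sketch (again not from the original post; the input path and output URI are placeholders). The mapper and reducer it uses follow in steps 2 and 3.
package com.test.similarity.dataexport;

/**
 * Hypothetical driver for ExportJob (not part of the original code).
 */
public class ExportJobDriver {
    public static void main(String[] args) {
        // Placeholder values: the HDFS input is the output of the import step above.
        String relationData = "/user/hadoop/import_data";
        String exportDest = "mongodb://127.0.0.1:27017/test.result";
        boolean ok = new ExportJob().run(relationData, exportDest);
        System.out.println("Export job " + (ok ? "succeeded" : "failed"));
    }
}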
2. The corresponding Mapper:
package com.test.similarity.dataexport;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class ExportMapper extends Mapper<Text, Text, Text, Text> {

    @Override
    public void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {
        // Identity mapper: each line read by KeyValueTextInputFormat is passed
        // straight through to the reducer.
        context.write(key, value);
    }
}
3. The corresponding Reducer:
package com.test.similarity.dataexport;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.bson.types.ObjectId;
import com.test.similarity.util.Constants;
import com.test.similarity.util.DateUtil;
import com.test.similarity.util.StringUtil;
import com.mongodb.BasicDBObject;
import com.mongodb.hadoop.io.BSONWritable;
public class ExportReducer extends Reducer<Text, Text, Text, BSONWritable> {

    private static final String splitFlag = Constants.FIELD_SPLIT;         // ";"
    private static final String keyValueFlag = Constants.KEY_VALUE_SIGN;   // "="
    private static final String itemSplitFlag = Constants.ITEM_SPLIT_SIGN; // "#@#"

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String text = key.toString();
        if (StringUtil.isNull(text)) {
            return;
        }
        BSONWritable bsonWritable = new BSONWritable();
        BasicDBObject document = new BasicDBObject();
        // The key is a string of the form "field1=value1#@#field2=value2#@#...".
        String[] result = text.trim().split(itemSplitFlag);
        document.put("_id", new ObjectId());
        for (String content : result) {
            int pos = content.indexOf(keyValueFlag);
            if (pos < 0) { // skip malformed items that have no "=" separator
                continue;
            }
            document.put(content.substring(0, pos), content.substring(pos + 1));
        }
        // document.put("createTime", DateUtil.getCurDate(1));
        bsonWritable.setDoc(document);
        context.write(key, bsonWritable);
    }
}
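To see the parsing logic in isolation, the snippet below feeds a sample key through the same split/indexOf steps. The literal separators "=" and "#@#" are the assumed values of Constants.KEY_VALUE_SIGN and Constants.ITEM_SPLIT_SIGN noted in the comments above, and the sample key mimics a line produced by the import step's AssetVO.toString().
import com.mongodb.BasicDBObject;

public class ParseDemo {
    public static void main(String[] args) {
        // Sample reduce key in the expected "field=value#@#field=value" layout.
        String line = "assetId=1001#@#name=demo";
        BasicDBObject document = new BasicDBObject();
        for (String item : line.split("#@#")) {
            int pos = item.indexOf('=');
            if (pos > 0) {
                document.put(item.substring(0, pos), item.substring(pos + 1));
            }
        }
        // Expected output (roughly): { "assetId" : "1001" , "name" : "demo" }
        System.out.println(document);
    }
}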