路还得继续!
我重新构想了思路,直接用Java的API获取HBase的历史数据,然后传递给R,这样就避免了R通过Thrift调用HBase!
于是我重写了第二个版本的MapReduce,如下:
package mytest;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
public class MapRLanguage2 {
public static int mapNum = 12;
public static final String RDIR_H="hdfs://bd110:9000/user/hadoop/";
public static final String RDIR_L="/home/hadoop/yibei/R/";
public static class RMapper extends Mapper<Object, Text, NullWritable, NullWritable>{
HTable table = null;
// Opens a handle to the HBase "kpinfo" table once per mapper task.
// The handle is reused by every map() invocation and released in cleanup().
@Override
public void setup(Context context
                 ) throws IOException, InterruptedException {
    // HBaseConfiguration.create() loads hbase-site.xml from the task classpath.
    table = new HTable(HBaseConfiguration.create(), "kpinfo");
}
// Releases the HBase table handle opened in setup().
// Guard against null: if setup() failed before the handle was assigned,
// an unconditional close() would throw an NPE here and mask the original
// failure in the task logs.
@Override
public void cleanup(Context context
                   ) throws IOException, InterruptedException {
    if (table != null) {
        table.close();
        table = null; // make a repeated cleanup a no-op and free the reference
    }
}
//每个文件只有一行
public void map(Object key, Text value, Context context ) throws IOException, InterruptedException {
String args[] = value.toString().split("\\|");
//先准备存入的文件名称 例如:[Map].cell.kpi
InputSplit inputSplit = context.getInputSplit();
String fileName = ((FileSplit)inputSplit).getPath().getName();
//获取数据存入文件中
String cellarr[] = args[2].split(",");
String kpiarr[] = args[3].split(",");
//获取开始时间
String datedir = context.getConfiguration().get("datedir");
for(int i=0;i<cellarr.length;i++){
//创建文件
for(int j=0;j<kpiarr.length;j++){
File ldir = new File(RDIR_L+datedir+"/");
ldir.mkdirs();
File lfile = new File(RDIR_L+datedir+"/"+fileName+"."+cellarr[i]+"."+kpiarr[j]);
lfile.createNewFile();
}
}
//访问HBASE,获取历史数据
Scan scan = new Scan();
sc