If the target table already contains data, it will not be overwritten.
If you get a ClassNotFoundException, find the corresponding jar and copy it to hadoop/share/hadoop/common/.
This is the list of jars I had copied over while running various Hadoop and HBase tests (an alternative that ships dependencies with the job is sketched after the list):
hadoop-common-2.5.2.jar
hadoop-nfs-2.5.2.jar
hbase-common-1.1.4.jar
hbase-protocol-1.1.4.jar
htrace-core-3.1.0-incubating.jar
mysql-connector-java-5.1.38-bin.jar
hadoop-common-2.5.2-tests.jar
hbase-client-1.1.4.jar
hbase-hadoop-compat-1.1.4.jar
hbase-server-1.1.4.jar
metrics-core-2.2.0.jar
netty-all-4.0.23.Final.jar
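As an alternative to copying jars by hand, TableMapReduceUtil can ship the HBase client jars with the submitted job. The initTableMapperJob/initTableReducerJob calls used below should already do this by default, so this sketch mainly applies when you wire up a job manually; the job name is just the one reused from the driver:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;

// Ship the HBase client jars (and their transitive dependencies) with the
// job so the task JVMs can resolve classes such as TableMapper.
Job job = Job.getInstance(HBaseConfiguration.create(), "New Word Count");
TableMapReduceUtil.addDependencyJars(job);

Note that this only helps the task side; if the ClassNotFoundException occurs in the JVM that submits the job, the classpath fix above (or exporting HADOOP_CLASSPATH with the HBase jars) is still needed.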
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
/*
 * Word count over an HBase table with MapReduce.
 */
public class HBaseTableMapReduce {
    /* Mapper implementation.
     * Output key type: Text (the word)
     * Output value type: IntWritable (the count, always 1 here)
     */
    public static class Map extends TableMapper<Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();
        /*
         * The parameter types are fixed by TableMapper.
         */
        @Override
        public void map(ImmutableBytesWritable row, Result values, Context context)
                throws IOException, InterruptedException {
            // Read the content:count cell of the current row; skip rows
            // that do not have it, to avoid a NullPointerException.
            byte[] cell = values.getValue(Bytes.toBytes("content"), Bytes.toBytes("count"));
            if (cell == null) {
                return;
            }
            word.set(new String(cell));
            context.write(word, one);
        }
    }
    /* Reducer implementation.
     * Input key type: the mapper's output key type (Text).
     * Input value type: the mapper's output value type (IntWritable).
     * A TableReducer writes null as the output key;
     * the output value type is Put.
     */
    public static class Reduce extends
            TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            Iterator<IntWritable> iterator = values.iterator();
            while (iterator.hasNext()) {
                sum += iterator.next().get();
            }
            // One Put per word: the row key is "wordcount_<word>".
            Put put = new Put(Bytes.toBytes("wordcount_" + key));
            // Column family content, qualifier count, value = the total.
            put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("count"),
                    Bytes.toBytes(String.valueOf(sum)));
            context.write(null, put);
        }
    }
    // Create the HBase output table.
    public static void createHBaseTable(String tableName)
            throws IOException {
        // Table descriptor
        HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
        // Column family descriptor
        HColumnDescriptor col = new HColumnDescriptor("content");
        htd.addFamily(col);
        // HBase configuration
        Configuration conf = HBaseConfiguration.create();
        //conf.set("hbase.zookeeper.quorum", "127.0.0.1");
        //conf.set("hbase.zookeeper.property.clientPort", "2181");
        HBaseAdmin hAdmin = new HBaseAdmin(conf);
        if (hAdmin.tableExists(tableName)) {
            System.out.println("Table already exists.");
            // hAdmin.disableTable(tableName);
            // hAdmin.deleteTable(tableName);
        } else {
            System.out.println("Creating table: " + tableName);
            hAdmin.createTable(htd);
        }
        hAdmin.close();
    }
    public static void main(String[] args) throws Exception {
        String sourceTable = "wordcount";
        String targetTable = "wordcount_mapreduce";
        // Step 1: make sure the output table exists.
        createHBaseTable(targetTable);
        // Step 2: configure and run the MapReduce job.
        Configuration conf = HBaseConfiguration.create();
        // These settings can be critical:
        // conf.set("mapred.job.tracker", "master:9001");
        // conf.set("hbase.zookeeper.quorum", "master");
        // conf.set("hbase.zookeeper.property.clientPort", "2181");
        Job job = Job.getInstance(conf, "New Word Count");
        job.setJarByClass(HBaseTableMapReduce.class);
        Scan scan = new Scan();
        scan.setCaching(100);       // 1 is the default in Scan, which is bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't set to true for MR jobs
        TableMapReduceUtil.initTableMapperJob(
                sourceTable,       // input table
                scan,              // Scan instance to control CF and attribute selection
                Map.class,         // mapper class
                Text.class,        // mapper output key
                IntWritable.class, // mapper output value
                job);
        TableMapReduceUtil.initTableReducerJob(
                targetTable,  // output table
                Reduce.class, // reducer class
                job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
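To try the job end to end, the source table wordcount needs rows with a content:count cell, since that is what the mapper reads. A minimal seeding sketch using the HBase 1.1 client API; the class name, row keys, and words are made-up sample data:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class SeedWordcountTable {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        // Assumes the table "wordcount" with column family "content" exists;
        // it can be created the same way as the target table above.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("wordcount"))) {
            String[] words = { "hello", "world", "hello" }; // sample data
            int rowId = 0;
            for (String w : words) {
                Put put = new Put(Bytes.toBytes("row_" + rowId++));
                // The mapper reads content:count, so seed exactly that cell.
                put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("count"),
                        Bytes.toBytes(w));
                table.put(put);
            }
        }
    }
}

After the job finishes, scan 'wordcount_mapreduce' in the HBase shell should show one wordcount_<word> row per distinct word, with content:count holding its total.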