一、概述
- 使用MapReduce对原始日志进行清洗:将IP解析成城市,并把清洗后的新日志存储到HBase中
- 安装HBase,本案例中HMaster为192.169.224.104,HRegionServer为192.169.224.105。
- 因为是在本地运行程序,所以需要在本机的hosts文件中添加以下映射:
192.169.224.104 node1
192.169.224.105 node2
二、代码
1、在HBase中创建表
create 'hadoop_log', 'log'
2、入口(HdfsToHBaseRunner)
package com.cfl.etl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* 数据清洗:从hdfs中将ip转换为城市,存储到HBase
* @author chenfenli
*
*/
public class HdfsToHBaseRunner implements Tool{
// hdfs地址
private static final String hdfsUrl = "hdfs://192.169.224.104:8020";
// hdfs目录
private static final String hdfsCatalog = "/flume/events/";
// hbase中zookeeper地址
private static final String zooKeeperUrl = "192.169.224.