1. Create the table (./bin/hbase shell)
create 't_user', {NAME => 'info'}
describe 't_user'
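Optionally, for a 10-million-row load the table can be pre-split so the initial writes are spread across region servers instead of hitting a single region. A minimal sketch using the Java admin API; the split points are assumptions that match the "id%010d" row keys generated in step 2.1:
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.util.Bytes;

// Optional alternative to the shell create: pre-split t_user into 10 regions
// so the bulk load does not hammer one region. Split points follow "id%010d".
public class CreateUserTable {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("t_user"));
        desc.addFamily(new HColumnDescriptor("info"));
        byte[][] splits = new byte[9][];
        for (int i = 1; i <= 9; i++) {
            splits[i - 1] = Bytes.toBytes("id" + String.format("%010d", i * 1000000));
        }
        admin.createTable(desc, splits);
        admin.close();
    }
}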
2. Generate the data (10 million rows)
2.1 Generate the data file on HDFS
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Random;

import org.apache.hadoop.fs.FSDataOutputStream;

/**
 * Writes simulated user records (tab-separated, one per line) to a file on HDFS.
 * HdfsUtil is a small custom helper around the HDFS FileSystem API (see the sketch below).
 */
public class UserDataSimulation {

    public void simulate(String name, long count) throws IOException {
        HdfsUtil hdfs = new HdfsUtil();
        String uri = "hdfs://ip:9000/" + name;

        // Recreate the target file, then open a writer on it.
        hdfs.deleteHdfsFile(uri);
        hdfs.createHdfsFile(uri);
        FSDataOutputStream os = hdfs.getOutputStream(uri);
        Writer hdfsOut = new OutputStreamWriter(os, "utf-8");

        StringBuilder sb = new StringBuilder();
        long simCount = count;
        int sbLine = 0;
        String userid;
        String username;
        String transId;
        String mPointId;
        Random rand = new Random(50);

        for (int i = 0; i < simCount; i++) {
            sbLine++;
            // Values are zero-padded so row keys have a fixed width.
            userid = "id" + String.format("%010d", i);
            username = "name" + String.format("%010d", i);
            transId = "transId" + String.format("%010d", rand.nextInt(50));
            mPointId = "pointId" + String.format("%010d", i);
            sb.append(userid).append("\t").append(username).append("\t")
              .append(transId).append("\t").append(mPointId)
              .append("\r\n");
            // Flush the buffer to HDFS every 20,000 lines.
            if (sbLine == 20000) {
                hdfsOut.write(sb.toString());
                sb.setLength(0);
                sbLine = 0;
                System.out.println("LineNum:=" + i);
            }
        }
        // Write whatever is left in the buffer, then close the streams.
        hdfsOut.write(sb.toString());
        hdfsOut.close();
        os.close();
        System.out.println("finished, total LineNum:=" + simCount);
    }

    public static void main(String[] args) throws IOException {
        UserDataSimulation ds = new UserDataSimulation();
        ds.simulate(args[0], Long.parseLong(args[1]));
    }
}
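The simulator relies on an HdfsUtil helper that is not shown in these notes. Below is a minimal sketch, assuming only the three method names used above (deleteHdfsFile, createHdfsFile, getOutputStream); the real implementation may differ.
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper: a thin wrapper around the HDFS FileSystem API.
public class HdfsUtil {

    private final Configuration conf = new Configuration();

    // Delete the file (or directory) at the given URI if it exists.
    public void deleteHdfsFile(String uri) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        fs.delete(new Path(uri), true);
    }

    // Create an empty file at the given URI.
    public void createHdfsFile(String uri) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        fs.create(new Path(uri)).close();
    }

    // Open an output stream that overwrites the file at the given URI.
    public FSDataOutputStream getOutputStream(String uri) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        return fs.create(new Path(uri), true);
    }
}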
2.2 Put the HBase jars on Hadoop's classpath
cp /opt/hbasedir/lib/hbase*.jar /opt/hadoopdir/share/hadoop/common/lib/
2.3 Write the MapReduce import job
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class SampleUploader {

    static Configuration conf = HBaseConfiguration.create();
    static HTablePool pool = new HTablePool(conf, 3);
    static HTableInterface table = pool.getTable("t_user");
    static {
        // Buffer puts on the client so the periodic flushCommits() below sends them in batches.
        table.setAutoFlush(false);
    }

    private static final String NAME = "SampleUploader";

    /**
     * Map-only job: each input line (userid \t username \t transid \t pointid)
     * becomes one Put against the 'info' column family of t_user.
     */
    static class Uploader extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        private long checkpoint = 10000;
        private long count = 0;

        @SuppressWarnings("deprecation")
        @Override
        public void map(LongWritable key, Text line, Context context)
                throws IOException {
            String[] values = line.toString().split("\t");
            if (values.length != 4) {
                return;
            }
            // Extract each value
            byte[] row = Bytes.toBytes(values[0]);
            byte[] username = Bytes.toBytes(values[1]);
            byte[] transid = Bytes.toBytes(values[2]);
            byte[] pointid = Bytes.toBytes(values[3]);
            // Create Put
            Put put = new Put(row);
            put.add(Bytes.toBytes("info"), Bytes.toBytes("username"), username);
            put.add(Bytes.toBytes("info"), Bytes.toBytes("transid"), transid);
            put.add(Bytes.toBytes("info"), Bytes.toBytes("pointid"), pointid);
            table.put(put);
            count++;
            // Flush the buffered puts to the region servers every 1000 rows.
            if (count == 1000) {
                table.flushCommits();
                count = 0;
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException,
                InterruptedException {
            // Flush any puts still buffered when the task finishes.
            table.flushCommits();
        }
    }
    /**
     * Job configuration: a map-only job that reads text lines from HDFS.
     */
    public static Job configureJob(Configuration conf, String[] args)
            throws IOException {
        Path inputPath = new Path(args[0]);
        String tableName = args[1];
        Job job = new Job(conf, NAME + "_" + tableName);
        job.setJarByClass(Uploader.class);
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(Uploader.class);
        // The mapper writes directly to HBase, so the output directory only ever
        // holds empty part files, but MapReduce still requires an output path.
        HdfsUtil hdfs = new HdfsUtil();
        hdfs.deleteHdfsFile("hdfs://ip:9000/test/mrout");
        FileOutputFormat.setOutputPath(job, new Path("/test/mrout"));
        job.setNumReduceTasks(0);
        return job;
    }
    /**
     * Main entry point.
     *
     * @param args  the command line parameters: <input> <tablename>
     * @throws Exception  when running the job fails
     */
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.setNumMapTasks(10);
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Wrong number of arguments: " + otherArgs.length);
            System.err.println("Usage: " + NAME + " <input> <tablename>");
            System.exit(-1);
        }
        Job job = configureJob(conf, otherArgs);
        boolean success = job.waitForCompletion(true);
        // Return the client-side table handle to the pool before exiting.
        pool.putTable(table);
        System.exit(success ? 0 : 1);
    }
}
Note: to make sure the HBase client picks up the correct runtime settings, place hbase-site.xml under the src directory so that it is packaged into the jar.
2.4 Recreate the table and attach the coprocessor (./hbase shell)
disable 't_user'
drop 't_user'
create 't_user', 'info'
Upload the coprocessor jar to HDFS first (from the OS shell):
hadoop fs -rm -r /test/xxx.jar
hadoop fs -copyFromLocal /opt/jar/xxx.jar /test
Then attach the coprocessor and verify it with a test put:
alter 't_user', METHOD => 'table_att', 'coprocessor' => 'hdfs://ip:9000/test/hbasedemo.jar|power.MyCP||'
put 't_user', 'a111', 'info:transid', 'tbbb'
describe 't_user'
The coprocessor class (power.MyCP) is as follows:
package power;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;

// Region observer: after every Put, forward the row key and the info:transid value to Solr.
public class MyCP extends BaseRegionObserver {
    @Override
    public void postPut(ObserverContext<RegionCoprocessorEnvironment> e,
            Put put, WALEdit edit, Durability durability) throws IOException {
        super.postPut(e, put, edit, durability);
        String rowKeyStr = Bytes.toString(put.getRow());
        List<Cell> lstCell = put.get(Bytes.toBytes("info"), Bytes.toBytes("transid"));
        String val = "";
        for (Cell c : lstCell) {
            val = Bytes.toString(CellUtil.cloneValue(c));
        }
        SolrTest solr = new SolrTest();
        solr.test(rowKeyStr, val);
    }
}
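SolrTest is a custom helper that is not included in these notes. A minimal sketch using the SolrJ 4.x client is shown below; the Solr URL and the field names ("id", "transid") are assumptions and must match the actual core and schema.
import java.io.IOException;

import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrInputDocument;

// Hypothetical sketch of SolrTest: index one (rowkey, transid) pair into Solr.
public class SolrTest {

    // Solr 4.x URL; adjust host and core name to the actual deployment.
    private static final String SOLR_URL = "http://ip:8983/solr";

    public void test(String rowKey, String transId) throws IOException {
        HttpSolrServer server = new HttpSolrServer(SOLR_URL);
        try {
            SolrInputDocument doc = new SolrInputDocument();
            // Field names are assumptions; they must exist in the Solr schema.
            doc.addField("id", rowKey);
            doc.addField("transid", transId);
            server.add(doc);
            server.commit();
        } catch (SolrServerException ex) {
            throw new IOException(ex);
        } finally {
            server.shutdown();
        }
    }
}
Committing on every put keeps the sketch simple but is expensive under load; in practice one would batch documents or rely on Solr's autoCommit.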
2.5 Start Solr 4.8
java -jar ./example/start.jar
2.6 Run the import job on the cluster: hadoop jar xxx.jar
2.7 Open a browser and query Solr at http://ip:8983/solr
Note:
A conditional query over 1 million rows returns within 10 ms.
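As a rough illustration of the kind of conditional query measured above, here is a SolrJ 4.x sketch; the field names and the query value are assumptions matching the schema sketched earlier.
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;

// Hypothetical query sketch: find all row keys whose indexed transid matches a value.
public class SolrQueryDemo {
    public static void main(String[] args) throws SolrServerException {
        HttpSolrServer server = new HttpSolrServer("http://ip:8983/solr");
        SolrQuery query = new SolrQuery("transid:transId0000000007"); // assumed field and value
        query.setRows(100);
        QueryResponse rsp = server.query(query);
        for (SolrDocument doc : rsp.getResults()) {
            // Print the HBase row key stored in the "id" field.
            System.out.println(doc.getFieldValue("id"));
        }
        server.shutdown();
    }
}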