HBaseUtilss 类(HBase 写入工具类:单条写入与按线程批量写入)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
/**
 * @ClassName: HBaseUtilss
 * @author: zhl
 * @date: 2019/12/5 9:35
 */
/**
 * HBase write utility (singleton): single-record puts and per-thread batched
 * puts, plus a main() that bulk-loads one CSV file into table "stu5" using
 * three threads, each covering one third of the rows.
 */
class HBaseUtilss {
    /** Flush a thread's buffered Puts once it reaches this size. */
    private static final int BATCH_SIZE = 188;
    // Per-thread buffer of pending Puts; flushed when BATCH_SIZE is reached
    // or when flush(tableName) is called explicitly.
    ThreadLocal<List<Put>> threadLocal = new ThreadLocal<List<Put>>();
    HBaseAdmin admin = null;
    Connection conn = null;
    private static HBaseUtilss instance = null;

    /** Lazily creates and returns the process-wide singleton. */
    public static synchronized HBaseUtilss getInstance() {
        if (null == instance) instance = new HBaseUtilss();
        return instance;
    }

    private HBaseUtilss() {
        // HBaseConfiguration.create() loads hbase-default/hbase-site settings;
        // a bare new Configuration() (as before) would silently miss them.
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "zhl:2181");
        configuration.set("hbase.rootdir", "hdfs://zhl:9000/hbase");
        try {
            conn = ConnectionFactory.createConnection(configuration);
            // Deprecated constructor kept so the field type stays HBaseAdmin;
            // the field is never used in this class.
            admin = new HBaseAdmin(configuration);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns an HTable handle for the given table name, or null on failure.
     * Callers are responsible for closing the returned table.
     *
     * @param tableName HBase table name
     */
    public HTable getTable(String tableName) {
        HTable table = null;
        try {
            final TableName tname = TableName.valueOf(tableName);
            table = (HTable) conn.getTable(tname);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return table;
    }

    /**
     * Writes a single cell to an HBase table.
     *
     * @param tableName HBase table name
     * @param rowkey    row key
     * @param cf        column family
     * @param column    column qualifier
     * @param value     cell value
     */
    public void put(String tableName, String rowkey, String cf, String column, String value) {
        HTable table = getTable(tableName);
        if (table == null) return; // connection failure already logged
        Put put = new Put(Bytes.toBytes(rowkey));
        // addColumn replaces the deprecated Put.add
        put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
        try {
            table.put(put);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                table.close(); // original leaked the table handle
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Buffers one cell for batched writing. Each thread has its own buffer;
     * the buffer is written out once it reaches BATCH_SIZE. A thread must use
     * the same table for all of its bulkput calls, and MUST call
     * {@link #flush(String)} after its last record — otherwise the final
     * partial batch (&lt; BATCH_SIZE Puts) is never written.
     *
     * @param tableName HBase table name
     * @param rowkey    row key
     * @param cf        column family
     * @param column    column qualifier
     * @param value     cell value
     */
    public void bulkput(String tableName, String rowkey, String cf, String column, String value) {
        try {
            List<Put> list = threadLocal.get();
            if (list == null) {
                list = new ArrayList<Put>();
                threadLocal.set(list);
            }
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
            list.add(put);
            if (list.size() >= BATCH_SIZE) {
                flushList(tableName, list);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes out any Puts still buffered for the calling thread. Call this
     * after the last bulkput(); the original code never flushed the tail
     * batch, silently dropping up to BATCH_SIZE-1 records per thread.
     *
     * @param tableName HBase table name the buffered Puts belong to
     */
    public void flush(String tableName) {
        List<Put> list = threadLocal.get();
        if (list == null || list.isEmpty()) return;
        try {
            flushList(tableName, list);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Writes the batch and empties it, always closing the table handle. */
    private void flushList(String tableName, List<Put> list) throws IOException {
        HTable table = getTable(tableName);
        try {
            table.put(list);
            list.clear();
        } finally {
            table.close();
        }
    }

    public static void main(String[] args) {
        // Load the CSV; row 0 is the header, remaining rows are data.
        final List<String> list = new CsvUtilData().readCsv("C:/scadadata/2019/10/TB001-10 00.csv");
        final long start = System.currentTimeMillis();
        final String[] head = list.get(0).split(",");
        final int n = list.size();
        // Three loader threads, each covering one third of the rows.
        startLoader(list, head, 1, n / 3, start);
        startLoader(list, head, n / 3, n / 3 * 2, start);
        startLoader(list, head, n / 3 * 2, n, start);
    }

    /** Starts a thread that writes CSV rows [from, to) into table "stu5". */
    private static void startLoader(final List<String> rows, final String[] head,
                                    final int from, final int to, final long start) {
        new Thread(new Runnable() {
            public void run() {
                for (int i = from; i < to; i++) {
                    String[] cols = rows.get(i).split(",");
                    for (int j = 0; j < cols.length; j++) {
                        // rowkey = value of column 3 plus the column index (as in original)
                        HBaseUtilss.getInstance().bulkput("stu5", cols[3] + j, "info", head[j], cols[j]);
                    }
                }
                // Flush the tail batch — without this, up to BATCH_SIZE-1
                // buffered Puts per thread were lost.
                HBaseUtilss.getInstance().flush("stu5");
                System.out.println(System.currentTimeMillis() - start);
            }
        }).start();
    }
}
CsvUtilData 类:逐行读取 CSV 文件并返回所有行
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
public class CsvUtilData
{
    /**
     * Reads a text/CSV file line by line.
     *
     * @param filepath path of the file to read
     * @return all lines of the file in order; an empty list if the file
     *         cannot be opened or read (the error is logged)
     */
    public ArrayList<String> readCsv(String filepath) {
        ArrayList<String> allString = new ArrayList<String>();
        // try-with-resources closes the reader on every path. The original
        // leaked the reader, and threw NullPointerException when the file did
        // not exist (br stayed null after the swallowed FileNotFoundException).
        // The pointless setReadable/setWritable calls were removed.
        try (BufferedReader br = new BufferedReader(new FileReader(filepath))) {
            String line;
            while ((line = br.readLine()) != null) {
                allString.add(line);
            }
        } catch (IOException e) { // also covers FileNotFoundException
            e.printStackTrace();
        }
        return allString;
    }
}
ExportHbase 类:根据 rowkey 列表从 HBase 批量读取数据
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* @ClassName: ExportHbase
* @author: zhl
* @date: 2019/12/6 14:36
*/
public class ExportHbase {
    public static Configuration conf = null;
    public static Connection connection = null;
    public static Admin admin = null;

    static {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "zhl:2181");
        // NOTE(review): 60010 is usually the HMaster web-UI port, not the RPC
        // port — confirm this setting is what the cluster expects.
        conf.set("hbase.master", "zhl:60010");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        // (removed the redundant second HBaseConfiguration.create(conf) call —
        //  it only re-wrapped the configuration created above)
        try {
            connection = ConnectionFactory.createConnection(conf);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Batch-gets the given rowkeys from a table in a single RPC and returns
     * every cell value found, in result order. Also prints the cell count.
     *
     * @param tableName  HBase table name
     * @param rowkeyList rowkeys to fetch
     * @return values of all cells of all matched rows
     * @throws IOException if the table cannot be opened or the batch get fails
     *                     (the original swallowed the open failure and then
     *                     NPE'd on the null table)
     */
    public static List<String> qurryTableTestBatch(String tableName, List<String> rowkeyList) throws IOException {
        List<Get> getList = new ArrayList<>(rowkeyList.size());
        for (String rowkey : rowkeyList) { // one Get per rowkey, batched below
            getList.add(new Get(Bytes.toBytes(rowkey)));
        }
        List<String> list = new ArrayList<>();
        // try-with-resources closes the table handle (original leaked it).
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            Result[] results = table.get(getList); // single batched get
            for (Result result : results) {
                for (Cell kv : result.rawCells()) {
                    list.add(Bytes.toString(CellUtil.cloneValue(kv)));
                }
            }
        }
        System.out.println(list.size()); // cell count, as the original printed
        return list;
    }

    public static void main(String[] args) {
        List<String> rowkeyList = new ArrayList<String>();
        List<String> list = new ArrayList<String>();
        // Row 0 of the CSV is the header; column 3 of each data row is the rowkey.
        List<String> csvRows = new CsvUtilData().readCsv("C:/scadadata/2019/10/TB001-10 00.csv");
        for (int i = 1; i < csvRows.size(); i++) {
            rowkeyList.add(csvRows.get(i).split(",")[3]);
        }
        try {
            list = ExportHbase.qurryTableTestBatch("stu5", rowkeyList);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println(list.size()); // 0 if the batch get failed
    }
}
运行效率:插入 128000 条数据约耗时 6~7 秒,仍有可优化空间(例如调大批量大小、使用 BufferedMutator、关闭 WAL 或对表预分区)。