HBase: inserting and reading data with multiple threads (reading a .CSV file and writing it into HBase)

The HBaseUtilss class


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * @ClassName: BigTableHbaseTest
 * @author: zhl
 * @date: 2019/12/5  9:35
 */
class HBaseUtilss {

    // One Put buffer per thread, so each worker batches its own writes
    ThreadLocal<List<Put>> threadLocal = new ThreadLocal<List<Put>>();
    Admin admin = null;
    Connection conn = null;


    private static HBaseUtilss instance = null;

    public static synchronized HBaseUtilss getInstance() {
        if (null == instance) instance = new HBaseUtilss();
        return instance;
    }


    private HBaseUtilss() {
        // HBaseConfiguration.create() loads hbase-default.xml/hbase-site.xml;
        // a plain new Configuration() misses the HBase defaults
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "zhl:2181");
        configuration.set("hbase.rootdir", "hdfs://zhl:9000/hbase");
        try {
            conn = ConnectionFactory.createConnection(configuration);
            // new HBaseAdmin(conf) is deprecated; get the Admin from the connection
            admin = conn.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    /**
     * Get an HTable instance for the given table name
     */
    public HTable getTable(String tableName) {

        HTable table = null;
        try {
            final TableName tname = TableName.valueOf(tableName);
            table = (HTable) conn.getTable(tname);

        } catch (IOException e) {
            e.printStackTrace();
        }

        return table;
    }

    /**
     * Add a single record to an HBase table
     *
     * @param tableName HBase table name
     * @param rowkey    row key
     * @param cf        column family
     * @param column    column qualifier
     * @param value     value to write
     */
    public void put(String tableName, String rowkey, String cf, String column, String value) {

        HTable table = getTable(tableName);
        Put put = new Put(Bytes.toBytes(rowkey));
        put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
        try {
            table.put(put);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    
    /**
     * Buffer a record for batch insertion into an HBase table. Within a single
     * thread, all calls must target the same table!
     *
     * @param tableName HBase table name
     * @param rowkey    row key
     * @param cf        column family
     * @param column    column qualifier
     * @param value     value to write
     */
    public void bulkput(String tableName, String rowkey, String cf, String column, String value) {

        try {
            List<Put> list = threadLocal.get();
            if (list == null) {
                list = new ArrayList<Put>();
                threadLocal.set(list);
            }
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
            list.add(put);
            // Flush the buffer once it reaches 188 puts; note that records
            // still buffered when a thread finishes its loop are not written
            // (see the flushRemaining sketch after this class)
            if (list.size() >= 188) {
                HTable table = getTable(tableName);
                table.put(list);
                list.clear();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    public static void main(String[] args) {
        // Read the CSV rows
        final List<String> list = new CsvUtilData().readCsv("C:/scadadata/2019/10/TB001-10  00.csv");
        final long start = System.currentTimeMillis();
        // The first CSV row is the header
        final String[] head = list.get(0).split(",");
        // Three threads each insert one third of the rows
        new Thread() {
            public void run() {
                for (int i = 1; i < list.size() / 3; i++) { // start at 1 to skip the header
                    String[] spliststr = list.get(i).split(",");
                    for (int j = 0; j < spliststr.length; j++) {
                        HBaseUtilss.getInstance().bulkput("stu5", spliststr[3] + j, "info", head[j], spliststr[j]);
                    }
                }
                System.out.println(System.currentTimeMillis() - start);
            }
        }.start();

        new Thread(new Runnable() {
            public void run() {
                for (int k = list.size() / 3; k < list.size() / 3 * 2; k++) {
                    String[] spliststr = list.get(k).split(",");
                    for (int j = 0; j < spliststr.length; j++) {
                        HBaseUtilss.getInstance().bulkput("stu5", spliststr[3] + j, "info", head[j], spliststr[j]);
                    }
                }
                System.out.println(System.currentTimeMillis() - start);
            }
        }).start();

        new Thread(new Runnable() {
            public void run() {
                for (int k = list.size() / 3 * 2; k < list.size(); k++) {
                    String[] spliststr = list.get(k).split(",");
                    for (int j = 0; j < spliststr.length; j++) {
                        HBaseUtilss.getInstance().bulkput("stu5", spliststr[3] + j, "info", head[j], spliststr[j]);
                    }
                }
                System.out.println(System.currentTimeMillis() - start);
            }
        }).start();
    }
}
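
One caveat in HBaseUtilss: bulkput only writes a thread's buffer once it reaches 188 puts, so up to 187 trailing records per thread are silently dropped when the loops finish. Below is a minimal sketch of a flush helper (flushRemaining is a hypothetical addition to the class, not part of the original code); each worker thread would call HBaseUtilss.getInstance().flushRemaining("stu5") right after its insert loop:

    /**
     * Hypothetical helper for HBaseUtilss: drain whatever is left in this
     * thread's buffer. Call once per thread after the insert loop finishes.
     */
    public void flushRemaining(String tableName) {
        List<Put> list = threadLocal.get();
        if (list == null || list.isEmpty()) {
            return;
        }
        try {
            HTable table = getTable(tableName);
            table.put(list); // write the final partial batch
            list.clear();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }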

The CSV reader class

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

public class CsvUtilData
{
	public ArrayList<String> readCsv(String filepath) {

		File csv = new File(filepath); // path to the CSV file
		ArrayList<String> allString = new ArrayList<String>();
		// try-with-resources guarantees the reader is closed
		try (BufferedReader br = new BufferedReader(new FileReader(csv))) {
			String line;
			while ((line = br.readLine()) != null) { // read the file line by line
				allString.add(line);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		return allString;
	}
}
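
readCsv loads the entire file into memory, which works for this dataset but will not scale to very large CSVs. A minimal streaming alternative is sketched below (the CsvStreamUtil class and streamCsv method names are hypothetical; assumes Java 8+): each data row is handed to a callback instead of being collected into a list.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.function.Consumer;
import java.util.stream.Stream;

public class CsvStreamUtil {
    /**
     * Hypothetical streaming variant of readCsv: processes the CSV line by
     * line without holding the whole file in memory.
     */
    public void streamCsv(String filepath, Consumer<String> rowHandler) {
        try (Stream<String> lines = Files.lines(Paths.get(filepath))) {
            lines.skip(1)              // skip the header row
                 .forEach(rowHandler); // hand each data row to the caller
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}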

The data reading class

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * @ClassName: ExportHbase
 * @author: zhl
 * @date: 2019/12/6  14:36
 */
public class ExportHbase {

    public static Configuration conf = null;
    public static Connection connection = null;
    public static Admin admin = null;
    static {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "zhl:2181");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        // hbase.master does not need to be set on the client: the active
        // master is discovered through ZooKeeper via the quorum above
        try {
            connection = ConnectionFactory.createConnection(conf);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static List<String> qurryTableTestBatch(String tableName, List<String> rowkeyList) throws IOException {
        List<Get> getList = new ArrayList<>();
        List<String> list = new ArrayList<>();
        int i = 0;
        Table table = null; // handle to the table
        try {
            table = connection.getTable(TableName.valueOf(tableName));
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Wrap each rowkey in a Get and collect the Gets into a list
        for (String rowkey : rowkeyList) {
            Get get = new Get(Bytes.toBytes(rowkey));
            getList.add(get);
        }
        // The key step: a single batched table.get(List<Get>) round trip
        Result[] results = table.get(getList);
        for (Result result : results) { // walk the returned result set
            for (Cell kv : result.rawCells()) {
                String value = Bytes.toString(CellUtil.cloneValue(kv));
                list.add(value);
                i++;
            }
        }
        System.out.println(i);
        return list;
    }

    public static void main(String[] args) {
        List<String> rowkeyList = new ArrayList<String>();
        List<String> list = new ArrayList<String>();
        long start = System.currentTimeMillis();
        // Load the CSV rows and collect the rowkey column
        List<String> list1 = new CsvUtilData().readCsv("C:/scadadata/2019/10/TB001-10  00.csv");
        for (int i = 1; i < list1.size(); i++) {
            String[] spliststr = list1.get(i).split(",");
            rowkeyList.add(spliststr[3]);
        }
        try {
            list = ExportHbase.qurryTableTestBatch("stu5", rowkeyList);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println(System.currentTimeMillis() - start);
        System.out.println(list.size());
    }
}
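
When the goal is to read most or all of a table rather than a fixed set of row keys, a Scan is usually a better fit than building one Get per row. A minimal sketch follows (the table name stu5 comes from the code above; the ScanExample class name is hypothetical, and the caching value of 1000 is an assumed tuning knob):

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

import java.io.IOException;

// Minimal sketch: full-table read with scanner caching, reusing the
// connection created in ExportHbase's static block
public class ScanExample {
    public static void scanTable() throws IOException {
        Table table = ExportHbase.connection.getTable(TableName.valueOf("stu5"));
        Scan scan = new Scan();
        scan.setCaching(1000); // rows fetched per RPC; 1000 is an assumed value to tune
        try (ResultScanner scanner = table.getScanner(scan)) {
            long rows = 0;
            for (Result r : scanner) {
                rows++; // process each Result here
            }
            System.out.println(rows);
        } finally {
            table.close();
        }
    }
}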

Performance: inserting 128,000 records took about 6-7 seconds, so there is still room for optimization.
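
One likely optimization is to replace the hand-rolled ThreadLocal batching with BufferedMutator, the HBase client's built-in buffered writer: it batches puts automatically, flushes whatever is left on close(), and lets you size the write buffer. A minimal sketch (the BufferedWriteExample class name and the 4 MB buffer are assumptions; SKIP_WAL trades durability for speed and should only be used when losing unflushed data on a region-server crash is acceptable):

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

// Minimal sketch: batched writes through BufferedMutator instead of a
// hand-managed ThreadLocal<List<Put>>
public class BufferedWriteExample {
    public static void writeBatch(Connection conn) throws IOException {
        BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf("stu5"))
                .writeBufferSize(4 * 1024 * 1024); // 4 MB buffer: an assumed tuning value
        try (BufferedMutator mutator = conn.getBufferedMutator(params)) {
            Put put = new Put(Bytes.toBytes("rowkey-1")); // placeholder row key
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("col"), Bytes.toBytes("value"));
            // Optional: skip the write-ahead log for speed, at the cost of
            // losing unflushed data if a region server crashes
            put.setDurability(Durability.SKIP_WAL);
            mutator.mutate(put); // buffered, not sent immediately
        }                        // close() flushes any remaining buffered puts
    }
}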

