GeoMesa HBase demo

References:

https://www.geomesa.org/documentation/user/process.html#proximity-process

GeoMesa integration with HBase: reading and writing HBase with the GeoTools DataStore API (CSDN blog)

1. Data ingestion

Consume simulated vehicle-passing data from Kafka -> convert it to GeoMesa's SimpleFeature format -> write it to HBase.

Kafka producer

Sample message:

{"plateNo":"渝K0E1G2","plateColor":5,"tollgateID":"50010500001211000928","passTime":"1583020940000","speed":"70.0"}

Kafka consumer

/*
 * www.unisinsight.com Inc.
 * Copyright (c) 2018 All Rights Reserved
 */
package com.service;

import com.alibaba.fastjson.JSONObject;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

import java.util.*;

/**
 * Kafka consumer that reads simulated vehicle-passing messages and hands them to VehicleToHbase in batches.
 *
 * @author yuwei [yu.wei@unisinsight.com]
 * @date 2020/04/21 11:21
 * @since 1.0
 */
public class VehicleKafkaHbase {

    public static void main(String[] args) {
        getFromKafka();
    }

    public static void getFromKafka() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.108.126:9092");
        // assign each consumer its own consumer group id
        props.put("group.id", "hbaseConsumer");
        // automatically commit offsets for consumed messages
        props.put("enable.auto.commit", "true");
        // how often consumed offsets are committed
        props.put("auto.commit.interval.ms", "1000");
        // session timeout; if no heartbeat arrives within this window the consumer is considered dead
        props.put("session.timeout.ms", "30000");
        // where to start reading when there is no committed offset
        props.put("auto.offset.reset", "earliest");
        props.put("key.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer =
                new KafkaConsumer<String, String>(props);

        // stranger_event  viid_vehicle_topic2
        String topicName = "viid_vehicle_topic2";
        consumer.subscribe(Collections.singletonList(topicName));

        int count = 0;

        try {
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(100);
                List<JSONObject> list = new ArrayList<>();
                for (ConsumerRecord<String, String> record : records) {
                    // buffer the record; it is flushed to HBase in batches of 500 below
                    list.add(JSONObject.parseObject(record.value()));
                    count++;
                    if (list.size() == 500) {
                        // insert into hbase
                        VehicleToHbase.dealMsg(topicName, list);
                        System.out.println(list.get(0).toString());
                        // clear list
                        list.clear();
                        Thread.sleep(1 * 1000);
                    }
                    if (count == 2000 * 10000) {
                        return;
                    }
                }
                // commit offsets asynchronously
                consumer.commitAsync();

            }
        } catch (Exception e) {
            System.out.println("Unexpected error: " + e);
        } finally {
            try {
                // synchronous commit before closing
                consumer.commitSync();
            } finally {
                consumer.close();
            }
        }
    }
}

HBase utility (VehicleToHbase)

In the data wrapped as a SimpleFeature, the primary key is the feature ID (__fid__); here it is a custom UUID rather than an ID carried in the data itself (gid, fid, etc.).

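Because the feature ID is the row key, a single record can later be fetched back with an ECQL ID filter. A minimal sketch before the writer code itself, assuming the DataStore is created as shown further below and using a made-up feature ID value:

import org.geotools.data.DataStore;
import org.geotools.data.FeatureReader;
import org.geotools.data.Query;
import org.geotools.data.Transaction;
import org.geotools.filter.text.ecql.ECQL;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;

public class FidLookupSketch {

    public static void lookupById(DataStore dataStore) throws Exception {
        // ECQL ID filter: select the feature whose __fid__ equals the custom UUID
        // ("vehicle1253051053958889472" is a made-up example value)
        Query query = new Query("mySft", ECQL.toFilter("IN ('vehicle1253051053958889472')"));
        try (FeatureReader<SimpleFeatureType, SimpleFeature> reader =
                     dataStore.getFeatureReader(query, Transaction.AUTO_COMMIT)) {
            while (reader.hasNext()) {
                SimpleFeature feature = reader.next();
                System.out.println(feature.getID() + " -> " + feature.getAttributes());
            }
        }
    }
}
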
/*
 * www.unisinsight.com Inc.
 * Copyright (c) 2018 All Rights Reserved
 */
package com.service;

import cn.hutool.core.lang.Snowflake;
import cn.hutool.core.util.IdUtil;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.cli.*;
import org.geotools.data.*;
import org.geotools.factory.Hints;
import org.geotools.feature.simple.SimpleFeatureBuilder;
import org.geotools.filter.identity.FeatureIdImpl;
import org.joda.time.DateTime;
import org.locationtech.geomesa.hbase.data.HBaseDataStoreFactory;
import org.locationtech.geomesa.utils.interop.SimpleFeatureTypes;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.*;

/**
 * Converts JSON vehicle-passing records into GeoMesa SimpleFeatures and writes them to HBase.
 *
 * @author yuwei [yu.wei@unisinsight.com]
 * @date 2020/04/21 12:26
 * @since 1.0
 */
public class VehicleToHbase {
    private static final Logger LOGGER = LoggerFactory.getLogger(VehicleToHbase.class);
    private static Snowflake snowflake = IdUtil.createSnowflake(1, 1);
    private static DataStore dataStore = null;
    private static SimpleFeatureType sft = null;
    private static List<SimpleFeature> geoMsg = null;

    public static void main(String[] args) {
        //
        // dealMsg("dsads", null);
    }


    /**
     * Write a batch of Kafka messages into HBase via GeoMesa.
     *
     * @param topicName topic name, used to derive the HBase catalog/table name
     * @param list      batch of JSON messages
     */
    public static void dealMsg(String topicName, List<JSONObject> list) {
        // create DataStore
        // drop the trailing digit of the topic name and use the result as the HBase catalog name
        String tableName = topicName.substring(0, topicName.length() - 1);
        if (dataStore == null) {
            try {
                dataStore = createHbaseDataStore(tableName);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (ParseException e) {
                e.printStackTrace();
            }

            // get SimpleFeatureType
            // declare the attribute columns; per-attribute "index=true" requests a secondary (attribute) index
            // GeoMesa automatically builds spatial and spatio-temporal indexes from the geometry (point/line/polygon) plus the dtg Date attribute
            String spec = "passTime:Long,dtg:Date,plateNo:String:index=true,plateColor:Integer:index=true,tollgateID:String:index=true,speed:Double,*geom:Point:srid=4326,uuid:String";
            //String spec = "passTime:Long,dtg:Date,plateNo:String,plateColor:Integer,tollgateID:String,speed:Double,*geom:Point:srid=4326,uuid:String";
            sft = SimpleFeatureTypes.createType("mySft", spec);
            // serialize the point geometry into 16 bytes to save storage (the default is 32 bytes when unspecified)
            sft.getDescriptor("geom").getUserData().put("precision", "6");
            // enable UUID feature IDs; this can also be done later
            //sft.getUserData().put("geomesa.fid.uuid", "true");
            // https://www.geomesa.org/documentation/user/datastores/index_config.html#attribute-options
            //sft.getUserData().put("geomesa.indices.enabled", "z3:geom:dtg,attr:tollgateID:plateNo:plateColor:passTime");
            System.out.println(sft.getTypeName());

            // create the schema up front
            createSchema(dataStore, sft);
        }
        // wrap the JSON records from Kafka into GeoMesa SimpleFeatures
        try {
            geoMsg = transformMsg(sft, list);
        } catch (Exception e) {
            e.printStackTrace();
        }


        // write to hbase
        try {
            writeFeatures(dataStore, sft, geoMsg);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * createSchema
     *
     * @param dataStore
     * @param sft
     */
    public static void createSchema(DataStore dataStore, SimpleFeatureType sft) {
        try {
            dataStore.createSchema(sft);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Wrap the JSON records from Kafka into GeoMesa SimpleFeatures.
     *
     * @param sft  sft
     * @param msgs msgs
     * @return List<SimpleFeature>
     */
    public static List<SimpleFeature> transformMsg(SimpleFeatureType sft, List<JSONObject> msgs) {
        List<SimpleFeature> features = null;
        if (features == null) {
            features = new ArrayList<>();

            // date format used to parse the pass time string
            DateTimeFormatter dateFormat = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss", Locale.CHINA);

            // use a GeoTools SimpleFeatureBuilder to assemble the features
            SimpleFeatureBuilder builder = new SimpleFeatureBuilder(sft);

            for (JSONObject msg : msgs) {
                // derive a pseudo longitude/latitude from the last digits of the tollgateID
                String tollgateID = msg.getString("tollgateID");
                int s = tollgateID.length() - 4;
                int e = tollgateID.length();
                String longitude = "106.49" + String.valueOf(Double.parseDouble(tollgateID.substring(s, e)));
                String latitude = "29.65" + String.valueOf(Double.parseDouble(tollgateID.substring(s, e)));
                longitude = longitude.substring(0, longitude.length() - 2);
                latitude = latitude.substring(0, latitude.length() - 2);

                builder.set("passTime", msg.getLong("passTime"));
                builder.set("dtg", Date.from(LocalDateTime.parse(TimeUtil.timestampToString(msg.getLong("passTime")), dateFormat).toInstant(ZoneOffset.ofHours(8))));
                builder.set("plateNo", msg.getString("plateNo"));
                builder.set("tollgateID", msg.getString("tollgateID"));
                builder.set("plateNo", msg.getString("plateNo"));
                builder.set("speed", msg.getDouble("speed"));
                builder.set("geom", "POINT (" + longitude + " " + latitude + ")");
                String uuid = "vehicle" + snowflake.nextIdStr();
                builder.set("uuid", uuid);
                // tell GeoTools to use the caller-provided feature ID; if omitted, GeoTools generates a random ID per record
                builder.featureUserData(Hints.USE_PROVIDED_FID, Boolean.TRUE);
                // set the feature ID, here the custom snowflake-based UUID
                features.add(builder.buildFeature(uuid));
            }
        }
        features = Collections.unmodifiableList(features);
        return features;
    }

    /**
     * create DataStore
     *
     * @param catalog hbase tableName
     * @return DataStore
     * @throws IOException    Exception
     * @throws ParseException Exception
     */
    public static DataStore createHbaseDataStore(String catalog) throws IOException, ParseException {
        Map<String, String> params = getDataStoreParams(catalog);
        return DataStoreFinder.getDataStore(params);
    }

    /**
     * getDataStorePara
     *
     * @param catalog hbase tableName
     * @return DataStorePara
     * @throws ParseException Exception
     */
    public static Map<String, String> getDataStoreParams(String catalog) throws ParseException {
        String[] args = {"--hbase.catalog", catalog, "--hbase.zookeepers", "192.168.108.60:2181"};
        Options options = createOptions(new HBaseDataStoreFactory().getParametersInfo());
        CommandLine command = parseArgs(VehicleToHbase.class, options, args);
        Map<String, String> params = new HashMap<>();
        // copy every option that received a value into the DataStore parameter map
        for (Option opt : options.getOptions()) {
            String value = command.getOptionValue(opt.getLongOpt());
            if (value != null) {
                params.put(opt.getArgName(), value);
            }
        }
        return params;
    }

    /**
     * createOptions
     *
     * @param parameters parameters
     * @return Options
     */
    public static Options createOptions(DataAccessFactory.Param[] parameters) {
        Options options = new Options();
        for (DataAccessFactory.Param p : parameters) {
            if (!p.isDeprecated()) {
                Option opt = Option.builder(null)
                        .longOpt(p.getName())
                        .argName(p.getName())
                        .hasArg()
                        .desc(p.getDescription().toString())
                        .required(p.isRequired())
                        .build();
                options.addOption(opt);
            }
        }
        return options;
    }

    /**
     * parseArgs
     *
     * @param caller  Class
     * @param options options
     * @param args    args
     * @return CommandLine
     * @throws ParseException Exception
     */
    public static CommandLine parseArgs(Class<?> caller, Options options, String[] args) throws ParseException {
        try {
            return new DefaultParser().parse(options, args);
        } catch (ParseException e) {
            System.err.println(e.getMessage());
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(caller.getName(), options);
            throw e;
        }
    }

    public static void writeFeatures(DataStore datastore, SimpleFeatureType sft, List<SimpleFeature> features) throws IOException {
        if (features.size() > 0) {
            LOGGER.debug(" begin write. begin time:" + DateTime.now().toString("yyyy-MM-dd HH:mm:ss") + " msg count: " + features.size());
            try (FeatureWriter<SimpleFeatureType, SimpleFeature> writer =
                         datastore.getFeatureWriterAppend(sft.getTypeName(), Transaction.AUTO_COMMIT)) {
                for (SimpleFeature feature : features) {
                    try {
                        SimpleFeature toWrite = writer.next();
                        toWrite.setAttributes(feature.getAttributes());
                        ((FeatureIdImpl) toWrite.getIdentifier()).setID(feature.getID());
                        toWrite.getUserData().put(Hints.USE_PROVIDED_FID, Boolean.TRUE);
                        toWrite.getUserData().putAll(feature.getUserData());
                        writer.write();

                    } catch (Exception e) {
                        LOGGER.debug("Invalid GDELT record: " + e.toString() + " " + feature.getAttributes());
                    }
                }
                LOGGER.debug("write success. end time:" + DateTime.now().toString("yyyy-MM-dd HH:mm:ss") + " msg count: " + features.size());

            }
        }
    }
}
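
Once dealMsg has run at least once, the schema can be sanity-checked by listing the type names registered in the catalog. A minimal sketch reusing createHbaseDataStore above (class name hypothetical):

import org.geotools.data.DataStore;

public class SchemaCheckSketch {

    public static void main(String[] args) throws Exception {
        DataStore dataStore = VehicleToHbase.createHbaseDataStore("viid_vehicle_topic");
        // should print "mySft" once the schema has been created
        for (String typeName : dataStore.getTypeNames()) {
            System.out.println(typeName);
        }
        dataStore.dispose();
    }
}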

2. Data queries

/*
 * www.unisinsight.com Inc.
 * Copyright (c) 2018 All Rights Reserved
 */
package com.service.kafka_geomesa_hbase.hbase;

import com.alibaba.fastjson.JSONObject;
import com.service.kafka_geomesa_hbase.hbase.VehicleToHbase;
import org.apache.commons.cli.ParseException;
import org.geotools.data.*;
import org.geotools.filter.FilterFactoryImpl;
import org.geotools.filter.text.cql2.CQLException;
import org.geotools.filter.text.ecql.ECQL;
import org.locationtech.geomesa.hbase.data.HBaseFeatureWriter;
import org.locationtech.geomesa.index.conf.QueryHints;
import org.opengis.feature.Property;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import org.opengis.feature.type.Name;
import org.opengis.filter.Filter;
import org.opengis.filter.sort.SortBy;

import java.io.IOException;
import java.util.*;

/**
 * Query examples against the GeoMesa HBase catalog: stats, attribute filters, spatio-temporal filters, deletes.
 *
 * @author yuwei [yu.wei@unisinsight.com]
 * @date 2020/04/22 08:56
 * @since 1.0
 */
public class TestVehicleLogic {
    private static DataStore dataStore = null;

    public static void main(String[] args) {

        String tableName = "viid_vehicle_topic";
        getDataStore(tableName);

        testMaxMin();
        testCount();
        testGroupByCount();
        testLikePlateNoGroupByCount();
        getTestQueries();
    }

    /**
     * Spatio-temporal queries
     */
    public static void getTestQueries() {
        List<Query> queries = null;
        if (queries == null) {
            try {
                queries = new ArrayList<>();

                String during = "dtg DURING 2020-02-01T00:00:00.000Z/2020-04-11T00:00:00.000Z";
                String bbox = "bbox(geom,106.49000,29.55312,106.5000,29.75612)";
                String plateNoFilter = "plateNo" + " LIKE '" + "渝F0C2D3" + "'";
                // spatio-temporal query (bbox + time range + plate-number filter)
                long stime = System.currentTimeMillis();
                Query query = new Query("mySft", ECQL.toFilter(bbox + " AND " + during + " AND " + plateNoFilter));
                //query.getHints().put(QueryHints.STATS_STRING(), "GroupBy(\"plateNo\",Count())");
                Map<String, Object> map = queryFeatureDatas(dataStore, query, 10000, 1);
                List<Map<String, String>> data = (List<Map<String, String>>) map.get("data");
                for (Map<String, String> m : data) {
                    JSONObject jsonObject = new JSONObject();
                    for (Map.Entry<String, String> entry : m.entrySet()) {
                        jsonObject.put(entry.getKey(), entry.getValue());
                    }
                    // System.out.println(jsonObject.toString());
                }
                System.out.println("结果条数: " + data.size() + ",时空过滤查询耗时:" + (System.currentTimeMillis() - stime));
                // {"unionId":"stat","stats":"{\"count\":14669200}","geom":"POINT (0 0)"}
                // 耗时12473ms
                //空间查询
                stime = System.currentTimeMillis();
                query = new Query("mySft", ECQL.toFilter(bbox + " AND " + plateNoFilter));
                //query.getHints().put(QueryHints.STATS_STRING(), "GroupBy(\"plateNo\",Count())");

                map = queryFeatureDatas(dataStore, query, 100, 1);
                data = (List<Map<String, String>>) map.get("data");
                for (Map<String, String> m : data) {
                    JSONObject jsonObject = new JSONObject();
                    for (Map.Entry<String, String> entry : m.entrySet()) {
                        jsonObject.put(entry.getKey(), entry.getValue());
                    }
                    // System.out.println(jsonObject.toString());
                }
                System.out.println("结果条数: " + data.size() + ",空间过滤查询耗时:" + (System.currentTimeMillis() - stime));
                //时间过滤查询
                stime = System.currentTimeMillis();
                query = new Query("mySft", ECQL.toFilter(during + " AND " + plateNoFilter));
                // query.getHints().put(QueryHints.STATS_STRING(), "GroupBy(\"plateNo\",Count())");

                map = queryFeatureDatas(dataStore, query, 100, 1);
                data = (List<Map<String, String>>) map.get("data");
                for (Map<String, String> m : data) {
                    JSONObject jsonObject = new JSONObject();
                    for (Map.Entry<String, String> entry : m.entrySet()) {
                        jsonObject.put(entry.getKey(), entry.getValue());
                    }
                    // System.out.println(jsonObject.toString());
                }
                System.out.println("结果条数: " + data.size() + ",时间过滤查询耗时:" + (System.currentTimeMillis() - stime));
                // basic spatio-temporal query with projection down to a few attributes
            } catch (CQLException e) {
                throw new RuntimeException("Error creating filter:", e);
            }
        }
    }

    public static void getDataStore(String topicName) {
        if (dataStore == null) {
            try {
                dataStore = VehicleToHbase.createHbaseDataStore(topicName);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (ParseException e) {
                e.printStackTrace();
            }
        }
    }

    public static void testCount() {
        Query query = new Query("mySft");
        long stime = System.currentTimeMillis();
        query.getHints().put(QueryHints.STATS_STRING(), "Count()");
        Map<String, Object> map = queryFeatureDatas(dataStore, query, 100, 1);
        List<Map<String, String>> data = (List<Map<String, String>>) map.get("data");
        for (Map<String, String> m : data) {
            JSONObject jsonObject = new JSONObject();
            for (Map.Entry<String, String> entry : m.entrySet()) {
                jsonObject.put(entry.getKey(), entry.getValue());
            }
            System.out.println(jsonObject.toString());
        }
        System.out.println(System.currentTimeMillis() - stime);
    }

    public static void testGroupByCount() {
        Query query = new Query("mySft");
        long stime = System.currentTimeMillis();
        query.getHints().put(QueryHints.STATS_STRING(), "GroupBy(\"plateNo\",Count())");
        Map<String, Object> map = queryFeatureDatas(dataStore, query, 100, 1);
        List<Map<String, String>> data = (List<Map<String, String>>) map.get("data");
        for (Map<String, String> m : data) {
            JSONObject jsonObject = new JSONObject();
            for (Map.Entry<String, String> entry : m.entrySet()) {
                jsonObject.put(entry.getKey(), entry.getValue());
            }
            System.out.println(jsonObject.toString());
        }
        System.out.println(System.currentTimeMillis() - stime);
    }

    public static void testLikePlateNoGroupByCount() {
        Query query = null;
        try {
            //query = new Query("mySft");
            query = new Query("mySft",
                    ECQL.toFilter("plateNo" + " like '%" + "渝F" + "%'"));
        } catch (Exception e) {
            e.printStackTrace();
        }
        long stime = System.currentTimeMillis();
        //Query query = new Query(typeName, ECQL.toFilter(queryCQL));
        FilterFactoryImpl ff = new FilterFactoryImpl();
        // cap the number of returned features
        //query.setMaxFeatures(5);
        query.getHints().put(QueryHints.STATS_STRING(), "GroupBy(\"plateNo\",Count())");
        //query.setSortBy(new SortBy[]{new SortByImpl(ff.property("passTime"), SortOrder.DESCENDING)});

        Map<String, Object> map = queryFeatureDatas(dataStore, query, 100, 1);
        List<Map<String, String>> data = (List<Map<String, String>>) map.get("data");
        for (Map<String, String> m : data) {
            JSONObject jsonObject = new JSONObject();
            for (Map.Entry<String, String> entry : m.entrySet()) {
                jsonObject.put(entry.getKey(), entry.getValue());
            }
            System.out.println(jsonObject.toString());
        }
        System.out.println(System.currentTimeMillis() - stime);
    }

    public static void testMaxMin() {
        Query query = new Query("mySft");
        long stime = System.currentTimeMillis();
        query.getHints().put(QueryHints.STATS_STRING(), "MinMax(\"passTime\")");
        Map<String, Object> map = queryFeatureDatas(dataStore, query, 100, 1);
        List<Map<String, String>> data = (List<Map<String, String>>) map.get("data");
        for (Map<String, String> m : data) {
            JSONObject jsonObject = new JSONObject();
            for (Map.Entry<String, String> entry : m.entrySet()) {
                jsonObject.put(entry.getKey(), entry.getValue());
            }
            System.out.println(jsonObject.toString());
        }
        System.out.println(System.currentTimeMillis() - stime);
    }

    /**
     * Query data: exact match, fuzzy match, and paged queries.
     * Attribute name:  property.getName() or property.getType().getName()
     * Attribute value: feature.getAttribute(property.getName()) or property.getValue()
     *
     * @param dataStore dataStore
     * @param query     query conditions
     * @param pageSize  page size (defaults to 10 when null or less than 1)
     * @param pageNum   1-based page number (defaults to 1 when null or less than 1)
     * @return map with keys "data" (row list) and "fields" (attribute names)
     */
    public static Map<String, Object> queryFeatureDatas(DataStore dataStore, Query query,
                                                        Integer pageSize, Integer pageNum) {
        if (pageSize == null || pageSize < 1) {
            pageSize = 10;
        }
        if (pageNum == null || pageNum < 1) {
            pageNum = 1;
        }
        // container for the result data
        Map<String, Object> rowDatas = new HashMap<>();
        List<Map<String, String>> datas = new ArrayList<>();
        Set<String> fields = new HashSet<>();
        if (dataStore == null) {
            rowDatas.put("data", datas);
            rowDatas.put("fields", fields);
            rowDatas.put("unionId", null);
            return rowDatas;
        }
        try (FeatureReader<SimpleFeatureType, SimpleFeature> reader =
                     dataStore.getFeatureReader(query, Transaction.AUTO_COMMIT)) {
            // loop through all results
            long totalLoopTimes = pageSize * pageNum;
            long startFrom = pageSize * (pageNum - 1) + 1;
            //System.out.println(reader.hasNext());
            for (long count = 1; count <= totalLoopTimes; count++) {
                if (reader.hasNext()) {
                    // read one row (feature is a ScalaSimpleFeature)
                    SimpleFeature feature = reader.next();
                    if (count >= startFrom) {
                        // primary key (feature ID) of this row
                        String id = feature.getID();
                        // holder for one row of data
                        Map<String, String> oneRowData = new HashMap<>();
                        // store the unique feature ID first
                        oneRowData.put("unionId", id);
                        Collection<Property> properties = feature.getProperties();
                        for (Property p : properties) {
                            // p is an AttributeImpl
                            //AttributeImpl attrImpl = (AttributeImpl)p;
                            // attribute name
                            Name name = p.getName();
                            String pName = name.toString();
                            // attribute value
                            Object attrObj = feature.getAttribute(name);
                            String pValue = attrObj == null ? null : attrObj.toString();
                            oneRowData.put(pName, pValue);
                            // use geotools data utilities to get a printable string
                            // System.out.println(String.format("%02d", n) + " " + DataUtilities.encodeFeature(feature));
                        }
                        fields.addAll(oneRowData.keySet());
                        datas.add(oneRowData);
                    }
                } else {
                    // no more results; stop early instead of looping to totalLoopTimes
                    break;
                }
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } catch (NullPointerException e) {
            System.out.println(e.getMessage());
        }
        rowDatas.put("data", datas);
        rowDatas.put("fields", fields);
        return rowDatas;
    }

    /**
     * Delete one or more records matching the given conditions.
     * Note: normally deletion should be done precisely by primary key!
     *
     * @param tableName   table name
     * @param fieldValues attribute names and values
     * @return number of deleted records
     */
    public int deleteRecords(String tableName, Map<String, Object> fieldValues) {
        int count = 0;
        DataStore ds = dataStore;
        if (ds == null) {
            return count;
        }
        if (fieldValues != null) {
            List<Query> queries = new ArrayList<>();
            // build the fully qualified table name
            tableName = "default:" + tableName;
            // build a query filter for each attribute name/value pair
            Set<Map.Entry<String, Object>> entries = fieldValues.entrySet();
            for (Map.Entry<String, Object> entry : entries) {
                String field = entry.getKey();
                Object value = entry.getValue();
                try {
                    Filter filter = ECQL.toFilter(field + " = '" + value.toString() + "'");
                    queries.add(new Query(tableName, filter));
                } catch (CQLException e) {
                    e.printStackTrace();
                }
            }
            // query the features matching the filters
            List<SimpleFeature> simpleFeatures = queryFeatures(ds, queries);
            // delete the matched feature records
            String typeName = "mySft";
            count = removeFeatures(ds, typeName, simpleFeatures);
        }
        return count;
    }

    /**
     * Query SimpleFeature objects matching the given conditions.
     *
     * @param dataStore dataStore
     * @param queries   one or more queries
     * @return list of matching SimpleFeature objects
     */
    private List<SimpleFeature> queryFeatures(DataStore dataStore, List<Query> queries) {
        List<SimpleFeature> results = new ArrayList<>();
        if (dataStore == null) {
            return results;
        }
        for (Query query : queries) {
            System.out.println("Running query " + ECQL.toCQL(query.getFilter()));
            if (query.getPropertyNames() != null) {
                System.out.println("Returning attributes " + Arrays.asList(query.getPropertyNames()));
            }
            if (query.getSortBy() != null) {
                SortBy sort = query.getSortBy()[0];
                System.out.println("Sorting by " + sort.getPropertyName() + " " + sort.getSortOrder());
            }
            // run the query and iterate over the matching features;
            // try-with-resources ensures the reader gets closed
            try (FeatureReader<SimpleFeatureType, SimpleFeature> reader =
                         dataStore.getFeatureReader(query, Transaction.AUTO_COMMIT)) {
                long n = 0;
                while (reader.hasNext()) {
                    SimpleFeature feature = reader.next();
                    results.add(feature);
                    n += 1;
                }
                System.out.println("Returned " + n + " total queried features");
            } catch (IOException e) {
                System.out.println(e.getMessage());
            } catch (NullPointerException e) {
                System.out.println(e.getMessage());
            }
        }
        return results;
    }

    /**
     * Delete the given feature records.
     *
     * @param datastore datastore
     * @param typeName  feature type name
     * @param features  the features to delete
     * @return number of deleted features
     */
    private int removeFeatures(DataStore datastore, String typeName, List<SimpleFeature> features) {
        int count = 0;
        if (datastore == null) {
            return count;
        }
        try (FeatureWriter<SimpleFeatureType, SimpleFeature> writer =
                     datastore.getFeatureWriter(typeName, Transaction.AUTO_COMMIT)) {
            HBaseFeatureWriter hbaseWriter = (HBaseFeatureWriter) writer;
            for (SimpleFeature feature : features) {
                hbaseWriter.removeFeature(feature);
                /* The following approach does not work; the data is not actually deleted:
                SimpleFeature next = writer.next();
                next = feature;
                writer.remove();*/
                //next.setAttributes(feature.getAttributes());
                count += 1;
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } catch (NullPointerException e) {
            System.out.println(e.getMessage());
        }
        System.out.println("Delete " + count + " features successfully!");
        return count;
    }
}
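
The proximity process linked at the top can be approximated on the client side with an ECQL DWITHIN filter. A sketch of an extra test method that could be added to TestVehicleLogic above (it relies on the class's existing imports and helpers; the point, radius and time window are illustrative values only):

    /**
     * Proximity-style search: vehicles within roughly 500 m of a point during a time window.
     */
    public static void testProximity() {
        try {
            Query query = new Query("mySft", ECQL.toFilter(
                    "DWITHIN(geom, POINT(106.495 29.653), 500, meters)"
                            + " AND dtg DURING 2020-03-01T00:00:00.000Z/2020-03-02T00:00:00.000Z"));
            long stime = System.currentTimeMillis();
            Map<String, Object> map = queryFeatureDatas(dataStore, query, 100, 1);
            List<Map<String, String>> data = (List<Map<String, String>>) map.get("data");
            System.out.println("result count: " + data.size()
                    + ", proximity query took (ms): " + (System.currentTimeMillis() - stime));
        } catch (CQLException e) {
            e.printStackTrace();
        }
    }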

GeoMesa evaluation findings:

GeoMesa test summary

Total volume: 14,669,200 simulated vehicle-passing records, spread over 20 days. On ingestion into HBase, the spatio-temporal index plus attribute indexes on plate number and tollgate ID were built.

Queries:
1. Min/max of an attribute: 11 s
   Min/max of the point geometry: 25 s

2. Total count over the full data set: 12 s

3. Group by plate number with count over the full data set: 77 s

4. Fuzzy filter on plate number, then group by plate number with count: 12 s

5. Sort the full data set by a single attribute: GC overhead limit exceeded

6. Exact query on plate number over the full data set: 1.6 s

7. Fuzzy query on plate number over the full data set: 2 s

8. Exact filter on plate number, then sort by a single attribute: 1.6 s

9. Fuzzy filter on plate number, then sort by a single attribute: 26 s

10. Spatio-temporal query over the full data set (dtg + geom point): 2.5 s. Whether the query window covers the full range or a sub-range makes no difference; adding a plate-number filter adds roughly 5 s; grouping the results by plate number pushes it to 48 s.

11. Spatial query over the full data set (geom point): 0.2 s. Full range versus sub-range makes no difference, and adding a plate-number filter makes no difference; grouping the results by plate number pushes it to 41 s.

12. Temporal query over the full data set (dtg): 0.2 s. Full range versus sub-range makes no difference; adding a plate-number filter adds roughly 5 s; grouping the results by plate number pushes it to 70 s.

13. There is no count(distinct column) capability.

Summary:
Pros: pure filter queries (WHERE-style predicates) go straight to the indexes and are very fast.
Cons: statistics, aggregation and sorting queries are very slow, and the cost grows with the amount of data remaining after filtering.

Applicable scenarios:
Given the pros and cons above, this approach suits conditional retrieval: checking whether a specific person or vehicle appeared in a given space-time window, retrieving all trajectory records of a specific person or vehicle, or finding which people or vehicles appeared in a given space-time window and their trajectories.
It is not suited to statistical analysis such as group-by aggregation, complex logical evaluation, or sorting over long space-time ranges.
The current investigative tactics all require statistical analysis, so this approach does not fit them.
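
As a concrete example of the trajectory use case above, the query pattern that stays fast is an exact attribute filter plus a time window, optionally sorted by a single field. A minimal sketch (class name, plate number and dates are illustrative; it reuses the helpers defined earlier):

import org.geotools.data.DataStore;
import org.geotools.data.Query;
import org.geotools.filter.FilterFactoryImpl;
import org.geotools.filter.text.ecql.ECQL;
import org.opengis.filter.sort.SortBy;
import org.opengis.filter.sort.SortOrder;

import java.util.List;
import java.util.Map;

public class TrajectoryQuerySketch {

    public static void main(String[] args) throws Exception {
        DataStore dataStore = VehicleToHbase.createHbaseDataStore("viid_vehicle_topic");
        // exact plate-number filter plus a time window: the query pattern that stays fast
        Query query = new Query("mySft", ECQL.toFilter(
                "plateNo = '渝K0E1G2'"
                        + " AND dtg DURING 2020-03-01T00:00:00.000Z/2020-03-20T00:00:00.000Z"));
        // newest records first; a single-attribute sort after an exact filter took ~1.6 s above
        query.setSortBy(new SortBy[]{new FilterFactoryImpl().sort("passTime", SortOrder.DESCENDING)});
        Map<String, Object> rows = TestVehicleLogic.queryFeatureDatas(dataStore, query, 1000, 1);
        List<?> data = (List<?>) rows.get("data");
        System.out.println(data.size() + " trajectory points");
        dataStore.dispose();
    }
}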


pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.yw</groupId>
    <artifactId>geomesa</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <geomesa.version>2.1.0</geomesa.version>
        <gt.version>20.0</gt.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <scala.abi.version>2.11</scala.abi.version>
        <scala.version>2.11.7</scala.version>
        <!-- environment specific variables-->
        <zookeeper.version>3.4.9</zookeeper.version>
        <hadoop.version>2.6.0</hadoop.version>
        <hbase.hadoop.version>2.6.0</hbase.hadoop.version>
        <kafka.version>1.0.0</kafka.version>
        <hbase.version>1.4.4</hbase.version>
        <spark.version>2.4.0</spark.version>
    </properties>


    <dependencies>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty-all</artifactId>
            <version>4.1.33.Final</version>
        </dependency>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty</artifactId>
            <version>3.6.2.Final</version>
        </dependency>
        <dependency>
            <groupId>commons-cli</groupId>
            <artifactId>commons-cli</artifactId>
            <version>1.4</version>
        </dependency>

        <!--<dependency>
            <groupId>org.locationtech.geomesa</groupId>
            <artifactId>geomesa-hbase-tools_2.11</artifactId>
            <version>${geomesa.version}</version>
        </dependency>
        <dependency>
            <groupId>org.locationtech.geomesa</groupId>
            <artifactId>geomesa-hbase-datastore_${scala.abi.version}</artifactId>
            <version>${geomesa.version}</version>
        </dependency>-->
        <dependency>
            <groupId>org.locationtech.geomesa</groupId>
            <artifactId>geomesa-hbase-spark-runtime_${scala.abi.version}</artifactId>
            <version>${geomesa.version}</version>
        </dependency>

        <dependency>
            <groupId>org.locationtech.geomesa</groupId>
            <artifactId>geomesa-spark-core_2.11</artifactId>
            <version>${geomesa.version}</version>
        </dependency>
        <dependency>
            <groupId>org.locationtech.geomesa</groupId>
            <artifactId>geomesa-spark-jts_2.11</artifactId>
            <version>${geomesa.version}</version>
        </dependency>
        <!--<dependency>
            <groupId>org.locationtech.geomesa</groupId>
            <artifactId>geomesa-index-api_2.11</artifactId>
            <version>${geomesa.version}</version>
        </dependency>-->
        <dependency>
            <groupId>org.postgresql</groupId>
            <artifactId>postgresql</artifactId>
            <version>42.2.5</version>
        </dependency>
        <!--<dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
        </dependency>-->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-annotations</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-protocol</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
            <version>${zookeeper.version}</version>
        </dependency>
      <!--  <dependency>
            <groupId>org.geotools</groupId>
            <artifactId>gt-opengis</artifactId>
            <version>${gt.version}</version>
        </dependency>-->
        <!--<dependency>
            <groupId>org.geotools</groupId>
            <artifactId>gt-data</artifactId>
            <version>${gt.version}</version>
        </dependency>-->
     <!--   <dependency>
            <groupId>org.geotools</groupId>
            <artifactId>gt-epsg-hsql</artifactId>
            <version>${gt.version}</version>
        </dependency>-->
        <!--
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-catalyst_${scala.abi.version}</artifactId>
             <version>${spark.version}</version>
         </dependency>-->
     <!--   <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.abi.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.abi.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>-->
        <!-- <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-yarn_${scala.abi.version}</artifactId>
             <version>${spark.version}</version>
         </dependency>-->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.32</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>1.1.1</version>
        </dependency>
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>4.6.3</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.3</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <!-- build the shaded jar -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.unisinsight.service.flink_window.flinkETL</mainClass>
                                </transformer>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

Maven settings.xml

<?xml version="1.0" encoding="UTF-8"?>

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

<!--
 | This is the configuration file for Maven. It can be specified at two levels:
 |
 |  1. User Level. This settings.xml file provides configuration for a single user,
 |                 and is normally provided in ${user.home}/.m2/settings.xml.
 |
 |                 NOTE: This location can be overridden with the CLI option:
 |
 |                 -s /path/to/user/settings.xml
 |
 |  2. Global Level. This settings.xml file provides configuration for all Maven
 |                 users on a machine (assuming they're all using the same Maven
 |                 installation). It's normally provided in
 |                 ${maven.conf}/settings.xml.
 |
 |                 NOTE: This location can be overridden with the CLI option:
 |
 |                 -gs /path/to/global/settings.xml
 |
 | The sections in this sample file are intended to give you a running start at
 | getting the most out of your Maven installation. Where appropriate, the default
 | values (values used when the setting is not specified) are provided.
 |
 |-->
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
  <!-- localRepository
   | The path to the local repository maven will use to store artifacts.
   |
   | Default: ${user.home}/.m2/repository
  <localRepository>/path/to/local/repo</localRepository>
  -->
<localRepository>E:\.m2\repository</localRepository>
  <!-- interactiveMode
   | This will determine whether maven prompts you when it needs input. If set to false,
   | maven will use a sensible default value, perhaps based on some other setting, for
   | the parameter in question.
   |
   | Default: true
  <interactiveMode>true</interactiveMode>
  -->

  <!-- offline
   | Determines whether maven should attempt to connect to the network when executing a build.
   | This will have an effect on artifact downloads, artifact deployment, and others.
   |
   | Default: false
  <offline>false</offline>
  -->

  <!-- pluginGroups
   | This is a list of additional group identifiers that will be searched when resolving plugins by their prefix, i.e.
   | when invoking a command line like "mvn prefix:goal". Maven will automatically add the group identifiers
   | "org.apache.maven.plugins" and "org.codehaus.mojo" if these are not already contained in the list.
   |-->
  <pluginGroups>
    <!-- pluginGroup
     | Specifies a further group identifier to use for plugin lookup.
    <pluginGroup>com.your.plugins</pluginGroup>
    -->
  </pluginGroups>

  <!-- proxies
   | This is a list of proxies which can be used on this machine to connect to the network.
   | Unless otherwise specified (by system property or command-line switch), the first proxy
   | specification in this list marked as active will be used.
   |-->
  <proxies>
    <!-- proxy
     | Specification for one proxy, to be used in connecting to the network.
     |
    <proxy>
      <id>optional</id>
      <active>true</active>
      <protocol>http</protocol>
      <username>proxyuser</username>
      <password>proxypass</password>
      <host>proxy.host.net</host>
      <port>80</port>
      <nonProxyHosts>local.net|some.host.com</nonProxyHosts>
    </proxy>
    -->
	<proxy>
      <id>optional</id>
      <active>true</active>
      <protocol>http</protocol>
      <username>xxxx</username>
      <password>xxxxx</password>
      <host>cqproxy01.unisinsight.com</host>
      <port>8080</port>
      <nonProxyHosts>127.0.0.1|192.168.*</nonProxyHosts>
    </proxy>
  </proxies>

  <!-- servers
   | This is a list of authentication profiles, keyed by the server-id used within the system.
   | Authentication profiles can be used whenever maven must make a connection to a remote server.
   |-->
  <servers>
    <!-- server
     | Specifies the authentication information to use when connecting to a particular server, identified by
     | a unique name within the system (referred to by the 'id' attribute below).
     |
     | NOTE: You should either specify username/password OR privateKey/passphrase, since these pairings are
     |       used together.
     |
    <server>
      <id>deploymentRepo</id>
      <username>repouser</username>
      <password>repopwd</password>
    </server>
    -->

    <!-- Another sample, using keys to authenticate.
    <server>
      <id>siteServer</id>
      <privateKey>/path/to/private/key</privateKey>
      <passphrase>optional; leave empty if not used.</passphrase>
    </server>
    -->
  </servers>

  <!-- mirrors
   | This is a list of mirrors to be used in downloading artifacts from remote repositories.
   |
   | It works like this: a POM may declare a repository to use in resolving certain artifacts.
   | However, this repository may have problems with heavy traffic at times, so people have mirrored
   | it to several places.
   |
   | That repository definition will have a unique id, so we can create a mirror reference for that
   | repository, to be used as an alternate download site. The mirror site will be the preferred
   | server for that repository.
   |-->
  <mirrors>
    <!-- mirror
     | Specifies a repository mirror site to use instead of a given repository. The repository that
     | this mirror serves has an ID that matches the mirrorOf element of this mirror. IDs are used
     | for inheritance and direct lookup purposes, and must be unique across the set of mirrors.
     |
    <mirror>
      <id>mirrorId</id>
      <mirrorOf>repositoryId</mirrorOf>
      <name>Human Readable Name for this Mirror.</name>
      <url>http://my.repository.com/repo/path</url>
    </mirror>
     -->

	<mirror>
		<id>unisinsight</id>
		<name>unisinsight maven</name>
		<url>http://192.168.110.26:8088/repository/maven-public/</url>
		<mirrorOf>*</mirrorOf>
	</mirror>

  </mirrors>

  <!-- profiles
   | This is a list of profiles which can be activated in a variety of ways, and which can modify
   | the build process. Profiles provided in the settings.xml are intended to provide local machine-
   | specific paths and repository locations which allow the build to work in the local environment.
   |
   | For example, if you have an integration testing plugin - like cactus - that needs to know where
   | your Tomcat instance is installed, you can provide a variable here such that the variable is
   | dereferenced during the build process to configure the cactus plugin.
   |
   | As noted above, profiles can be activated in a variety of ways. One way - the activeProfiles
   | section of this document (settings.xml) - will be discussed later. Another way essentially
   | relies on the detection of a system property, either matching a particular value for the property,
   | or merely testing its existence. Profiles can also be activated by JDK version prefix, where a
   | value of '1.4' might activate a profile when the build is executed on a JDK version of '1.4.2_07'.
   | Finally, the list of active profiles can be specified directly from the command line.
   |
   | NOTE: For profiles defined in the settings.xml, you are restricted to specifying only artifact
   |       repositories, plugin repositories, and free-form properties to be used as configuration
   |       variables for plugins in the POM.
   |
   |-->
  <profiles>
    <!-- profile
     | Specifies a set of introductions to the build process, to be activated using one or more of the
     | mechanisms described above. For inheritance purposes, and to activate profiles via <activatedProfiles/>
     | or the command line, profiles have to have an ID that is unique.
     |
     | An encouraged best practice for profile identification is to use a consistent naming convention
     | for profiles, such as 'env-dev', 'env-test', 'env-production', 'user-jdcasey', 'user-brett', etc.
     | This will make it more intuitive to understand what the set of introduced profiles is attempting
     | to accomplish, particularly when you only have a list of profile id's for debug.
     |
     | This profile example uses the JDK version to trigger activation, and provides a JDK-specific repo.
    <profile>
      <id>jdk-1.4</id>

      <activation>
        <jdk>1.4</jdk>
      </activation>

      <repositories>
        <repository>
          <id>jdk14</id>
          <name>Repository for JDK 1.4 builds</name>
          <url>http://www.myhost.com/maven/jdk14</url>
          <layout>default</layout>
          <snapshotPolicy>always</snapshotPolicy>
        </repository>
      </repositories>
    </profile>
    -->

    <!--
     | Here is another profile, activated by the system property 'target-env' with a value of 'dev',
     | which provides a specific path to the Tomcat instance. To use this, your plugin configuration
     | might hypothetically look like:
     |
     | ...
     | <plugin>
     |   <groupId>org.myco.myplugins</groupId>
     |   <artifactId>myplugin</artifactId>
     |
     |   <configuration>
     |     <tomcatLocation>${tomcatPath}</tomcatLocation>
     |   </configuration>
     | </plugin>
     | ...
     |
     | NOTE: If you just wanted to inject this configuration whenever someone set 'target-env' to
     |       anything, you could just leave off the <value/> inside the activation-property.
     |
    <profile>
      <id>env-dev</id>

      <activation>
        <property>
          <name>target-env</name>
          <value>dev</value>
        </property>
      </activation>

      <properties>
        <tomcatPath>/path/to/tomcat/instance</tomcatPath>
      </properties>
    </profile>
    -->
  </profiles>

  <!-- activeProfiles
   | List of profiles that are active for all builds.
   |
  <activeProfiles>
    <activeProfile>alwaysActiveProfile</activeProfile>
    <activeProfile>anotherAlwaysActiveProfile</activeProfile>
  </activeProfiles>
  -->
</settings>
