hbase 读写空间数据(四)-空间查询优化

23 篇文章 0 订阅
12 篇文章 1 订阅

上一篇空间查询不涉及到任何得效率优化, 本篇先做一个简单得效率优化,

仅仅针对点数据的(PS:  线面数据还没搞定), 具体原理如下:

1: hbase 得索引都在rowkey上, rowkey在hbase中是排序的, 查询如果指定Start rowkey 那么在大数据量情况下可以快速定位到数据在那些节点上, 这有点类似 跳表得多级索引, 例如  1-10亿个排序点值,  如果我指定从5亿位置开始那么将减少一半搜索区间.

2: 点数据目前实验代码采用geohash(网上虽然一艘一大堆, 但是这里要注意, 对于空间数据, 网上geohash的精度不够, 最多是12位, 这样精度在几十米, 显然无法适应GIS专业行业需求, 所以建议找一下精度更加高的Geohash编码实现), 如果给一个范围查找在这个范围内的点集合, 我们只需要找到这个范围对应的rowkey范围即可, 最后是直接遍历这个范围的rowkey还是, 直接设置startrowkey和endrowkey, 大数据我还没实验, 目前小数据都是可以, 区别不大. 找rowkey范围示意图如下:

图上是一个编码顺序图, 红框子是范围, 但是红框子计算出来的rowkey其实如果只取15和192将比红框子范围大的数据都会进去,   具体实施得时候要看数据情况,  是给一个startrowkey 和endrowkey, 还是给一堆呢?  反正我这里有不是在Geometry列簇中还有xmin,ymin,xmax,ymax 吗?  联合起来用一下不就可以了, 至于对于点直接取15,26,27,30,31 ....192这样全部取出来我觉得也是可以得,  只是我没测过, 因为我觉得慢.

下面放一下测试代码:

package hbasedatabase;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.Cell.Type;
import org.apache.hadoop.hbase.CellBuilder;
import org.apache.hadoop.hbase.CellBuilderFactory;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.locationtech.jts.io.ParseException;
import org.locationtech.jts.io.WKBReader;
import org.locationtech.jts.io.WKTWriter;
import com.geostar.kernel.GsAny;
import com.geostar.kernel.GsBox;
import com.geostar.kernel.GsBox3D;
import com.geostar.kernel.GsConnectProperty;
import com.geostar.kernel.GsEnvelope;
import com.geostar.kernel.GsFeature;
import com.geostar.kernel.GsFeatureClass;
import com.geostar.kernel.GsFeatureCursor;
import com.geostar.kernel.GsField;
import com.geostar.kernel.GsFieldType;
import com.geostar.kernel.GsFieldVector;
import com.geostar.kernel.GsFields;
import com.geostar.kernel.GsGeoDatabase;
import com.geostar.kernel.GsGeohash;
import com.geostar.kernel.GsGeometry;
import com.geostar.kernel.GsGeometryBlob;
import com.geostar.kernel.GsGeometryFactory;
import com.geostar.kernel.GsSpatialQueryFilter;
import com.geostar.kernel.GsSqliteGeoDatabaseFactory;
import com.geostar.kernel.GsWKTOGCWriter;

public class featureRowkeySpatialQuery {

	private static final String TABLE_NAME = "RES1_4M_P";
	static GsFeatureClass pfcsClass = null;
	static GsGeohash ghGeohash = null;// new GsGeohash();
	static Configuration conf = null;
	static TableName tableName = null;
	static Table table = null;
	static Connection connection = null;
	static Admin admin = null;
	// 测试数据
	static String familygeos[] = { "GEOMETRY", "INFO" };
	static String family1[] = { "GEOMETRY", "XMIN", "YMIN", "XMAX", "YMAX" };
	static String family1FieldsType[] = { "eGeometryType", "eDoubleType", "eDoubleType", "eDoubleType", "eDoubleType" };
	static String family2[] = {};
	static GsFieldType family2FieldsType[] = {};
	static HashMap<String, HashMap<String, String>> map = new HashMap<String, HashMap<String, String>>();

	static HashMap<String, byte[]> valuesArrayList = new HashMap<String, byte[]>();

	static void InitConf() throws IOException, ParseException {
		System.loadLibrary("gsjavaport");
		com.geostar.kernel.GsKernel.Initialize();
		System.out.println("GsKernel loadlibrary succeed");
		Configuration cnf = new Configuration();

		cnf.set("hbase.zookeeper.quorum", "127.0.0.1:2181");
		cnf.set("hbase.zookeeper.property.clientPort", "2181");
		conf = HBaseConfiguration.create(cnf);
		connection = ConnectionFactory.createConnection(conf);
		admin = connection.getAdmin();
		tableName = TableName.valueOf(TABLE_NAME);
		table = connection.getTable(tableName);
		InitFeatureData();
	}

	static void InitFeatureData() throws ParseException {
		GsSqliteGeoDatabaseFactory factory = new GsSqliteGeoDatabaseFactory();
		GsConnectProperty connectProperty = new GsConnectProperty("D:\\source\\kernel\\testdata\\400sqlite");
		GsGeoDatabase pDatabase = factory.Open(connectProperty);
		pfcsClass = pDatabase.OpenFeatureClass(TABLE_NAME);
		ghGeohash = new GsGeohash();

		HashMap<String, String> family1t = new HashMap<String, String>();
		HashMap<String, String> family2t = new HashMap<String, String>();
		for (int i = 0; i < family1.length; i++) {
			family1t.put(family1[i], family1FieldsType[i]);
		}
		GsFields fdsFields = pfcsClass.Fields();
		GsFieldVector fdsFieldVector = fdsFields.getFields();
		for (int i = 0; i < fdsFieldVector.size(); i++) {
			GsField fdField = fdsFieldVector.get(i);
			String na = fdField.getName();
			if (na.equals("GEOMETRY"))
				continue;

			family2t.put(fdField.getName(), fdField.getType().toString());
		}

		map.put(familygeos[0], family1t);
		map.put(familygeos[1], family2t);

	}

	static void Exit() throws IOException {
//	  admin.disableTable(tableName);
//	  admin.close();
//	  table.close();
		connection.close();
	}


	
	public static void main(String... args) throws IOException, ParseException {
		InitConf();
		NewVersionTestCreateTable();
		Inserttest();

		long begintime1 = System.currentTimeMillis();
		SearchData(108, 20, 115, 26);
		System.out.println("SearchData(108,20,115,26); " + (System.currentTimeMillis() - begintime1) + " ms");

		begintime1 = System.currentTimeMillis();
		SearchData(108, 25, 115, 27);
		System.out.println(" SearchData(108,25,115,27); " + (System.currentTimeMillis() - begintime1) + " ms");
		Exit();
	}

	public static void NewVersionTestCreateTable() throws IOException {
		if (admin.tableExists(tableName)) {
			if (!admin.isTableDisabled(tableName))
				admin.disableTable(tableName);
			admin.deleteTable(tableName);
			System.out.println("Table is  exist. delete and  create it");
			// return;
		}
		// 创建表
		TableDescriptorBuilder tbuilder = TableDescriptorBuilder.newBuilder(tableName);
		ColumnFamilyDescriptor familygeo = ColumnFamilyDescriptorBuilder.newBuilder(familygeos[0].getBytes()).build();
		ColumnFamilyDescriptor familyinfo = ColumnFamilyDescriptorBuilder.newBuilder(familygeos[1].getBytes()).build();
		List<ColumnFamilyDescriptor> families = new ArrayList<>();
		families.add(familygeo);
		families.add(familyinfo);
		TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName).setColumnFamilies(families)
				.build();
		admin.createTable(tableDescriptor);

	}

	public static void Inserttest() throws IOException {
		if (!admin.tableExists(tableName)) {
			System.out.println("Table does not exist.");
			System.exit(-1);
		}
		table = connection.getTable(tableName);
		CellBuilder cellbuilder = CellBuilderFactory.create(CellBuilderType.DEEP_COPY);

		GsSqliteGeoDatabaseFactory factory = new GsSqliteGeoDatabaseFactory();

		GsFeatureCursor pCursor = pfcsClass.Search();
		GsFeature pFeature = pCursor.Next();

		ArrayList<Put> arrayList = new ArrayList<Put>();

		int nCommiT = 0;
		int RegionServerCount = 9;
		GsFields fdsFields = pfcsClass.Fields();
		GsFieldVector fdsFieldVector = fdsFields.getFields();
		do {
			if (pFeature == null)
				break;

			// 这里是点只用编码一次
			int partl = (int) (pFeature.OID() % ((long) RegionServerCount));
			GsGeometry pGeometry = pFeature.Geometry();
			GsBox3D box3d = pGeometry.Envelope();
			String minString = ghGeohash.Forward(box3d.getXMin(), box3d.getYMin(), 18);
		
			byte[] rowkey = Bytes.toBytes(minString + pFeature.OID());
			Put put = new Put(rowkey);
			// 写几何
			int buffersize = (int) pGeometry.GeometryBlobPtr().BufferSize();

			byte[] bBlob = new byte[buffersize];
			pGeometry.GeometryBlobPtr().CopyToArray(bBlob, bBlob.length);
			for (int i = 0; i < family1.length; i++) {
				cellbuilder.setRow(rowkey);
				cellbuilder.setFamily(Bytes.toBytes("GEOMETRY"));
				if (0 == i) {
					cellbuilder.setQualifier(Bytes.toBytes(family1[i]));
					cellbuilder.setValue(bBlob);
				} else {
					cellbuilder.setQualifier(Bytes.toBytes(family1[i]));
					switch (i) {
					case 1:
						cellbuilder.setValue(Bytes.toBytes(box3d.getXMin()));
						break;
					case 2:
						cellbuilder.setValue(Bytes.toBytes(box3d.getYMin()));
						break;
					case 3:
						cellbuilder.setValue(Bytes.toBytes(box3d.getXMax()));
						break;
					case 4:
						cellbuilder.setValue(Bytes.toBytes(box3d.getYMax()));
						break;
					default:
						break;
					}
				}
				cellbuilder.setType(Type.Put);
				Cell cl = cellbuilder.build();
				put.add(cl);
			}

			// 写属性
			for (Map.Entry<String, HashMap<String, String>> kvEntry : map.entrySet()) {
				HashMap<String, String> vvkv = kvEntry.getValue();
				String keyString = kvEntry.getKey();
				if (keyString.equals("GEOMETRY"))
					continue;

				for (Map.Entry<String, String> kss : vvkv.entrySet()) {
					String colmunString = kss.getKey();

					int index = fdsFields.FindField(colmunString);
					GsFieldType type = fdsFieldVector.get(index).getType();
					cellbuilder.setRow(rowkey);
					cellbuilder.setFamily(Bytes.toBytes(keyString));
					cellbuilder.setQualifier(Bytes.toBytes(colmunString));
					cellbuilder.setType(Type.Put);

					switch (type) {
					case eErrorType:
						break;
					/// \brief BOOL类型
					case eBoolType:
						cellbuilder.setValue(Bytes.toBytes(pFeature.ValueInt(index)));
						break;
					/// \brief 32位的整型
					case eIntType:
					case eUIntType:
						cellbuilder.setValue(Bytes.toBytes(pFeature.ValueInt(index)));
						break;
					/// \brief 64位的整型
					case eInt64Type:
					case eUInt64Type:
						cellbuilder.setValue(Bytes.toBytes(pFeature.ValueInt64(index)));
						break;
					/// \brief 字符串类型
					case eStringType:
						cellbuilder.setValue(Bytes.toBytes(pFeature.ValueString(index)));
						break;
					/// \brief 二进制类型
					case eBlobType:
						GsAny anyblob = pFeature.ValueBlob(index);
						byte[] bBlob1 = new byte[anyblob.ValueSize()];
						anyblob.AsBlob(bBlob1);
						cellbuilder.setValue(bBlob1);
						break;
					/// \brief 浮点型
					case eFloatType:
						cellbuilder.setValue(Bytes.toBytes(pFeature.ValueFloat(index)));
						break;
					/// \brief 双精度浮点型
					case eDoubleType:
						cellbuilder.setValue(Bytes.toBytes(pFeature.ValueDouble(index)));
						break;
					/// \brief 几何类型
					case eGeometryType:
						break;
					/// \brief 日期类型
					case eDateType:
						cellbuilder.setValue(Bytes.toBytes(pFeature.ValueDateTime(index).getTime()));
						break;

					default:
						break;
					}

					Cell cl = cellbuilder.build();
					put.add(cl);
				}
			}
			arrayList.add(put);
			if (nCommiT > 1000) {
				// 插入数据
				table.put(arrayList);
				System.out.println("Insert nCommiT-" + nCommiT);
				nCommiT = 0;
				arrayList.clear();
			}
		} while (pCursor.Next(pFeature));

		// 插入数据
		table.put(arrayList);
		System.out.println("Insert end");
	}
	static FilterList IntersectFilter(double xmin, double ymin, double xmax, double ymax) {
		FilterList filters1 = new FilterList(FilterList.Operator.MUST_PASS_ALL);
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("XMIN"),
				CompareOperator.LESS, Bytes.toBytes(xmax)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("YMIN"),
				CompareOperator.LESS, Bytes.toBytes(ymax)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("XMAX"),
				CompareOperator.GREATER, Bytes.toBytes(xmin)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("YMAX"),
				CompareOperator.GREATER, Bytes.toBytes(ymin)));

		return filters1;
	}

	static FilterList WithInFilter(double xmin, double ymin, double xmax, double ymax) {
		FilterList filters1 = new FilterList(FilterList.Operator.MUST_PASS_ALL);
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("XMIN"),
				CompareOperator.LESS, Bytes.toBytes(xmin)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("YMIN"),
				CompareOperator.LESS, Bytes.toBytes(ymin)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("XMAX"),
				CompareOperator.GREATER, Bytes.toBytes(xmax)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("YMAX"),
				CompareOperator.GREATER, Bytes.toBytes(ymax)));
		return filters1;
	}

	static FilterList ContiansFilter(double xmin, double ymin, double xmax, double ymax) {
		FilterList filters1 = new FilterList(FilterList.Operator.MUST_PASS_ALL);
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("XMIN"),
				CompareOperator.GREATER, Bytes.toBytes(xmin)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("YMIN"),
				CompareOperator.GREATER, Bytes.toBytes(ymin)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("XMAX"),
				CompareOperator.LESS, Bytes.toBytes(xmax)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("YMAX"),
				CompareOperator.LESS, Bytes.toBytes(ymax)));
		return filters1;
	}


	static FilterList DisjoinFilter(double xmin, double ymin, double xmax, double ymax) {
		FilterList filters1 = new FilterList(FilterList.Operator.MUST_PASS_ONE);
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("XMIN"),
				CompareOperator.GREATER, Bytes.toBytes(xmax)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("YMIN"),
				CompareOperator.GREATER, Bytes.toBytes(ymax)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("XMAX"),
				CompareOperator.LESS, Bytes.toBytes(xmin)));
		filters1.addFilter(new SingleColumnValueFilter(Bytes.toBytes("GEOMETRY"), Bytes.toBytes("YMAX"),
				CompareOperator.LESS, Bytes.toBytes(ymin)));

		return filters1;
	}

	public static void SearchData(double xmin, double ymin, double xmax, double ymax)
			throws IOException, ParseException {
		if (!admin.tableExists(tableName)) {
			System.out.println("Table does not exist.");
			System.exit(-1);
		}
		table = connection.getTable(tableName);

		Scan scan1 = new Scan();
		String fString = ghGeohash.Forward(xmin, ymin, 18);
		byte[] start = Bytes.add(Bytes.toBytes(fString), Bytes.toBytes(Integer.MIN_VALUE));
		String feString = ghGeohash.Forward(xmax, ymax, 18);
		byte[] end = Bytes.add(Bytes.toBytes(feString), Bytes.toBytes(Integer.MAX_VALUE));
		scan1.withStartRow(start, true);
		scan1.withStopRow(end, true);
		scan1.setFilter(IntersectFilter(xmin, ymin, xmax, ymax));
		ResultScanner scanner1 = table.getScanner(scan1);

		// 打印行的值
		int ncount = 0;
		for (org.apache.hadoop.hbase.client.Result res : scanner1) {
			//这里不用服务端精确查询, 只用box查询返回结果后,通过本地过滤即可
			printrow(res, false);
			ncount++;
		}
		// 关闭释放资源
		scanner1.close();
		System.out.println("search end ,get" + ncount + " features");

		GsBox box = new GsBox(xmin, ymin, xmax, ymax);
		GsSpatialQueryFilter pFilter = new GsSpatialQueryFilter(new GsEnvelope(box));
		pFilter.FilterType("ANYINTERACT");
		long n = pfcsClass.FeatureCount(pFilter);
		System.out.println("GsKernel search end ,get" + n + " features");

	}

	static WKBReader wkbReader = new WKBReader();
	static WKTWriter wktWriter = new WKTWriter();
	static GsWKTOGCWriter gskwtWriter = null;

	static void printrow(org.apache.hadoop.hbase.client.Result rtResult, boolean print)
			throws ParseException, UnsupportedEncodingException {
		if (null == gskwtWriter) {
			gskwtWriter = new GsWKTOGCWriter();
		}
		// System.out.println(rtResult);
		// System.out.println(new String(rtResult.getRow()).toString());

		for (Map.Entry<String, HashMap<String, String>> kvEntry : map.entrySet()) {
			{
				HashMap<String, String> vvkv = kvEntry.getValue();
				String keyString = kvEntry.getKey();
				for (Map.Entry<String, String> kss : vvkv.entrySet()) {
					String typeString = kss.getValue();
					String colmunString = kss.getKey();
					Cell lengthCells = rtResult.getColumnLatestCell(Bytes.toBytes(keyString),
							Bytes.toBytes(colmunString));
					if (null == lengthCells)
						continue;
					switch (typeString) {
					case "eGeometryType":
						// byte[] vv = lengthCells.getValueArray();
						if (print) {
							int offset = lengthCells.getValueOffset();
							int len = lengthCells.getValueLength();
							byte[] vvv = Arrays.copyOfRange(lengthCells.getValueArray(), offset, len);
							GsGeometryBlob blob = new GsGeometryBlob();
							blob.Copy(vvv, len);

							GsGeometry pGeometry = GsGeometryFactory.CreateGeometryFromBlob(blob);
							gskwtWriter.Write(new GsEnvelope(pGeometry.Envelope()));
							String geometrystr = gskwtWriter.WKT();
							System.out.println(colmunString + "--" + geometrystr);
						}
						break;
					case "eDoubleType":
						byte[] v = lengthCells.getValueArray();
						if (print)
							System.out.println(colmunString + "--"
									+ String.format("%.8f", (Bytes.toDouble(v, lengthCells.getValueOffset()))));
						break;
					case "eInt64Type":
					case "eUInt64Type":
						byte[] v1 = lengthCells.getValueArray();
						if (print)
							System.out.println(colmunString + "--"
									+ (Bytes.toLong(v1, lengthCells.getValueOffset(), lengthCells.getValueLength())));
						break;
					case "eStringType":
						byte[] v11 = lengthCells.getValueArray();
						if (print)
							System.out.println(colmunString + "--" + new String(
									Bytes.toString(v11, lengthCells.getValueOffset(), lengthCells.getValueLength())));
						break;
					case "eIntType":
					case "eUIntType":
						byte[] v111 = lengthCells.getValueArray();
						if (print)
							System.out.println(colmunString + "--" + (Bytes.toInt(v111, lengthCells.getValueOffset())));
						break;
					case "eFloatType":
						byte[] v1111 = lengthCells.getValueArray();
						if (print)
							System.out.println(
									colmunString + "--" + (Bytes.toFloat(v1111, lengthCells.getValueOffset())));
						break;
					default:
						break;
					}
				}
			}
		}
	}
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值