HBase tables to be analyzed with Spark:
Customer deposit table: customer_in_list
Serial number: serial_num
Customer number: customer_num
Deposit currency: currency_in
Deposit amount: amount_in
Date: date_in
Time: time_in
Analysis result table: result_analysis
Customer number: customer_num
Total amount: amount_count
Requirement: take the customer deposit table, sum the deposit amounts per customer number, write the totals into the analysis result table, and push the customer ranking to Redis. A sketch of how the two tables could be created is shown below.
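Both tables use a single column family. For reference, here is a minimal sketch of creating the two tables with the HBase Admin API; the family name fileInfo is taken from the job code below, and the rest simply mirrors the schema above:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateTables {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node02,node03,node04");
        Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        // Both tables share the single column family used by the Spark job
        for (String name : new String[]{"customer_in_list", "result_analysis"}) {
            TableName tableName = TableName.valueOf(name);
            if (!admin.tableExists(tableName)) {
                HTableDescriptor desc = new HTableDescriptor(tableName);
                desc.addFamily(new HColumnDescriptor("fileInfo"));
                admin.createTable(desc);
            }
        }
        admin.close();
        conn.close();
    }
}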
The Spark code is as follows:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.io.IOException;
import java.util.List;

public class SparkToHbase {
    public static void main(String[] args) {
        SparkConf sparkconf = new SparkConf();
        sparkconf.setAppName("test").setMaster("local");
        JavaSparkContext jsc = new JavaSparkContext(sparkconf);

        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node02,node03,node04");

        // Scan only the two columns the job needs from the fileInfo family
        final Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("fileInfo"));
        scan.addColumn(Bytes.toBytes("fileInfo"), Bytes.toBytes("customer_num"));
        scan.addColumn(Bytes.toBytes("fileInfo"), Bytes.toBytes("amount_in"));
        try {
            ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
            String tableName = "customer_in_list";
            conf.set(TableInputFormat.INPUT_TABLE, tableName);
            String scanToString = Base64.encodeBytes(proto.toByteArray());
            conf.set(TableInputFormat.SCAN, scanToString);
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Read the HBase table into an RDD
        JavaPairRDD<ImmutableBytesWritable, Result> hbaseRDD = jsc.newAPIHadoopRDD(
                conf, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);

        // Extract (customer_num, amount_in) pairs from each row
        JavaPairRDD<String, Double> mapToPair1 = hbaseRDD.mapToPair(
                new PairFunction<Tuple2<ImmutableBytesWritable, Result>, String, Double>() {
            @Override
            public Tuple2<String, Double> call(Tuple2<ImmutableBytesWritable, Result> resultTuple2) throws Exception {
                byte[] o1 = resultTuple2._2.getValue(Bytes.toBytes("fileInfo"), Bytes.toBytes("customer_num"));
                byte[] o2 = resultTuple2._2.getValue(Bytes.toBytes("fileInfo"), Bytes.toBytes("amount_in"));
                return new Tuple2<String, Double>(new String(o1), Double.parseDouble(new String(o2)));
            }
        });

        // Sum the deposit amounts per customer
        JavaPairRDD<String, Double> reduceByKey = mapToPair1.reduceByKey(
                new Function2<Double, Double, Double>() {
            @Override
            public Double call(Double aDouble, Double aDouble2) throws Exception {
                return aDouble + aDouble2;
            }
        });

        // Swap to (amount, customer) so the pairs can be sorted by amount, descending
        JavaPairRDD<Double, String> mapToPair2 = reduceByKey.mapToPair(
                new PairFunction<Tuple2<String, Double>, Double, String>() {
            @Override
            public Tuple2<Double, String> call(Tuple2<String, Double> stringDoubleTuple2) throws Exception {
                return new Tuple2<Double, String>(stringDoubleTuple2._2, stringDoubleTuple2._1);
            }
        });
        JavaPairRDD<Double, String> sortByKey = mapToPair2.sortByKey(false);

        // Swap back to (customer, amount)
        JavaPairRDD<String, Double> result = sortByKey.mapToPair(
                new PairFunction<Tuple2<Double, String>, String, Double>() {
            @Override
            public Tuple2<String, Double> call(Tuple2<Double, String> doubleStringTuple2) throws Exception {
                return new Tuple2<String, Double>(doubleStringTuple2._2, doubleStringTuple2._1);
            }
        });

        // Render the amount as a String for storage in HBase
        JavaPairRDD<String, String> result1 = result.mapToPair(
                new PairFunction<Tuple2<String, Double>, String, String>() {
            @Override
            public Tuple2<String, String> call(Tuple2<String, Double> stringDoubleTuple2) throws Exception {
                return new Tuple2<String, String>(stringDoubleTuple2._1, String.valueOf(stringDoubleTuple2._2));
            }
        });

        /*
        result.foreach(new VoidFunction<Tuple2<String, Double>>() {
            @Override
            public void call(Tuple2<String, Double> stringDoubleTuple2) throws Exception {
                System.out.println(stringDoubleTuple2);
            }
        });
        */

        // Collect once on the driver and reuse the list for both sinks
        List<Tuple2<String, String>> collect = result1.collect();

        // Write the per-customer totals into result_analysis
        for (Tuple2<String, String> tuple : collect) {
            System.out.println(tuple._1 + " " + tuple._2);
            try {
                Table table = HBaseConn.getTable("result_analysis");
                Put put = new Put(Bytes.toBytes(tuple._1));
                put.addColumn(Bytes.toBytes("fileInfo"), Bytes.toBytes("customer_num"), Bytes.toBytes(tuple._1));
                put.addColumn(Bytes.toBytes("fileInfo"), Bytes.toBytes("amount_count"), Bytes.toBytes(tuple._2));
                table.put(put);
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        // Write the ranking into a Redis sorted set: score = total amount, member = customer number
        for (Tuple2<String, String> tuple2 : collect) {
            RedisTest.add("key2", Double.parseDouble(tuple2._2), tuple2._1);
        }

        jsc.close();
    }
}
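Note that the job collects the whole result to the driver and issues one Put at a time. For larger result sets, an alternative is to write from the executors with foreachPartition, opening one Table per partition instead of per row. This is only a sketch under the assumption that HBaseConn (the utility class linked below) hands out a usable Table for a table name:

import java.util.Iterator;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

public class HBaseSinkSketch {
    // Writes (customer_num, amount_count) pairs to result_analysis from the executors
    public static void save(JavaPairRDD<String, String> result1) {
        result1.foreachPartition(new VoidFunction<Iterator<Tuple2<String, String>>>() {
            @Override
            public void call(Iterator<Tuple2<String, String>> iter) throws Exception {
                // One Table per partition instead of one per row;
                // HBaseConn is the utility class linked below (assumption)
                Table table = HBaseConn.getTable("result_analysis");
                while (iter.hasNext()) {
                    Tuple2<String, String> t = iter.next();
                    Put put = new Put(Bytes.toBytes(t._1));
                    put.addColumn(Bytes.toBytes("fileInfo"), Bytes.toBytes("customer_num"), Bytes.toBytes(t._1));
                    put.addColumn(Bytes.toBytes("fileInfo"), Bytes.toBytes("amount_count"), Bytes.toBytes(t._2));
                    table.put(put);
                }
                table.close();
            }
        });
    }
}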
Redis utility class (direct Jedis connection):
import redis.clients.jedis.Jedis;

public class RedisTest {
    public static boolean add(String key, Double score, String member) {
        // ZADD into a sorted set: Redis keeps members ordered by score,
        // so the deposit totals double as the ranking
        Jedis jedis = new Jedis("192.168.198.21", 7003);
        jedis.zadd(key, score, member);
        jedis.close();
        return true;
    }
}
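Because the totals land in a sorted set, the ranking can be read back with ZREVRANGE (highest score first). A small usage sketch, assuming the same host, port, and key as above:

import java.util.Set;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.Tuple;

public class RedisRankQuery {
    public static void main(String[] args) {
        Jedis jedis = new Jedis("192.168.198.21", 7003);
        // Top 10 customers by total deposit, largest first
        Set<Tuple> top10 = jedis.zrevrangeWithScores("key2", 0, 9);
        long rank = 1;
        for (Tuple t : top10) {
            System.out.println("#" + rank++ + " customer=" + t.getElement() + " total=" + t.getScore());
        }
        jedis.close();
    }
}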
HBase utility class: https://blog.csdn.net/Godlike77/article/details/80913252
pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.yt.test</groupId>
    <artifactId>sparkTest</artifactId>
    <version>1.0-SNAPSHOT</version>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <!-- Spark 1.6 requires Java 7 or later -->
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.10</artifactId>
            <version>1.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.2.4</version>
        </dependency>
        <!-- hbase-server provides TableInputFormat -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.5</version>
        </dependency>
        <!-- Required by RedisTest; the version here is an assumption, any 2.x Jedis should work -->
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
If reposting, please include the link: https://blog.csdn.net/Godlike77/article/details/80912928