Implementing a left outer join of customer and order tables with Spark SQL in Java

First, prepare the data for the two tables in a plain text editor:

customers.txt:

1,smith,12
2,bob,13
3,alex,14
4,alice,15
6,mike,26

orders.txt:

1,t001,100.9,1
2,t002,100.3,1
3,t001,100.6,2
4,t003,100.7,2
5,t001,100.1,2
6,t003,100.1,3
7,t002,100.1,3
8,t001,100.1,3
9,t002,100.1,3
10,t002,129.1,5

 

Upload the files to the Hadoop cluster:

[hadoop]> hdfs dfs -put customers.txt /user/hadoop/data2

[hadoop]> hdfs dfs -put orders.txt /user/hadoop/data2
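A note on paths: if the Hadoop configuration (core-site.xml) is not on the application's classpath, the bare paths used in the program below resolve against the local file system rather than HDFS. In that case the files can be read with an explicit HDFS URI instead. The following is a minimal sketch, not part of the original program; it reuses the JavaSparkContext sc created below, and "namenode:9000" is only a placeholder for your actual NameNode address:

        //sketch only: read the two input files with an explicit HDFS URI
        //replace "namenode:9000" with the fs.defaultFS address of your cluster
        JavaRDD<String> customerLines=sc.textFile("hdfs://namenode:9000/user/hadoop/data2/customers.txt");
        JavaRDD<String> orderLines=sc.textFile("hdfs://namenode:9000/user/hadoop/data2/orders.txt");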

For how to set up the Spark SQL environment, see my previous post:

Spark SQL code examples in Scala and Java: https://blog.csdn.net/nengyu/article/details/95870479

Write the code in IDEA:

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.*;


/**
 * Use Spark SQL to perform a left outer join between two tables
 */
public class SparkSQLDemoJava {
    public static void main(String[] args) {
        SparkSession spark=SparkSession.builder().appName("sparkSQL").master("local").enableHiveSupport().getOrCreate();
        //create the Java Spark context from the session's SparkContext
        JavaSparkContext sc=new JavaSparkContext(spark.sparkContext());

        //cust: read customers.txt and map each line to a Row(id, name, age)
        JavaRDD<String> c_rdd1=sc.textFile("/user/hadoop/data2/customers.txt");
        JavaRDD<Row> c_rdd2=c_rdd1.map(new Function<String, Row>() {
            public Row call(String line) throws Exception{
                String[] arr=line.split(",");
                Integer id=Integer.parseInt(arr[0]);
                String name=arr[1];
                Integer age=Integer.parseInt(arr[2]);

                return RowFactory.create(id,name,age);
            }
        });
        //schema for the customers table
        StructField[] c_fields=new StructField[3];
        c_fields[0]=new StructField("id", DataTypes.IntegerType,false, Metadata.empty());
        c_fields[1]=new StructField("name",DataTypes.StringType,true,Metadata.empty());
        c_fields[2]=new StructField("age",DataTypes.IntegerType,true,Metadata.empty());
        StructType c_type=new StructType(c_fields);
        Dataset<Row> c_df1=spark.createDataFrame(c_rdd2,c_type);
        //register a temporary view
        c_df1.createOrReplaceTempView("_cust");

        //order: read orders.txt and map each line to a Row(id, orderno, price, cid)
        JavaRDD<String> o_rdd1=sc.textFile("/user/hadoop/data2/orders.txt");
        JavaRDD<Row> o_rdd2=o_rdd1.map(new Function<String, Row>() {
            public Row call(String line) throws Exception{
                String[] arr=line.split(",");
                Integer id=Integer.parseInt(arr[0]);
                String orderno=arr[1];
                Float price=Float.parseFloat(arr[2]);
                Integer cid=Integer.parseInt(arr[3]);

                return RowFactory.create(id,orderno,price,cid);
            }
        });
        //schema for the orders table
        StructField[] o_fields=new StructField[4];
        o_fields[0]=new StructField("id", DataTypes.IntegerType,false, Metadata.empty());
        o_fields[1]=new StructField("orderno",DataTypes.StringType,true,Metadata.empty());
        o_fields[2]=new StructField("price",DataTypes.FloatType,true,Metadata.empty());
        o_fields[3]=new StructField("cid", DataTypes.IntegerType,false, Metadata.empty());
        StructType o_type=new StructType(o_fields);
        Dataset<Row> o_df1=spark.createDataFrame(o_rdd2,o_type);
        //register a temporary view
        o_df1.createOrReplaceTempView("_order");

        spark.sql("select * from _cust").show();
        spark.sql("select * from _order").show();
        String sql="select c.id,c.name,ifnull(o._sum,0) total_price from _cust c left outer join (select cid,sum(price) _sum from _order group by cid) o on c.id=o.cid";
        spark.sql(sql).show(1000,false);
    }
}
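The same query can also be expressed with the DataFrame API instead of SQL. The sketch below is not part of the original program; it reuses the c_df1 and o_df1 DataFrames built above and assumes a static import of org.apache.spark.sql.functions.* at the top of the file:

        //aggregate the order totals per customer id
        Dataset<Row> orderTotals=o_df1.groupBy("cid").agg(sum("price").alias("_sum"));
        //left outer join on customer id; customers without orders fall back to 0
        Dataset<Row> joined=c_df1
                .join(orderTotals, c_df1.col("id").equalTo(orderTotals.col("cid")), "left_outer")
                .select(c_df1.col("id"), c_df1.col("name"),
                        coalesce(orderTotals.col("_sum"), lit(0)).alias("total_price"));
        joined.show(1000,false);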

The output is as follows:

+---+-----+---+
| id| name|age|
+---+-----+---+
|  1|smith| 12|
|  2|  bob| 13|
|  3| alex| 14|
|  4|alice| 15|
|  6| mike| 26|
+---+-----+---+

+---+-------+-----+---+
| id|orderno|price|cid|
+---+-------+-----+---+
|  1|   t001|100.9|  1|
|  2|   t002|100.3|  1|
|  3|   t001|100.6|  2|
|  4|   t003|100.7|  2|
|  5|   t001|100.1|  2|
|  6|   t003|100.1|  3|
|  7|   t002|100.1|  3|
|  8|   t001|100.1|  3|
|  9|   t002|100.1|  3|
| 10|   t002|129.1|  5|
+---+-------+-----+---+

+---+-----+------------------+
|id |name |total_price       |
+---+-----+------------------+
|1  |smith|201.20000457763672|
|6  |mike |0.0               |
|3  |alex |400.3999938964844 |
|4  |alice|0.0               |
|2  |bob  |301.3999938964844 |
+---+-----+------------------+
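The long decimal tails in total_price (for example 201.20000457763672) are an artifact of declaring price as FloatType: values such as 100.9 are not exactly representable as 32-bit floats, and the error becomes visible when the sums are widened to double. A quick fix is to round in the SQL itself, e.g. round(ifnull(o._sum,0),1); a cleaner option is to use a decimal type for the column. A minimal sketch of that schema change (my suggestion, not from the original post):

        //use an exact decimal type for money instead of FloatType
        o_fields[2]=new StructField("price", DataTypes.createDecimalType(10,1), true, Metadata.empty());
        //the mapper must then produce java.math.BigDecimal values, e.g.:
        //return RowFactory.create(id, orderno, new java.math.BigDecimal(arr[2]), cid);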
 
