Spark 2.0 + Elasticsearch 开发用户画像实战笔记(5)

ES mapping 设计思路及JavaBean实现

标签ETL代码实现

package cn.imooc.bigdata.sparkestag.etl.es;

import cn.imooc.bigdata.sparkestag.support.SparkUtils;
import lombok.Data;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.elasticsearch.spark.sql.api.java.JavaEsSparkSQL;
import org.scalatest.events.SeeStackDepthException;

import java.io.Serializable;
import java.util.List;

/**
 * ETL job that assembles one wide "member tag" row per member from several Hive
 * tables via Spark SQL and writes the rows to the Elasticsearch resource
 * {@code /tag/_doc}.
 *
 * <p>The shape of a written document is mirrored by the nested {@link MemberTag}
 * bean. The job itself writes {@code Dataset<Row>} directly and does not
 * instantiate that bean.
 *
 * @author bywind
 */
public class EsMappingEtl {

    /**
     * Entry point: obtains a session from {@code SparkUtils.initSession()}, runs the
     * ETL and always stops the session afterwards.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkSession session = SparkUtils.initSession();
        try {
            etl(session);
        } finally {
            // Release the Spark context even when the job fails.
            session.stop();
        }
    }

    /**
     * Builds the per-member tag dataset and saves it to Elasticsearch.
     *
     * <p>Each source query produces a dataset keyed by {@code memberId}; the datasets
     * are registered as temporary views and left-joined onto the member base table so
     * that every member yields a row even when it has no orders, coupons, deliveries
     * or feedback.
     *
     * @param session active Spark session with access to the queried tables
     */
    private static void etl(SparkSession session) {

        // Base attributes, one row per row of i_member.t_member.
        Dataset<Row> member = session.sql("select id as memberId,phone,sex,member_channel as channel,mp_open_id as subOpenId," +
                " address_default_id as address,date_format(create_time,'yyyy-MM-dd') as regTime" +
                " from i_member.t_member");

        // Order statistics per member.
        // The counters (orderTime / orderCount / orderMoney) are aggregated on t_order
        // alone: joining to t_order_commodity first would repeat each order once per
        // commodity line and inflate count() and sum().
        // favGoods uses collect_list, the Spark SQL counterpart of MySQL group_concat,
        // yielding an array such as [1,2,3,4,5]. The inner left join keeps members
        // whose orders have no commodity rows (they get an empty array).
        Dataset<Row> orderCommodity = session.sql("select s.memberId, s.orderTime, s.orderCount, g.favGoods, s.orderMoney" +
                " from (select member_id as memberId," +
                " date_format(max(create_time),'yyyy-MM-dd') as orderTime," +
                " count(order_id) as orderCount," +
                " sum(pay_price) as orderMoney" +
                " from i_order.t_order group by member_id) as s" +
                " join (select o.member_id as memberId," +
                " collect_list(DISTINCT oc.commodity_id) as favGoods" +
                " from i_order.t_order as o left join i_order.t_order_commodity as oc" +
                " on o.order_id = oc.order_id group by o.member_id) as g" +
                " on s.memberId = g.memberId");

        // Date on which the member received coupon_id = 1.
        // NOTE(review): presumably coupon 1 is the free coupon and is issued at most
        // once per member; if a member can hold it several times this query returns
        // several rows and duplicates that member in the final join - confirm.
        Dataset<Row> freeCoupon = session.sql("select member_id as memberId, " +
                " date_format(create_time,'yyyy-MM-dd') as freeCouponTime " +
                " from i_marketing.t_coupon_member where coupon_id = 1");

        // Dates of all coupons other than coupon_id = 1, collected into one array per member.
        Dataset<Row> couponTimes = session.sql("select member_id as memberId," +
                " collect_list(date_format(create_time,'yyyy-MM-dd')) as couponTimes" +
                "  from i_marketing.t_coupon_member where coupon_id !=1 group by member_id");

        // Sum of half the coupon price over the member's coupons with coupon_channel = 1.
        // NOTE(review): the "/2" and the meaning of channel 1 are business rules not
        // visible in this file - confirm.
        Dataset<Row> chargeMoney = session.sql("select cm.member_id as memberId , sum(c.coupon_price/2) as chargeMoney " +
                " from i_marketing.t_coupon_member as cm left join i_marketing.t_coupon as c " +
                " on cm.coupon_id = c.id where cm.coupon_channel = 1 group by cm.member_id");

        // Difference between the latest arrive_time and the latest pick_time per member,
        // computed on unix timestamps.
        Dataset<Row> overTime = session.sql("select (to_unix_timestamp(max(arrive_time)) - to_unix_timestamp(max(pick_time))) " +
                " as overTime, member_id as memberId " +
                " from i_operation.t_delivery group by member_id");

        // Feedback type of the row with the highest id per member.
        // An inner join is required here: a left join would keep every feedback row
        // (unmatched ones included) and fan a member out into several result rows.
        Dataset<Row> feedback = session.sql("select fb.feedback_type as feedback,fb.member_id as memberId" +
                " from i_operation.t_feedback as fb " +
                " join (select max(id) as mid,member_id as memberId " +
                " from i_operation.t_feedback group by member_id) as t " +
                " on fb.id = t.mid");

        // createOrReplaceTempView supersedes registerTempTable, deprecated since Spark 2.0.
        member.createOrReplaceTempView("member");
        orderCommodity.createOrReplaceTempView("oc");
        freeCoupon.createOrReplaceTempView("freeCoupon");
        couponTimes.createOrReplaceTempView("couponTimes");
        chargeMoney.createOrReplaceTempView("chargeMoney");
        overTime.createOrReplaceTempView("overTime");
        feedback.createOrReplaceTempView("feedback");

        // The feedback column is aliased explicitly so the written field name is
        // "feedBack" (as in MemberTag) regardless of Spark's case-sensitivity setting.
        Dataset<Row> result = session.sql("select m.*,o.orderCount,o.orderTime,o.orderMoney,o.favGoods," +
                " fb.freeCouponTime,ct.couponTimes, cm.chargeMoney,ot.overTime,f.feedback as feedBack" +
                " from member as m " +
                " left join oc as o on m.memberId = o.memberId " +
                " left join freeCoupon as fb on m.memberId = fb.memberId " +
                " left join couponTimes as ct on m.memberId = ct.memberId " +
                " left join chargeMoney as cm on m.memberId = cm.memberId " +
                " left join overTime as ot on m.memberId = ot.memberId " +
                " left join feedback as f on m.memberId = f.memberId ");

        JavaEsSparkSQL.saveToEs(result,"/tag/_doc");

    }


    /**
     * Bean whose properties carry the same names as the columns selected by the ETL
     * job. Accessors are generated by Lombok's {@code @Data}.
     */
    @Data
    public static class MemberTag implements Serializable{
        // ---- columns taken from i_member.t_member ----
        private String memberId;
        private String phone;
        private String sex;
        private String channel;
        private String subOpenId;
        private String address;
        private String regTime;

        // ---- columns derived from i_order ----
        private Long orderCount;
        // date of max(create_time) in i_order.t_order
        private String orderTime;
        private Double orderMoney;
        private List<String> favGoods;

        // ---- columns derived from i_marketing ----
        private String freeCouponTime;
        private List<String> couponTimes;
        private Double chargeMoney;

        // ---- columns derived from i_operation ----
        // NOTE(review): the ETL computes overTime as a difference of unix timestamps,
        // which Spark types as bigint; Integer may be too narrow - confirm before
        // mapping rows onto this bean.
        private Integer overTime;
        private Integer feedBack;

    }


}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值