SparkSQL: Reading MySQL Data and Writing It to Elasticsearch

Without further ado, straight to the code.

1. Scala version (join two MySQL tables and write the result into ES)

Maven dependencies (pom.xml):


    <dependencies>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.27</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.2.1</version>
        </dependency>


        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch-spark-20_2.11</artifactId>
            <version>6.8.2</version>
        </dependency>

    </dependencies>
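
If you build with sbt instead of Maven, the equivalent coordinates look like the sketch below. This is a minimal sketch, not part of the original post; it assumes Scala 2.11 to match the `_2.11` artifact suffixes above.

    // build.sbt -- minimal sketch mirroring the Maven coordinates above (assumed setup)
    scalaVersion := "2.11.12"

    libraryDependencies ++= Seq(
      "mysql"             %  "mysql-connector-java"   % "5.1.27",
      "org.apache.spark"  %% "spark-sql"              % "2.2.1",
      "org.elasticsearch" %% "elasticsearch-spark-20" % "6.8.2"
    )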


import java.util.Properties

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.elasticsearch.spark.sql.EsSparkSQL

object SparkMain {

  def main(args: Array[String]): Unit = {

    val config = new SparkConf().setMaster("local[*]").setAppName("OCRMSparkSql")
    config.set("es.nodes.wan.only", "true")
    config.set("es.nodes", "https://your-es-host:9200")
    // config.set("es.nodes", "your-es-host")
    // config.set("es.port", "9200")
    // println(InetAddress.getByName("your-domain").getHostAddress)
    // config.set("es.nodes", InetAddress.getByName("your-domain").getHostAddress)
    config.set("es.mapping.id", "app_id") // upsert mode needs a column to use as the document id
    config.set("es.index.auto.create", "true")
    // config.set("es.write.operation", "index") // write mode: plain insert

    config.set("es.write.operation", "upsert")      // write mode: update-or-insert
    config.set("es.net.http.auth.user", "user")     // ES username
    config.set("es.net.http.auth.pass", "password") // ES password

    val spark = SparkSession.builder.config(config).getOrCreate()

    val jdbcUrl = "jdbc:mysql://10.0.0.10:5457/db_ex_after_sales?zeroDateTimeBehavior=convertToNull"

    def dbConnProperties(user: String, pass: String): Properties = {
      val connProperties = new Properties()
      connProperties.put("driver", "com.mysql.jdbc.Driver")
      connProperties.put("user", user)
      connProperties.put("password", pass)
      connProperties
    }

    val dbUser = "user"
    val dbPass = "password"

    val readConnProperties = dbConnProperties(dbUser, dbPass)

    val sql = "select concat(t1.app_id, 'tag_merchant') as app_id, version_type, sum_count " +
      "from t_app_base_info t1 left join t_customer_deliver_records t2 on t1.app_id = t2.app_id limit 1"
    val df: DataFrame = spark.read.jdbc(jdbcUrl, s"($sql) t", readConnProperties)

    EsSparkSQL.saveToEs(df, "index1/type1")
  }

}
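
The query above is pushed down to MySQL as a JDBC subquery, so Spark reads the whole result over a single connection. For larger tables it is worth splitting the read across partitions. A minimal sketch, assuming `t_app_base_info` has a numeric `id` column; the column name, bounds, and partition count here are hypothetical and must be adjusted to the real table:

    // Hypothetical partitioned read: "id", the bounds, and numPartitions are assumptions.
    val partitioned = spark.read.jdbc(
      jdbcUrl,
      "t_app_base_info",
      "id",       // numeric partition column (assumed)
      1L,         // lowerBound (assumed)
      1000000L,   // upperBound (assumed)
      8,          // numPartitions: 8 parallel JDBC connections
      readConnProperties
    )

Spark turns the bounds into 8 range predicates on `id` and opens one connection per partition, so the read parallelizes instead of funneling through one cursor.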

2. Java version



import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.elasticsearch.spark.sql.EsSparkSQL;

import java.util.Properties;

public class SparkMain {
    public static void main(String[] args) {

        SparkConf config = new SparkConf().setAppName("OCRMSparkSql").setMaster("local[4]");
        config.set("es.nodes.wan.only", "true");
        config.set("es.nodes", "10.0.0.1:9200");

        config.set("es.mapping.id", "app_id");
        config.set("es.index.auto.create", "true");
        // config.set("es.write.operation", "index"); // write mode: plain insert

        config.set("es.write.operation", "upsert");      // write mode: update-or-insert
        config.set("es.net.http.auth.user", "user");     // ES username
        config.set("es.net.http.auth.pass", "password"); // ES password
        SparkSession spark = SparkSession.builder().config(config).getOrCreate();

        // zeroDateTimeBehavior=convertToNull: timestamps in this data default to
        // 0000-00-00 00:00:00, which makes the JDBC driver throw, so those values
        // are converted to null instead.
        String jdbcUrl = "jdbc:mysql://10.0.0.10:5457/db_ex_after_sales?zeroDateTimeBehavior=convertToNull";
        Properties properties = new Properties();
        properties.put("driver", "com.mysql.jdbc.Driver");
        properties.put("user", "user");
        properties.put("password", "password");
        String sql = "select concat(t1.app_name, 'tag_merchant') as app_id, t1.merchant_id, t1.version_type, t1.authentic_state, t1.authentic_body, t1.sign_time, t1.expire_time, t1.opened_time," +
                " t1.sell_stage, t1.sell_follower_id, t1.sum_count, t1.pay_count, t1.sum_income, t1.consume, t1.seven_active_days, t1.thirty_active_days, t1.sum_active_days, t1.industry_type, t1.active, t1.trans_time, t1.pay_time, t1.pay_money, t2.deliver_staff_id" +
                " from t_app_base_info t1 left join t_customer_deliver_records t2 on t1.app_id = t2.app_id";

        SQLContext sqlContext = spark.sqlContext();

        DataFrameReader reader = sqlContext.read().format("jdbc");
        Dataset<Row> app_base_info = reader.jdbc(jdbcUrl, "t_app_base_info", properties);
        Dataset<Row> t_customer_deliver_records = reader.jdbc(jdbcUrl, "t_customer_deliver_records", properties);

        // Register both tables as temp views so the join can run as Spark SQL.
        app_base_info.createOrReplaceTempView("t_app_base_info");
        t_customer_deliver_records.createOrReplaceTempView("t_customer_deliver_records");

        Dataset<Row> data = spark.sql(sql);

        EsSparkSQL.saveToEs(data, "ocrm-tags-realtime/ocrm-tags-realtime-type");
    }
}
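
After the job runs, you can spot-check the written documents by reading the index back through the same connector. A minimal Scala sketch (kept in Scala for consistency with section 1); it assumes the same `es.*` settings are on the session and reuses the index name from the Java example:

    // Read the index back through the ES-Hadoop data source to verify the upsert.
    val esDf = spark.read
      .format("org.elasticsearch.spark.sql")
      .load("ocrm-tags-realtime/ocrm-tags-realtime-type")

    esDf.select("app_id", "version_type", "sum_count").show(10, truncate = false)

Because the write used es.mapping.id = app_id with upsert, re-running the job overwrites the same documents instead of duplicating them, so the row count here should stay stable across runs.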
