Without further ado, straight to the code.
1. Scala version (join two MySQL tables, then write the result into ES)
Maven dependencies:
<dependencies>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.27</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>2.2.1</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch-spark-20_2.11</artifactId>
        <version>6.8.2</version>
    </dependency>
</dependencies>
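Note that the connector's artifact name encodes compatibility: elasticsearch-spark-20_2.11 is the build for Spark 2.x on Scala 2.11, and its version (6.8.2 here) should track the version of the Elasticsearch cluster you write to.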
import java.util.Properties

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.elasticsearch.spark.sql.EsSparkSQL

object SparkMain {
  def main(args: Array[String]): Unit = {
    val config = new SparkConf().setMaster("local[*]").setAppName("OCRMSparkSql")
    config.set("es.nodes.wan.only", "true")
    config.set("es.nodes", "https://host:9200")
    // config.set("es.nodes", "host")
    // config.set("es.port", "9200")
    // println(InetAddress.getByName("hostname").getHostAddress)
    // config.set("es.nodes", InetAddress.getByName("hostname").getHostAddress)
    config.set("es.mapping.id", "app_id") // the upsert write mode requires a document-id column
    config.set("es.index.auto.create", "true")
    // config.set("es.write.operation", "index") // plain insert/overwrite mode
    config.set("es.write.operation", "upsert") // update (upsert) mode
    config.set("es.net.http.auth.user", "user") // ES username
    config.set("es.net.http.auth.pass", "password") // ES password
    val spark = SparkSession.builder.config(config).getOrCreate()

    val jdbcUrl = "jdbc:mysql://10.0.0.10:5457/db_ex_after_sales?zeroDateTimeBehavior=convertToNull"

    def dbConnProperties(user: String, pass: String): Properties = {
      val connProperties = new Properties()
      connProperties.put("driver", "com.mysql.jdbc.Driver")
      connProperties.put("user", user)
      connProperties.put("password", pass)
      connProperties
    }

    val dbUser = "user"
    val dbPass = "password"
    val readConnProperties = dbConnProperties(dbUser, dbPass)

    val sql = "select concat(t1.app_id,'tag_merchant') as app_id, version_type, sum_count from t_app_base_info t1 left join t_customer_deliver_records t2 on t1.app_id=t2.app_id limit 1"
    val df: DataFrame = spark.read.jdbc(jdbcUrl, s"(${sql}) t", readConnProperties)
    EsSparkSQL.saveToEs(df, "index1/type1")
  }
}
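To sanity-check that the upsert actually landed, the connector can also read the index back as a DataFrame through the Spark data source API. A minimal sketch, assuming the same spark session and the index1/type1 resource from above:

// Verification sketch: read the index back via the connector's data source
// (reuses the ES settings already placed on the SparkConf above).
val esDf = spark.read.format("org.elasticsearch.spark.sql").load("index1/type1")
esDf.show()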
2. Java version
import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.elasticsearch.spark.sql.EsSparkSQL;
import java.util.Properties;

public class SparkMain {
    public static void main(String[] args) {
        SparkConf config = new SparkConf().setAppName("OCRMSparkSql").setMaster("local[4]");
        config.set("es.nodes.wan.only", "true");
        config.set("es.nodes", "10.0.0.1:9200");
        config.set("es.mapping.id", "app_id");
        config.set("es.index.auto.create", "true");
        //config.set("es.write.operation", "index"); // plain insert/overwrite mode
        config.set("es.write.operation", "upsert"); // update (upsert) mode
        config.set("es.net.http.auth.user", "user"); // ES username
        config.set("es.net.http.auth.pass", "password"); // ES password
        SparkSession spark = SparkSession.builder().config(config).getOrCreate();

        // zeroDateTimeBehavior=convertToNull is needed because some timestamp
        // columns default to 0000-00-00 00:00:00, which the JDBC driver would
        // otherwise reject with an error.
        String jdbcUrl = "jdbc:mysql://10.0.0.10:5457/db_ex_after_sales?zeroDateTimeBehavior=convertToNull";

        Properties properties = new Properties();
        properties.put("driver", "com.mysql.jdbc.Driver");
        properties.put("user", "user");
        properties.put("password", "password");

        String sql = "select concat(t1.app_name,'tag_merchant') as app_id,t1.merchant_id,t1.version_type,t1.authentic_state,t1.authentic_body,t1.sign_time,t1.expire_time,t1.opened_time," +
                "t1.sell_stage,t1.sell_follower_id,t1.sum_count,t1.pay_count,t1.sum_income,t1.consume,t1.seven_active_days,t1.thirty_active_days,t1.sum_active_days,t1.industry_type,t1.active,t1.trans_time,t1.pay_time,t1.pay_money,t2.deliver_staff_id from t_app_base_info t1 left join t_customer_deliver_records t2 on t1.app_id=t2.app_id";

        // Register both JDBC tables as temp views so the join can run in Spark SQL.
        SQLContext sqlContext = spark.sqlContext();
        DataFrameReader reader = sqlContext.read().format("jdbc");
        Dataset<Row> app_base_info = reader.jdbc(jdbcUrl, "t_app_base_info", properties);
        Dataset<Row> t_customer_deliver_records = reader.jdbc(jdbcUrl, "t_customer_deliver_records", properties);
        app_base_info.createOrReplaceTempView("t_app_base_info");
        t_customer_deliver_records.createOrReplaceTempView("t_customer_deliver_records");

        Dataset<Row> data = spark.sql(sql);
        EsSparkSQL.saveToEs(data, "ocrm-tags-realtime/ocrm-tags-realtime-type");
    }
}
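Both examples use a local master, so they can be launched straight from the IDE. To run against a cluster instead, the hard-coded setMaster call would need to be removed (so that --master takes effect) and the job packaged and submitted with spark-submit; a rough sketch, where the fat-jar name spark-es-sync.jar is hypothetical:

spark-submit --class SparkMain --master yarn --jars mysql-connector-java-5.1.27.jar,elasticsearch-spark-20_2.11-6.8.2.jar spark-es-sync.jar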