Without further ado, straight to the code.
1. Scala version (join two MySQL tables, then write the result into ES)
Maven dependencies:
<dependencies>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.27</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>2.2.1</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch-spark-20_2.11</artifactId>
        <version>6.8.2</version>
    </dependency>
</dependencies>
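Note that the connector's artifact name encodes compatibility: elasticsearch-spark-20_2.11 is the build for Spark 2.x on Scala 2.11, and its version (6.8.2 here) should track the version of the Elasticsearch cluster you write to.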
import java.util.Properties

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.elasticsearch.spark.sql.EsSparkSQL

object SparkMain {
  def main(args: Array[String]): Unit = {
    val config = new SparkConf().setMaster("local[*]").setAppName("OCRMSparkSql")
    config.set("es.nodes.wan.only", "true")
    config.set("es.nodes", "https://host:9200")
    // config.set("es.nodes", "host")
    // config.set("es.port", "9200")
    // println(InetAddress.getByName("hostname").getHostAddress)
    // config.set("es.nodes", InetAddress.getByName("hostname").getHostAddress)
    config.set("es.mapping.id", "app_id") // the upsert write mode requires a document-id column
    config.set("es.index.auto.create", "true")
    // config.set("es.write.operation", "index") // plain insert/overwrite mode
    config.set("es.write.operation", "upsert") // update (upsert) mode
    config.set("es.net.http.auth.user", "user") // ES username
    config.set("es.net.http.auth.pass", "password") // ES password
    val spark = SparkSession.builder.config(config).getOrCreate()

    val jdbcUrl = "jdbc:mysql://10.0.0.10:5457/db_ex_after_sales?zeroDateTimeBehavior=convertToNull"

    def dbConnProperties(user: String, pass: String): Properties = {
      val connProperties = new Properties()
      connProperties.put("driver", "com.mysql.jdbc.Driver")
      connProperties.put("user", user)
      connProperties.put("password", pass)
      connProperties
    }

    val dbUser = "user"
    val dbPass = "password"
    val readConnProperties = dbConnProperties(dbUser, dbPass)

    val sql = "select concat(t1.app_id,'tag_merchant') as app_id, version_type, sum_count from t_app_base_info t1 left join t_customer_deliver_records t2 on t1.app_id=t2.app_id limit 1"
    val df: DataFrame = spark.read.jdbc(jdbcUrl, s"(${sql}) t", readConnProperties)
    EsSparkSQL.saveToEs(df, "index1/type1")
  }
}
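To sanity-check that the upsert actually landed, the connector can also read the index back as a DataFrame through the Spark data source API. A minimal sketch, assuming the same spark session and the index1/type1 resource from above:

// Verification sketch: read the index back via the connector's data source
// (reuses the ES settings already placed on the SparkConf above).
val esDf = spark.read.format("org.elasticsearch.spark.sql").load("index1/type1")
esDf.show()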
2. Java version
import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.elasticsearch.spark.sql.EsSparkSQL;
import java.util.Properties;

public class SparkMain {
    public static void main(String[] args) {
        SparkConf config = new SparkConf().setAppName("OCRMSparkSql").setMaster("local[4]");
        config.set("es.nodes.wan.only", "true");
        config.set("es.nodes", "10.0.0.1:9200");
        config.set("es.mapping.id", "app_id");
        config.set("es.index.auto.create", "true");
        //config.set("es.write.operation", "index"); // plain insert/overwrite mode
        config.set("es.write.operation", "upsert"); // update (upsert) mode
        config.set("es.net.http.auth.user", "user"); // ES username
        config.set("es.net.http.auth.pass", "password"); // ES password
        SparkSession spark = SparkSession.builder().config(config).getOrCreate();

        // zeroDateTimeBehavior=convertToNull is needed because some timestamp
        // columns default to 0000-00-00 00:00:00, which the JDBC driver would
        // otherwise reject with an error.
        String jdbcUrl = "jdbc:mysql://10.0.0.10:5457/db_ex_after_sales?zeroDateTimeBehavior=convertToNull";

        Properties properties = new Properties();
        properties.put("driver", "com.mysql.jdbc.Driver");
        properties.put("user", "user");
        properties.put("password", "password");

        String sql = "select concat(t1.app_name,'tag_merchant') as app_id,t1.merchant_id,t1.version_type,t1.authentic_state,t1.authentic_body,t1.sign_time,t1.expire_time,t1.opened_time," +
                "t1.sell_stage,t1.sell_follower_id,t1.sum_count,t1.pay_count,t1.sum_income,t1.consume,t1.seven_active_days,t1.thirty_active_days,t1.sum_active_days,t1.industry_type,t1.active,t1.trans_time,t1.pay_time,t1.pay_money,t2.deliver_staff_id from t_app_base_info t1 left join t_customer_deliver_records t2 on t1.app_id=t2.app_id";

        // Register both JDBC tables as temp views so the join can run in Spark SQL.
        SQLContext sqlContext = spark.sqlContext();
        DataFrameReader reader = sqlContext.read().format("jdbc");
        Dataset<Row> app_base_info = reader.jdbc(jdbcUrl, "t_app_base_info", properties);
        Dataset<Row> t_customer_deliver_records = reader.jdbc(jdbcUrl, "t_customer_deliver_records", properties);
        app_base_info.createOrReplaceTempView("t_app_base_info");
        t_customer_deliver_records.createOrReplaceTempView("t_customer_deliver_records");

        Dataset<Row> data = spark.sql(sql);
        EsSparkSQL.saveToEs(data, "ocrm-tags-realtime/ocrm-tags-realtime-type");
    }
}
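Both examples use a local master, so they can be launched straight from the IDE. To run against a cluster instead, the hard-coded setMaster call would need to be removed (so that --master takes effect) and the job packaged and submitted with spark-submit; a rough sketch, where the fat-jar name spark-es-sync.jar is hypothetical:

spark-submit --class SparkMain --master yarn --jars mysql-connector-java-5.1.27.jar,elasticsearch-spark-20_2.11-6.8.2.jar spark-es-sync.jar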