[Log Processing, Part 9] Integrating Spark SQL with Elasticsearch

This article uses Spark 1.4.0 and Elasticsearch 1.5.2.

1. Elasticsearch's Spark support is documented on the official site: https://www.elastic.co/guide/en/elasticsearch/hadoop/current/spark.html The documentation mainly covers the RDD API from Spark 1.2 and the DataFrame API from Spark 1.3 onwards, in both Java and Scala. Develop against the API of the Spark version you actually use; with Spark 1.4 or later, for example, the code deviates somewhat from the snippets in the official docs.
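
For instance, with Spark 1.4 an index/type can also be loaded through the generic DataFrame reader rather than the JavaEsSparkSQL helper used in the demo below. A minimal sketch, assuming an existing JavaSparkContext and the connection settings from step 4:

SQLContext sql = new SQLContext(jsc); // jsc: an existing JavaSparkContext
// "org.elasticsearch.spark.sql" is the connector's data source name
DataFrame df = sql.read()
        .format("org.elasticsearch.spark.sql")
        .option("pushdown", "true") // push query filters down into Elasticsearch
        .load("tbsp_compute/cdl_server_register"); // "index/type" path
df.printSchema();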

2. Make sure that, on every node of the Elasticsearch cluster, the http.enabled setting in the Network And HTTP section of config/elasticsearch.yml is set to true; the connector talks to the cluster over the HTTP/REST layer.
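
For reference, the relevant lines in that section look like this (9200 is the default HTTP port and matches es.port in the demo below):

http.enabled: true
http.port: 9200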

3. Because I use the binary distribution from the Spark site, which is built against Scala 2.10, the dependency in my Maven project is as follows (the elastic docs use the 2.11 artifact):

<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch-spark_2.10</artifactId>
    <version>2.1.0</version>
</dependency>
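
Compiling the demo also requires Spark itself on the classpath. Assuming the versions above, a matching dependency would be the following (marked provided when submitting to a cluster that already ships Spark):

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.10</artifactId>
    <version>1.4.0</version>
    <scope>provided</scope>
</dependency>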

4. For my use case I need to reproduce the business logic of a DBA stored procedure. The demo is as follows:

package com.elastic.spark;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.api.java.UDF4;
import org.apache.spark.sql.types.DataTypes;
import org.elasticsearch.spark.sql.api.java.JavaEsSparkSQL;

public class Test {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("elastic-spark");
        conf.set("es.nodes", "9.115.42.77,9.115.42.89,9.115.42.95");
        conf.set("es.port", "9200");
        conf.set("pushdown", "true"); // push query filters down into Elasticsearch
        JavaSparkContext jsc = new JavaSparkContext(conf);
        SQLContext sql = new SQLContext(jsc);

        // "concat" is a user-defined function that joins its four arguments
        // into one string (the plain SQLContext in Spark 1.4 has no built-in concat).
        sql.udf().register("concat", new UDF4<String, String, Double, String, String>() {
            public String call(String str1, String str2, Double str3, String str4) throws Exception {
                return str1 + str2 + str3.toString() + str4;
            }
        }, DataTypes.StringType);

        // Load each index/type into a DataFrame and register it as a temp table.
        DataFrame cdl_server_register = JavaEsSparkSQL.esDF(sql, "tbsp_compute/cdl_server_register");
        cdl_server_register.registerTempTable("cdl_server_register");
        //cdl_server_register.show();

        DataFrame cdl_tbs_warn_exception = JavaEsSparkSQL.esDF(sql, "tbsp_compute/cdl_tbs_warn_exception");
        cdl_tbs_warn_exception.registerTempTable("cdl_tbs_warn_exception");
        //cdl_tbs_warn_exception.show();

        DataFrame cdl_tbs_utilization = JavaEsSparkSQL.esDF(sql, "tbsp_compute/cdl_tbs_utilization_2015-09-21");
        cdl_tbs_utilization.registerTempTable("cdl_tbs_utilization");
        //cdl_tbs_utilization.show();

        // Utilization rows left-joined against the per-tablespace threshold
        // exceptions; THD stays null for tablespaces without a custom threshold.
        DataFrame whole_set = sql.sql("SELECT c.HOST_NAME,c.INST_NAME,"
                + "c.DB_NAME,c.TBSP_NAME,c.TBSP_UTILIZATION_PERCENT,c.LOADED_TIMESTAMP,c.TBSP_TYPE,d.THD "
                + "FROM cdl_tbs_utilization c LEFT OUTER JOIN cdl_tbs_warn_exception d ON c.HOST_NAME=d.HOST_NAME AND "
                + "c.INST_NAME=d.INST_NAME AND c.DB_NAME=d.DB_NAME AND c.TBSP_NAME=d.TBS_NAME ORDER BY c.TBSP_NAME");
        whole_set.show();
        whole_set.registerTempTable("whole_set");
        System.out.println("the sum of the whole set is " + (int) whole_set.count());

        // Tablespaces without a custom threshold that exceed the default of 85%.
        DataFrame utilization_normal_warning = sql.sql("SELECT HOST_NAME,INST_NAME,DB_NAME,TBSP_NAME,TBSP_UTILIZATION_PERCENT,"
                + "LOADED_TIMESTAMP,TBSP_TYPE,85 as THD,concat(TBSP_NAME,' used ',TBSP_UTILIZATION_PERCENT,'% space!') as RESULT "
                + "FROM whole_set WHERE THD is null AND TBSP_UTILIZATION_PERCENT>85 ORDER BY TBSP_NAME");
        utilization_normal_warning.show((int) utilization_normal_warning.count());
        utilization_normal_warning.registerTempTable("utilization_normal");
        System.out.println("the sum of the normal set is " + (int) utilization_normal_warning.count());

        // Tablespaces with a custom threshold that exceed it.
        DataFrame utilization_excep_warning = sql.sql("SELECT c.HOST_NAME,c.INST_NAME,"
                + "c.DB_NAME,c.TBSP_NAME,c.TBSP_UTILIZATION_PERCENT,c.LOADED_TIMESTAMP,c.TBSP_TYPE,d.THD,"
                + "concat(c.TBSP_NAME,' used ',c.TBSP_UTILIZATION_PERCENT,'% space!') as RESULT "
                + "FROM cdl_tbs_utilization c JOIN cdl_tbs_warn_exception d ON c.HOST_NAME=d.HOST_NAME AND "
                + "c.INST_NAME=d.INST_NAME AND c.DB_NAME=d.DB_NAME AND c.TBSP_NAME=d.TBS_NAME "
                + "WHERE c.TBSP_UTILIZATION_PERCENT>d.THD ORDER BY c.TBSP_NAME");
        utilization_excep_warning.show((int) utilization_excep_warning.count());
        utilization_excep_warning.registerTempTable("utilization_excp");
        System.out.println("the sum of the excep set is " + (int) utilization_excep_warning.count());

        // Write both warning sets back into the same index/type.
        JavaEsSparkSQL.saveToEs(utilization_normal_warning, "tbsp_compute/warning_data_20150921");
        JavaEsSparkSQL.saveToEs(utilization_excep_warning, "tbsp_compute/warning_data_20150921");

        jsc.stop();
    }
}
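
To run the demo, the application jar is submitted together with the connector jar. A sketch of the spark-submit invocation, in which elastic-spark-demo.jar and the master URL are placeholders for your own build and cluster:

spark-submit --class com.elastic.spark.Test \
    --master local[4] \
    --jars elasticsearch-spark_2.10-2.1.0.jar \
    elastic-spark-demo.jar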

5. Demo output:
