通过 HTTPS 协议获取 Splunk 上的数据并写入 Hive
cacert.cer 证书需放在 resources 根路径下（代码通过 ClassLoader 的 getResourceAsStream("cacert.cer") 加载）。
- 依赖
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>3.0.0-RC1</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.47</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpcore -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.11</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.9</version>
</dependency>
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-handler</artifactId>
<version>4.1.11.Final</version>
</dependency>
- 工具类
package com.lenovo.splunk
import java.io.InputStream
import java.net.URL
import java.security.KeyStore
import java.security.cert.{Certificate, CertificateFactory}
import java.util
import javax.net.ssl._
/**
 * A [[javax.net.ssl.HostnameVerifier]] that approves every host name.
 *
 * NOTE(review): this disables host-name checking for any connection it is installed on,
 * so the server certificate is the only remaining protection against impersonation.
 */
class TrustAnyHostnameVerifier extends HostnameVerifier {
  // Both arguments are ignored: the answer is always "trusted".
  override def verify(s: String, sslSession: SSLSession): Boolean = true
}
/**
 * Minimal HTTPS client used to call the Splunk REST API.
 *
 * On construction the certificate(s) stored in the classpath resource `cacert.cer` are loaded
 * into an in-memory trust store; every connection opened by `splunkConn` uses an SSL socket
 * factory that trusts exactly those certificates.
 */
class HttpClient {
  // Built once per instance. The certificate stream is closed as soon as the factory exists,
  // and a missing resource is reported explicitly instead of failing deep inside the JDK.
  private val sslsf: SSLSocketFactory = {
    val certStream = Option(getClass.getClassLoader.getResourceAsStream("cacert.cer"))
      .getOrElse(throw new IllegalStateException("cacert.cer not found on the classpath"))
    try certificateConfirm(certStream)
    finally certStream.close()
  }

  /**
   * Opens and connects an HTTPS GET request.
   *
   * @param url   absolute `https://` URL to request
   * @param token bearer token sent in the `Authorization` header
   * @return the connected [[javax.net.ssl.HttpsURLConnection]]; the caller is responsible
   *         for reading the response and disconnecting
   * @throws IllegalArgumentException if `url` does not yield an HTTPS connection
   */
  def splunkConn(url: String, token: String): HttpsURLConnection = {
    val conn = new URL(url).openConnection() match {
      case https: HttpsURLConnection => https
      case other =>
        throw new IllegalArgumentException(
          "Expected an HTTPS URL but got a " + other.getClass.getName + " for " + url)
    }
    conn.setRequestMethod("GET")
    // Connect timeout: 15 000 ms.
    conn.setConnectTimeout(15000)
    // Read timeout: 60 000 ms.
    conn.setReadTimeout(60000)
    conn.setRequestProperty("Authorization", "Bearer " + token)
    // NOTE(review): host-name verification is disabled here; only the pinned certificate
    // from cacert.cer protects the connection. Confirm this is intended.
    conn.setHostnameVerifier(new TrustAnyHostnameVerifier())
    conn.setSSLSocketFactory(sslsf)
    // Send the request.
    conn.connect()
    conn
  }

  /**
   * Builds an SSL socket factory that trusts only the X.509 certificates read from `in`.
   *
   * The stream is not closed by this method.
   *
   * @param in stream containing one or more X.509 certificates
   * @return a TLSv1.2 socket factory backed by a trust store holding those certificates
   * @throws IllegalArgumentException if the stream contains no certificate
   */
  def certificateConfirm(in: InputStream): SSLSocketFactory = {
    import scala.collection.JavaConverters._

    val certificates: util.Collection[_ <: Certificate] =
      CertificateFactory.getInstance("X.509").generateCertificates(in)
    require(!certificates.isEmpty, "No X.509 certificate found in the supplied stream")

    // Start from an empty in-memory key store and add each certificate under the
    // aliases "1", "2", ...
    val ks = KeyStore.getInstance(KeyStore.getDefaultType)
    ks.load(null, null)
    certificates.asScala.zipWithIndex.foreach { case (cert, idx) =>
      ks.setCertificateEntry((idx + 1).toString, cert)
    }

    // Derive an X509TrustManager from the key store that holds the certificates.
    val tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
    tmf.init(ks)
    val x509tm = tmf.getTrustManagers
      .collectFirst { case tm: X509TrustManager => tm }
      .getOrElse(throw new IllegalStateException("No X509TrustManager available"))

    // Initialise the SSL context with that trust manager only (no client key material).
    val sslc = SSLContext.getInstance("TLSv1.2")
    sslc.init(null, Array[TrustManager](x509tm), null)
    sslc.getSocketFactory
  }
}
- 主类
package com.lenovo.splunk
import java.io.{BufferedReader, InputStreamReader}
import com.alibaba.fastjson.JSON
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
/**
 * Spark job that copies the results of a Splunk search job into the Hive table
 * `ccsd.t_upp_crew_map_ariba_user`.
 *
 * Flow: look up the job by name, derive one results URL per page, truncate the target
 * table, then fetch every page and append its rows to the table.
 */
object Splunk2Hive {

  /** Number of rows requested per page from the Splunk results endpoint. */
  private val PageSize = 500

  // One client for the whole run, so the certificate is loaded only once.
  private lazy val httpClient = new HttpClient()

  /**
   * Job entry point. `args` is not used.
   *
   * Note that the target table is truncated before the pages are fetched, so a failure
   * while paging leaves the table partially loaded.
   */
  def main(args: Array[String]): Unit = {
    val ss = SparkSession
      .builder()
      .appName("Splunk2Hive")
      .master("yarn")
      .enableHiveSupport()
      .getOrCreate()
    // NOTE(review): a bearer token is committed in source. Consider moving it to a secure
    // configuration source and rotating it.
    val token = "eyJraWQiOiJzcGx1bmsuc2VjcmV0IiwiYWxnIjoiSFM1MTIiLCJ2ZXIiOiJ2MSIsInR0eXAiOiJzdGF0aWMifQ" +
      ".eyJpc3MiOiJsaXVjZTFAbGVub3ZvLmNvbSBmcm9tIGl0c2kubGVub3ZvLmNvbSIsInN1YiI6InN5c191cHAiLCJhdWQiOiJ" +
      "HZXQgZGF0ZSBmcm9tIFNwbHVuayB0byBMdWRwIGZvciBVUFAiLCJpZHAiOiJzcGx1bmsiLCJqdGkiOiIyMjM2MmZhZjkwOWZ" +
      "kZGE2NWNmMTUyNTI1ZDgyNmRhNjY4MGFiMGExMTg0YzYzYzI3MTM4OGUyNzU2OWE3MDVhIiwiaWF0IjoxNTc4OTA3NTQwLCJ" +
      "leHAiOjAsIm5iciI6MTU3ODkwNzU0MH0.Z9XV5wlUoEponAUhxL7HBrYAoyLtMQEWH2chZXyJkFYhdQb5M7b1pUdCD0BBfSj" +
      "euRi8Dx_pYdnCs4XHXTb5Tg"
    val splunk_job_name = "ARIBA_USER_VIEW"
    val url = "https://10.122.47.3:8089/servicesNS/nobody/itsi/search/jobs?search=" + splunk_job_name +
      "&output_mode=json&count=1"
    try {
      val job = getJob(url, token)
      val url_arr = getPrdUrl(getJobId(job), getRecordSize(job))
      ss.sql("truncate table ccsd.t_upp_crew_map_ariba_user")
      for (pageUrl <- url_arr) {
        val json_str = getJob(pageUrl, token)
        val rows = getResults(json_str)
        // A page without rows has nothing to insert; skip it.
        if (rows.nonEmpty) {
          val schema = getFields(json_str)
          val values = ss.sparkContext
            .parallelize(rows)
            .map(row => Row.fromSeq(row))
          ss.createDataFrame(values, schema)
            .createOrReplaceTempView("ariba")
          ss.sql("insert into table ccsd.t_upp_crew_map_ariba_user select * from ariba")
        }
      }
    } finally {
      ss.stop()
    }
  }

  /**
   * Performs a GET request and returns the response body.
   *
   * @param url   URL to request
   * @param token bearer token for the `Authorization` header
   * @return the response body decoded as UTF-8, with every line terminated by `\r\n`
   * @throws java.io.IOException if the server does not answer with HTTP 200
   */
  def getJob(url: String, token: String): String = {
    val conn = httpClient.splunkConn(url, token)
    try {
      val status = conn.getResponseCode
      if (status != 200) {
        // Fail here with the real cause rather than returning "" and hitting a
        // NullPointerException later when the empty body is parsed as JSON.
        throw new java.io.IOException("Splunk request failed with HTTP " + status + " for " + url)
      }
      val br = new BufferedReader(new InputStreamReader(conn.getInputStream, "UTF-8"))
      try {
        Iterator
          .continually(br.readLine())
          .takeWhile(_ != null)
          .map(_ + "\r\n")
          .mkString
      } finally {
        // Closing the reader also closes the underlying connection stream.
        br.close()
      }
    } finally {
      conn.disconnect()
    }
  }

  /**
   * Reads the number of records reported for the job.
   *
   * NOTE(review): the value is taken from `eventCount` and later used to page the
   * `/results` endpoint; confirm it matches the number of result rows for this search.
   *
   * @param job JSON text of the job listing
   * @return `entry[0].content.eventCount`
   */
  def getRecordSize(job: String): Int = {
    JSON.parseObject(job)
      .getJSONArray("entry")
      .getJSONObject(0)
      .getJSONObject("content")
      .getInteger("eventCount")
  }

  /**
   * Reads the id of the job.
   *
   * @param job JSON text of the job listing
   * @return `entry[0].id`
   */
  def getJobId(job: String): String = {
    JSON.parseObject(job)
      .getJSONArray("entry")
      .getJSONObject(0)
      .getString("id")
  }

  /**
   * Builds the Spark schema from the column names of a results page.
   *
   * @param json_str JSON text of one results page
   * @return a schema with one nullable string column per entry of the `fields` array
   */
  def getFields(json_str: String): StructType = {
    val fields = JSON.parseObject(json_str).getJSONArray("fields")
    val structFields = (0 until fields.size()).map { i =>
      StructField(fields.getJSONObject(i).getString("name"), StringType, nullable = true)
    }
    StructType(structFields)
  }

  /**
   * Extracts the rows of a results page.
   *
   * @param json_str JSON text of one results page
   * @return one buffer per entry of the `results` array, holding that entry's values in
   *         the order of the `fields` array
   */
  def getResults(json_str: String): ArrayBuffer[ArrayBuffer[String]] = {
    // Parse once and reuse the document for both the column names and the rows.
    val payload = JSON.parseObject(json_str)
    val fields_arr = payload.getJSONArray("fields")
    val names = (0 until fields_arr.size()).map(i => fields_arr.getJSONObject(i).getString("name"))
    val results = payload.getJSONArray("results")
    val result_arr = new ArrayBuffer[ArrayBuffer[String]]()
    for (m <- 0 until results.size()) {
      val record = results.getJSONObject(m)
      val row = new ArrayBuffer[String]()
      names.foreach(name => row += record.getString(name))
      result_arr += row
    }
    result_arr
  }

  /**
   * Builds the paged results URLs for a job.
   *
   * One URL is produced per page of `PageSize` rows, including the final, possibly
   * shorter page; no URL is produced when `records_size` is zero or negative.
   *
   * @param job_id       job id; used as the URL prefix
   * @param records_size total number of records to fetch
   * @return URLs of the form `<job_id>/results?count=500&output_mode=json&offset=<n>`
   */
  def getPrdUrl(job_id: String, records_size: Int): ArrayBuffer[String] = {
    val ab = new ArrayBuffer[String]()
    for (offset <- 0 until records_size by PageSize) {
      ab += job_id + "/results?" + "count=" + PageSize + "&output_mode=json" + "&offset=" + offset
    }
    ab
  }
}