http协议获取splunk上数据并写入hive

http协议获取splunk上数据并写入hive

cacert.cer 证书需放在 resources 根路径下(代码通过类加载器的 getResourceAsStream("cacert.cer") 从 classpath 根目录读取)。
-依赖

<dependency>
            <groupId>com.squareup.okhttp3</groupId>
            <artifactId>okhttp</artifactId>
            <version>3.0.0-RC1</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpcore -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpcore</artifactId>
            <version>4.4.11</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.9</version>
        </dependency>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty-handler</artifactId>
            <version>4.1.11.Final</version>
        </dependency>
  • 工具类
package com.lenovo.splunk


import java.io.InputStream
import java.net.URL
import java.security.KeyStore
import java.security.cert.{Certificate, CertificateFactory}
import java.util

import javax.net.ssl._

/**
  * A [[javax.net.ssl.HostnameVerifier]] that accepts every host name.
  *
  * SECURITY NOTE: with this verifier installed, the host name in the server certificate is
  * never compared with the host being contacted, so only the certificate chain itself is
  * checked by the TLS layer.
  */
class TrustAnyHostnameVerifier extends HostnameVerifier {

  /**
    * Approves the connection unconditionally.
    *
    * @param s          host name being connected to (ignored)
    * @param sslSession TLS session of the connection (ignored)
    * @return always `true`
    */
  override def verify(s: String, sslSession: SSLSession): Boolean = true
}
/**
  * Minimal HTTPS client used to call the Splunk REST API.
  *
  * On construction it loads `cacert.cer` from the classpath root and builds an
  * `SSLSocketFactory` that trusts only the certificate(s) contained in that file.
  */
class HttpClient {

  // Built once per instance. The certificate stream is closed as soon as the factory exists,
  // and a missing resource is reported by name instead of failing inside the certificate parser.
  private val sslsf: SSLSocketFactory = {
    val certStream = this.getClass.getClassLoader.getResourceAsStream("cacert.cer")
    if (certStream == null) {
      throw new IllegalStateException("cacert.cer was not found on the classpath")
    }
    try certificateConfirm(certStream)
    finally certStream.close()
  }

  /**
    * Opens an authenticated GET connection to the given HTTPS endpoint and connects it.
    *
    * @param url   absolute `https://` URL to request
    * @param token bearer token sent in the `Authorization` header
    * @return the connected [[javax.net.ssl.HttpsURLConnection]]; the caller is responsible for
    *         reading the response and disconnecting
    * @throws IllegalArgumentException if `url` does not yield an HTTPS connection
    */
  def splunkConn(url: String, token: String): HttpsURLConnection = {
    val conn = new URL(url).openConnection() match {
      case https: HttpsURLConnection => https
      case _ => throw new IllegalArgumentException("Expected an https URL but got: " + url)
    }

    conn.setRequestMethod("GET")
    // Connect timeout: 15 s; read timeout: 60 s.
    conn.setConnectTimeout(15000)
    conn.setReadTimeout(60000)
    conn.setRequestProperty("Authorization", "Bearer " + token)
    // SECURITY NOTE(review): host name verification is disabled here, so the certificate is not
    // tied to the host being contacted. Trust rests solely on the pinned cacert.cer below.
    conn.setHostnameVerifier(new TrustAnyHostnameVerifier())
    conn.setSSLSocketFactory(sslsf)
    conn.connect()

    conn
  }

  /**
    * Builds an `SSLSocketFactory` whose trust store contains exactly the X.509 certificates
    * read from `in`.
    *
    * The stream is not closed by this method.
    *
    * @param in stream holding one or more X.509 certificates
    * @return a TLSv1.2 socket factory trusting only those certificates
    * @throws IllegalArgumentException if no certificate can be read from `in`
    */
  def certificateConfirm(in: InputStream): SSLSocketFactory = {
    val cf = CertificateFactory.getInstance("X.509")
    val certificates: util.Collection[_ <: Certificate] = cf.generateCertificates(in)
    require(!certificates.isEmpty, "no X.509 certificate could be read from the supplied stream")

    // Start from an empty in-memory key store (null stream => nothing to load or decrypt).
    val ks = KeyStore.getInstance(KeyStore.getDefaultType)
    ks.load(null, null)

    // Walk the Java collection with its own iterator so the code does not rely on the
    // deprecated implicit scala.collection.JavaConversions. Aliases are "1", "2", ...
    var index = 0
    val it = certificates.iterator()
    while (it.hasNext) {
      index += 1
      ks.setCertificateEntry(Integer.toString(index), it.next())
    }

    // Derive trust managers from the key store that now holds the pinned certificates.
    val tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
    tmf.init(ks)

    // No client key material is needed, hence the null key managers.
    val sslc = SSLContext.getInstance("TLSv1.2")
    sslc.init(null, tmf.getTrustManagers, null)
    sslc.getSocketFactory
  }
}

  • 主类
package com.lenovo.splunk

import java.io.{BufferedReader, InputStreamReader}

import com.alibaba.fastjson.JSON
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{StringType, StructField, StructType}

import scala.collection.mutable.{ArrayBuffer, ListBuffer}


/**
  * Spark job that copies the results of a Splunk search job into the Hive table
  * `ccsd.t_upp_crew_map_ariba_user`.
  *
  * Flow: look up the job by name, read its id and event count, build one results URL per
  * page, truncate the target table, then insert every page.
  */
object Splunk2Hive {

  /** Number of rows requested from Splunk per results page. */
  private val PageSize = 500

  def main(args: Array[String]): Unit = {
    val ss = SparkSession
      .builder()
      .appName("Splunk2Hive")
      .master("yarn")
      .enableHiveSupport()
      .getOrCreate()

    // SECURITY NOTE(review): this bearer token is a credential embedded in source code.
    // It should be rotated and supplied from configuration or a secret store instead.
    val token = "eyJraWQiOiJzcGx1bmsuc2VjcmV0IiwiYWxnIjoiSFM1MTIiLCJ2ZXIiOiJ2MSIsInR0eXAiOiJzdGF0aWMifQ" +
      ".eyJpc3MiOiJsaXVjZTFAbGVub3ZvLmNvbSBmcm9tIGl0c2kubGVub3ZvLmNvbSIsInN1YiI6InN5c191cHAiLCJhdWQiOiJ" +
      "HZXQgZGF0ZSBmcm9tIFNwbHVuayB0byBMdWRwIGZvciBVUFAiLCJpZHAiOiJzcGx1bmsiLCJqdGkiOiIyMjM2MmZhZjkwOWZ" +
      "kZGE2NWNmMTUyNTI1ZDgyNmRhNjY4MGFiMGExMTg0YzYzYzI3MTM4OGUyNzU2OWE3MDVhIiwiaWF0IjoxNTc4OTA3NTQwLCJ" +
      "leHAiOjAsIm5iciI6MTU3ODkwNzU0MH0.Z9XV5wlUoEponAUhxL7HBrYAoyLtMQEWH2chZXyJkFYhdQb5M7b1pUdCD0BBfSj" +
      "euRi8Dx_pYdnCs4XHXTb5Tg"

    val splunk_job_name = "ARIBA_USER_VIEW"

    val url = "https://10.122.47.3:8089/servicesNS/nobody/itsi/search/jobs?search="+ splunk_job_name +
      "&output_mode=json&count=1"

    // Resolve the job and its page URLs before touching the table, so a failed lookup
    // does not leave the table truncated.
    val job = getJob(url, token)
    val url_arr = getPrdUrl(getJobId(job), getRecordSize(job))

    // NOTE: truncate + per-page insert is not atomic; a failure part-way through leaves the
    // table partially loaded.
    ss.sql("truncate table ccsd.t_upp_crew_map_ariba_user")
    for (page_url <- url_arr) {
      val json_str = getJob(page_url, token)
      val rows = getResults(json_str)
      // A page without rows has nothing to insert; skipping it also avoids building a
      // DataFrame from a page whose field list may be empty.
      if (rows.nonEmpty) {
        val schema = getFields(json_str)
        val values = ss.sparkContext
          .parallelize(rows)
          .map(row => Row.fromSeq(row))
        ss.createDataFrame(values, schema)
          .createOrReplaceTempView("ariba")
        ss.sql("insert into table ccsd.t_upp_crew_map_ariba_user select * from ariba")
      }
    }
  }

  /**
    * Performs an authenticated GET against Splunk and returns the response body.
    *
    * Every line of the body is terminated with `\r\n` in the returned string. The reader
    * and the connection are always released, even when reading fails.
    *
    * @param url   absolute HTTPS URL to request
    * @param token bearer token for the `Authorization` header
    * @return the response body decoded as UTF-8
    * @throws java.io.IOException if the server answers with a status other than 200
    */
  def getJob(url: String, token: String): String = {
    val conn = new HttpClient().splunkConn(url, token)
    try {
      val status = conn.getResponseCode
      if (status != 200) {
        // Fail here with the status code instead of returning "" and letting the JSON
        // parsing in the callers fail later with a NullPointerException.
        throw new java.io.IOException("Splunk request failed with HTTP " + status + " for " + url)
      }
      val br = new BufferedReader(new InputStreamReader(conn.getInputStream, "UTF-8"))
      try {
        Iterator
          .continually(br.readLine())
          .takeWhile(_ != null)
          .map(_ + "\r\n")
          .mkString
      } finally {
        br.close()
      }
    } finally {
      conn.disconnect()
    }
  }

  /**
    * Reads `entry[0].content.eventCount` from a job-listing response.
    *
    * @param job JSON body returned by the job lookup
    * @return the event count of the first job entry
    */
  def getRecordSize(job: String): Int = {
    JSON.parseObject(job)
      .getJSONArray("entry")
      .getJSONObject(0)
      .getJSONObject("content")
      .getInteger("eventCount")
  }

  /**
    * Reads `entry[0].id` from a job-listing response.
    *
    * @param job JSON body returned by the job lookup
    * @return the id of the first job entry; it is used as the URL prefix for result pages
    */
  def getJobId(job: String): String = {
    JSON.parseObject(job)
      .getJSONArray("entry")
      .getJSONObject(0)
      .getString("id")
  }

  /** Extracts the `name` of every element of the `fields` array, in order. */
  private def fieldNames(parsed: com.alibaba.fastjson.JSONObject): Vector[String] = {
    val fields = parsed.getJSONArray("fields")
    (0 until fields.size()).map(i => fields.getJSONObject(i).getString("name")).toVector
  }

  /**
    * Builds the DataFrame schema from the `fields` array of a results page.
    *
    * @param json_str JSON body of a results page
    * @return a schema with one nullable string column per field, in field order
    */
  def getFields(json_str: String): StructType = {
    val names = fieldNames(JSON.parseObject(json_str))
    StructType(names.map(name => StructField(name, StringType, nullable = true)))
  }

  /**
    * Extracts the row values from the `results` array of a results page.
    *
    * @param json_str JSON body of a results page
    * @return one buffer per result; each holds the string value of every field in the order
    *         of the `fields` array (`null` where a result lacks that field)
    */
  def getResults(json_str: String): ArrayBuffer[ArrayBuffer[String]] = {
    val parsed = JSON.parseObject(json_str)
    val names = fieldNames(parsed)
    val results = parsed.getJSONArray("results")

    val result_arr = new ArrayBuffer[ArrayBuffer[String]]()
    for (m <- 0 until results.size()) {
      val record = results.getJSONObject(m)
      val row = new ArrayBuffer[String]()
      names.foreach(name => row += record.getString(name))
      result_arr += row
    }
    result_arr
  }

  /**
    * Builds the URLs of all result pages of a job.
    *
    * One URL is produced per page of 500 rows, including the final, possibly partial,
    * page: offsets are 0, 500, 1000, ... while the offset is below `records_size`.
    *
    * @param job_id       job id used as the URL prefix
    * @param records_size total number of records to fetch
    * @return the page URLs in ascending offset order; empty when `records_size` is not positive
    */
  def getPrdUrl(job_id: String, records_size: Int): ArrayBuffer[String] = {
    val ab = new ArrayBuffer[String]()
    for (offset <- 0 until records_size by PageSize) {
      ab += job_id + "/results?" + "count=" + PageSize + "&output_mode=json" + "&offset=" + offset
    }
    ab
  }
}

发布了34 篇原创文章 · 获赞 5 · 访问量 1万+
展开阅读全文

没有更多推荐了,返回首页

©️2019 CSDN 皮肤主题: 创作都市 设计师: CSDN官方博客

分享到微信朋友圈

×

扫一扫,手机浏览