spark链接ldap获取数据(thumbnailPhoto属性获取以及转成base64图片格式存储)

spark链接ldap获取数据

  • 依赖
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-distcp</artifactId>
            <version>2.6.0-cdh5.5.0</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-maven-plugins</artifactId>
            <version>2.6.0-cdh5.5.0</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.8</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.1.1</version>
            <type>jar</type>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.1</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.9</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.4</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.4</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.4</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.0.0</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.0.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.25</version>
            <scope>provided</scope>
        </dependency>
  • MySQL工具类
package com.lenovo.Utils

class MysqlSession {
  //云测试环境
  val url = "jdbc:mysql://10.122.64.84:3306/test?rewriteBatchedStatements=true&useUnicode=true&characterEncoding=UTF-8"
  val user = "root"
  val password = "test"
  val driver = "com.mysql.jdbc.Driver"
}
  • 代码
package com.lenovo.ldap

import java.text.SimpleDateFormat
import java.util
import java.util.{Date, Properties}

import com.lenovo.Utils.MysqlSession
import javax.naming.directory.{Attribute, SearchControls}
import javax.naming.{AuthenticationException, Context, NamingEnumeration, NamingException}
import javax.naming.ldap._
import org.apache.commons.codec.binary.Base64
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}

import scala.collection.mutable.{ArrayBuffer, ListBuffer}

object GetDataFromLDAP {
  def main(args: Array[String]): Unit = {
    val ss = SparkSession
      .builder()
      .master("yarn")
      .appName("GetDataFromLDAP")
      //.config("spark.sql.warehouse.dir", "file:///E://lenovo_pj//upp//upp_big_data//upp//recommend")
      .getOrCreate()

    val ms = new MysqlSession

    val tb_pw = "ad_user_ldap_password"
    val conn = Map("url" -> ms.url, "dbtable" -> tb_pw, "user" -> ms.user, "password" -> ms.password, "driver" -> ms.driver)
    val ldapPW = ss.read.options(conn).format("jdbc").load().head().get(0).toString

    val ldapUser =  "ladp01"
    val ldapContext = ldapConnect(ldapUser,ldapPW)

    //ldap搜索字段
    val searchWords = Array(
      "name"                          ,"l"                            ,"c",
      "mail"                          ,"title"                        ,"displayName",
      "employeeType"                  ,"department"                   ,"manager",
      "physicalDeliveryOfficeName"    ,"mobile"                       ,"telephoneNumber",
      "pwdLastSet"                    ,"whenCreated"                  ,"accountExpires",
      "userAccountControl"            ,"company"                      ,"otherPager",
      "memberOf"                      ,"msRTCSIP-PrimaryUserAddress"  ,"userPrincipalName",
      "msExchExtensionAttribute16"    ,"thumbnailPhoto"
    )

    //
    //mysql数据库除了ou之外所有的column名字
    val mysqlColumn = Array(
      "itcode"                        ,"city"                         ,"country",
      "mail"                          ,"title"                        ,"display_name",
      "employee_type"                 ,"department"                   ,"manager",
      "office_address"                ,"mobile_phone"                 ,"work_phone",
      "pwd_last_set"                  ,"when_created"                 ,"account_expires",
      "user_account_control"          ,"company"                      ,"external_email",
      "member_of"                     ,"msrtcsip_primary_user_address","user_principal_name",
      "local_name"                    ,"thumbnail_photo"
    )
    //
    val fields = new ListBuffer[StructField]()
    for(col <- mysqlColumn){
      fields += StructField(col,StringType)
    }
    fields += StructField("ou",StringType)
    val schema = StructType(fields)

    val prop = new Properties()
    prop.setProperty("user",ms.user)
    prop.setProperty("password",ms.password)
    prop.setProperty("driver",ms.driver)
    prop.setProperty("charset","UTF-8")

    //用户搜索的过滤表达式(所有用户账号)
    val filter = "sAMAccountName=*"
    //ldap搜索的根目录名(ou)在职员工
    val ouArr = Array("AG","AP","China","EMEA","Motorola")

    //ldap搜索的根目录名(ou)离职员工
    //val ouArr = Array("disabled accounts")
    //遍历ou读取数据并写到MySQL数据库

    //目录树查询参数设置
    val searchControls = new SearchControls
    //设置搜索范围(所有目录树的子树)
    searchControls.setSearchScope(SearchControls.SUBTREE_SCOPE)
    //设置搜索字段
    searchControls.setReturningAttributes(searchWords)

    val baseArr = ArrayBuffer[Map[String, Any]]()
    for(ou <- ouArr){
      //用户目录com/lenovo/User Accounts/+ ou 在职员工
      val baseDir = "OU=" + ou +",OU=User Accounts,DC=lenovo,DC=com"

      //CN=0065lo,OU=disabled accounts,DC=lenovo,DC=com 离职员工
      //val baseDir = "OU=" + ou +",DC=lenovo,DC=com"


      val pageSize = 1000
      var cookie: Array[Byte] = null
      var num = 0

      //循环检索数据页
      try {
        //分页读取请求控制
        ldapContext.setRequestControls(Array[Control](new PagedResultsControl(pageSize,Control.CRITICAL)))
        do {
          val results = ldapContext.search(baseDir, filter, searchControls)
          //循环检索每一页数据
          while (results != null && results.hasMore) {
            num = num + 1
            val attributes = results.next().getAttributes
            if (attributes.size() > 0) {
              val all: NamingEnumeration[_ <: Attribute] = attributes.getAll
              //循环检索每一行数据所有属性
              baseArr += lineForeach(all)
            }
            if(num >= 2000){
              val value = ss.sparkContext
                .parallelize(baseArr)
                .map(m => {
                  val arr = ArrayBuffer[String]()
                  for (word <- searchWords) {arr += m.getOrElse(word,"").toString}
                  arr += ou
                  Row.fromSeq(arr)
                })
              ss.createDataFrame(value,schema)
                .write
                .mode(SaveMode.Append)
                .jdbc(ms.url,"ad_user_ldap",prop)

              //重设参数
              num = 0
              //清空数组
              baseArr.clear()
            }
          }
          cookie = checkCookie(ldapContext.getResponseControls)
          ldapContext.setRequestControls(Array[Control](new PagedResultsControl(pageSize,cookie,Control.CRITICAL)))
        } while (cookie != null && cookie.length != 0)
      } catch {
        case e:Exception => e.printStackTrace()
      }

      val value = ss.sparkContext
        .parallelize(baseArr)
        .map(m => {
          val arr = ArrayBuffer[String]()
          for (word <- searchWords) {arr += m.getOrElse(word,"").toString}
          arr += ou
          Row.fromSeq(arr)
        })
      ss.createDataFrame(value,schema)
        .write
        .mode(SaveMode.Append)
        .jdbc(ms.url,"ad_user_ldap",prop)

      //清空数组
      baseArr.clear()
    }
    //关闭ldap链接
    ldapContext.close()

    ss.stop()
  }
  /**
    * 建立LDAP链接
    * @param user ldap账号
    * @param password ldap账号密码
    * @return LdapContext(LDAP链接)
    */
  def ldapConnect(user: String, password: String):LdapContext = {
    val url = "ldap://lenovo.com"
    val factory = "com.sun.jndi.ldap.LdapCtxFactory"
    val env = new util.Hashtable[String,String]()
    env.put(Context.INITIAL_CONTEXT_FACTORY, factory)
    env.put(Context.SECURITY_AUTHENTICATION, "simple")
    env.put(Context.PROVIDER_URL, url)
    env.put(Context.SECURITY_PRINCIPAL, user)
    env.put(Context.SECURITY_CREDENTIALS, password)
    env.put(Context.BATCHSIZE, "1000")

    var ldapContext: LdapContext = null
    try {
      ldapContext = new InitialLdapContext(env, null)
      println("ldap connect success")
    } catch {
      case _:AuthenticationException => println("ldap认证失败")
      case _:NamingException         => println("ldap参数有误导致连接失败")
    }
    ldapContext
  }

  /**
    * 循环检索每一行数据所有属性
    * @param attrAll ldap属性数组
    * @return
    */
  def lineForeach(attrAll: NamingEnumeration[_ <: Attribute]):Map[String,Any]={
    val sdf_from = new SimpleDateFormat("yyyyMMddhhmmss")
    val sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss")
    var row:Map[String,Any] = Map()
    while (attrAll.hasMore) {
      val attr = attrAll.next()
      val key = attr.getID
      var value = attr.get()
      if ("manager".equals(key)) {
        value = value.toString.split(",")(0).split("=")(1)
      } else if ("whenCreated".equals(key)) {
        value = sdf.format(sdf_from.parse(value.toString.substring(0, 14)))
      } else if ("pwdLastSet".equals(key)) {
        value = timeChange(value.toString, sdf)
      } else if ("accountExpires".equals(key)) {
        if (value.toString.length == 18) value = timeChange(value.toString, sdf)
      } else if ("memberOf".equals(key)) {
        val all = attr.getAll
        var v = ""
        while (all != null && all.hasMore) {
          if ("".equals(value)) {
            v = all.next().toString
          } else {
            v += "|" + all.next().toString
          }
        }
        value = v
      }else if("thumbnailPhoto".equals(key)){
        value = "data:image/jpeg;base64,"+bytesToBase64(value.asInstanceOf[Array[Byte]])
      }
      row += (key -> value.toString)
    }
    row
  }

  /**
    * byte数组转换成十六进制(bytesToHexString)
    * @param b_arr   byte数组
    * @return
    */
  def bytesToBase64(b_arr:Array[Byte]): String ={
    Base64.encodeBase64String(b_arr)
  }
  /**
    * 用于判断分页查询生成的唯一标识cookie,进而判断是否查询完数据
    * @param controls 分页控制cookie
    * @return
    */
  def checkCookie(controls:Array[Control]):Array[Byte] ={
    var cookie: Array[Byte] = null
    if(controls.length != 0) for (c <- controls) {
      if (c.isInstanceOf[PagedResultsResponseControl])
        cookie = c.asInstanceOf[PagedResultsResponseControl].getCookie
    }
    cookie
  }

  /**
    * 时间格式转换:
    *   ldap的时间戳是从1601年1月1日0时起经过的1E-7秒(即100纳秒)的个数(时间是GMT的,中国的北京的时间需要加上8个小时)
    *   例如:以lastLogon、pwdLastSet、accountExpires等属性为代表(输出没有'Z'结尾)
    * @param ldapTime ldap时间戳
    * @param sdf 时间戳格式化格式
    * @return
    */
  def timeChange(ldapTime: String, sdf: SimpleDateFormat):String = {
    val BJTime =  ldapTime.toLong/10000 - 11644473600125L
    sdf.format(new Date(BJTime))
  }
  
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

不会飞的乌龟

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值