1.生成本地主机秘钥进行配置
2.安装Kerberos 客户端
ubuntu:
apt-get install krb5-user libpam-krb5 libpam-ccreds auth-client-config
centos:
yum install krb5-workstation
2.配置本地的hostname-ip 在远程集群livy,Kerberos集群,在本地hosts文件中配置远程集群(Kerberos),livy的hostname ip映射关系
2.下载远程集群的etc/krb5.conf 文件
[libdefaults]
renew_lifetime = 7d
forwardable = true
default_realm = EXAPMLE.COM
ticket_lifetime = 24h
dns_lookup_realm = false
dns_lookup_kdc = false
default_ccache_name = /tmp/krb5cc_%{uid}
#default_tgs_enctypes = aes des3-cbc-sha1 rc4 des-cbc-md5
#default_tkt_enctypes = aes des3-cbc-sha1 rc4 des-cbc-md5

[domain_realm]
gai.com = EXAPMLE.COM

[logging]
default = FILE:/var/log/krb5kdc.log
admin_server = FILE:/var/log/kadmind.log
kdc = FILE:/var/log/krb5kdc.log

[realms]
EXAPMLE.COM = {
admin_server = kdc所在主机的hostname或ip
kdc = kdc所在主机的hostname或ip
}
2.配置jaas文件
vim jaas.conf:
Client {
com.sun.security.auth.module.Krb5LoginModule required
storeKey=true
debug=true
useTicketCache=false
refreshKrb5Config=true
useKeyTab=true
# keytab所在目录,如果文件不支持注释,配置后删除注释
keyTab="/home/jerry/keytab/dp.jerry.keytab"
# principal
principal="dp/jerry@EXAPMLE.COM";
};
3.下载maven(gradle)依赖
<dependencies>
  <dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.4</version>
  </dependency>
  <!-- https://mvnrepository.com/artifact/net.sourceforge.spnego/spnego -->
  <dependency>
    <groupId>net.sourceforge.spnego</groupId>
    <artifactId>spnego</artifactId>
    <version>7.0</version>
  </dependency>
  <dependency>
    <groupId>com.cloudera.livy</groupId>
    <artifactId>livy-client-http</artifactId>
    <version>0.2.0</version>
  </dependency>
</dependencies>
4.编码:配置
a.进行livy访问的远程工具类
KBHttpUtils.java
package com.myd.cn.livy;

import net.sourceforge.spnego.SpnegoHttpURLConnection;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Map;

/**
 * HTTP helper for calling a SPNEGO/Kerberos-protected REST endpoint (e.g. Livy).
 *
 * <p>Each call opens a fresh {@link SpnegoHttpURLConnection} using the JAAS
 * login entry named {@code "Client"} (see jaas.conf), sends the request, and
 * returns the response body decoded as UTF-8. Errors are printed and an empty
 * (or partial) string is returned — best-effort semantics, unchanged from the
 * original tutorial code.
 */
public class KBHttpUtils {

    /**
     * HTTP GET request.
     *
     * @param url     target URL
     * @param headers request headers (may be null or empty)
     * @return the response body, or an empty/partial string on failure
     */
    public static String getAccess(String url, Map<String, String> headers) {
        return access("GET", url, headers, null);
    }

    /**
     * HTTP DELETE request.
     *
     * @param url     target URL
     * @param headers request headers (may be null or empty)
     * @return the response body, or an empty/partial string on failure
     */
    public static String deleteAccess(String url, Map<String, String> headers) {
        return access("DELETE", url, headers, null);
    }

    /**
     * HTTP POST request.
     *
     * @param url     target URL
     * @param headers request headers (may be null or empty)
     * @param data    request body (UTF-8 encoded), or null for an empty body
     * @return the response body, or an empty/partial string on failure
     */
    public static String postAccess(String url, Map<String, String> headers, String data) {
        return access("POST", url, headers, data);
    }

    /**
     * Shared implementation for GET/DELETE/POST — the three public methods were
     * near-duplicates and are now thin wrappers around this helper.
     *
     * <p>Fixes over the original: the response is collected as raw bytes and
     * decoded once as UTF-8, so multibyte characters can no longer be corrupted
     * when they straddle the 1024-byte read-buffer boundary; the request body is
     * also encoded explicitly as UTF-8 instead of the platform default charset.
     *
     * @param method HTTP method ("GET", "DELETE", "POST")
     * @param url    target URL
     * @param headers request headers (may be null)
     * @param data   request body, or null for no body
     * @return response body decoded as UTF-8; empty/partial on error
     */
    private static String access(String method, String url, Map<String, String> headers, String data) {
        StringBuilder sb = new StringBuilder();
        try (ByteArrayOutputStream requestBody = new ByteArrayOutputStream()) {
            final SpnegoHttpURLConnection spnego = new SpnegoHttpURLConnection("Client");
            spnego.setRequestMethod(method);
            if (headers != null && !headers.isEmpty()) {
                for (Map.Entry<String, String> entry : headers.entrySet()) {
                    spnego.setRequestProperty(entry.getKey(), entry.getValue());
                }
            }
            if (data != null) {
                // Explicit UTF-8: JSON payloads must not depend on the platform charset.
                requestBody.write(data.getBytes(StandardCharsets.UTF_8));
            }
            spnego.connect(new URL(url), requestBody);
            System.out.println("HTTP Status Code: " + spnego.getResponseCode());
            System.out.println("HTTP Status Message: " + spnego.getResponseMessage());
            // Buffer the full response, then decode once — decoding each 1024-byte
            // chunk separately (as the original did) breaks multibyte characters.
            try (InputStream in = spnego.getInputStream();
                 ByteArrayOutputStream responseBody = new ByteArrayOutputStream()) {
                byte[] buf = new byte[1024];
                int len;
                while ((len = in.read(buf)) > 0) {
                    responseBody.write(buf, 0, len);
                }
                sb.append(new String(responseBody.toByteArray(), StandardCharsets.UTF_8));
            }
        } catch (Exception e) {
            // Best-effort tutorial behavior: report and return what we have.
            e.printStackTrace();
        }
        System.out.println("Result:" + sb);
        return sb.toString();
    }
}
b.进行远程作业提交的类
LivySubmitClient.java
package com.myd.cn.livy;

import java.util.HashMap;
import java.util.UUID;

/**
 * Demo client that submits Spark work to a Kerberos-secured Livy server.
 *
 * <p>Configures the Kerberos/JAAS system properties, creates an interactive
 * pyspark session via {@code POST /sessions/}, and deletes session 18 via
 * {@code DELETE /sessions/18}. Batch-submission and statement-execution
 * examples are kept as comments; uncomment and adapt as needed.
 */
public class LivySubmitClient {

    /** Default Livy endpoint; can be overridden by the first CLI argument. */
    private static String LIVY_HOST = "http://10.111.32.14:8998";

    public static void main(String[] args) {
        // Generalization: allow "java LivySubmitClient http://host:8998" to
        // override the hard-coded endpoint; with no argument behavior is unchanged.
        String livyHost = (args != null && args.length > 0 && args[0] != null && !args[0].isEmpty())
                ? args[0] : LIVY_HOST;

        // Kerberos / JAAS environment for the SPNEGO client.
        System.setProperty("java.security.krb5.conf", "/etc/krb5.conf");
        System.setProperty("javax.security.auth.useSubjectCredsOnly", "true");
        System.setProperty("java.security.auth.login.config", "/home/jerry/14keytab/jaas.conf");
        // Proxy user for reading/writing the remote HDFS cluster.
        System.setProperty("HADOOP_USER_NAME", "dp");

        // Headers Livy requires (X-Requested-By is mandatory for POST/DELETE).
        HashMap<String, String> headers = new HashMap<>();
        headers.put("Content-Type", "application/json");
        headers.put("Accept", "application/json");
        headers.put("X-Requested-By", "dp");

        // Unique suffix for job names in the batch examples below.
        String uuid = UUID.randomUUID().toString();

        // --- Batch submission examples (uncomment to use /batches instead) ---
        // java/scala jar:
        // String submitJob = "{\"className\": \"org.apache.spark.examples.SparkPi\",\"name\": \"jerrrylivy\",\"proxyUser\": \"dp\",\"executorMemory\": \"1g\",\"args\": [200],\"file\": \"/yhx/spark-examples_2.11-2.3.0.jar\"}";
        // KBHttpUtils.postAccess(livyHost + "/batches", headers, submitJob);
        // Poll a batch for its state / YARN application id:
        // KBHttpUtils.getAccess(livyHost + "/batches/4", headers);

        // 1. Create an interactive pyspark session.
        String submitJob = "{ \"conf\": {\"master\":\"yarn-cluster\","
                + "\"spark.yarn.appMasterEnv.PYSPARK_PYTHON\":\"./py3all/py3all/bin/python\"},"
                + " \"pyFiles\": [\"/test/mypi.py\"], "
                + " \"kind\": \"pyspark\", "
                + "\"name\": \"Python Livy Pi Example 64Node\", "
                + "\"executorCores\":4, \"executorMemory\":\"2048m\", "
                + "\"driverCores\":4, \"driverMemory\":\"2048m\", "
                + "\"queue\":\"default\",\"archives\":[\"hdfs:///test/py3all.zip#py3all\"]}";
        KBHttpUtils.postAccess(livyHost + "/sessions/", headers, submitJob);

        // 2. Run code inside an existing session (adjust the session id first):
        // String pData = "{ \"code\": \"print(sc.parallelize([1,2]).count())\" }";
        // KBHttpUtils.postAccess(livyHost + "/sessions/13/statements", headers, pData);

        // Delete a session.
        KBHttpUtils.deleteAccess(livyHost + "/sessions/18", headers);
    }
}