一、kettle安装
下载kettle文件到本地
在win下面直接解压,解压后的目录如下:
打开界面化管理工具Spoon.bat(注意:Kettle的图形界面启动脚本是Spoon.bat,不是Sqoop):
二、json的使用
①加载单个文件
②加载文件夹指定后缀文件(注正则表达式写法)
③加载流中指定字段
三、文本文档加载
四、调用jar方法:
调用jar主要可以用以下两种方法(Java和JavaScript)
①java调用
1、准备jar包
①代码如下(scala):
package scalaApp

import java.net.URL
import java.security.PrivilegedExceptionAction
import java.util.Properties
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.security.UserGroupInformation
import org.apache.spark.sql.SparkSession

/**
 * Demo entry point packaged into a jar so it can be invoked from Kettle.
 *
 * Contains four independent demos; `main` runs `testSpark` and keeps the
 * others commented out so only one is exercised per launch:
 *  - testSpark:       Spark JDBC round-trip against a local MySQL table
 *  - authKrb5ToHive:  Kerberos keytab login via UserGroupInformation
 *  - createFile:      HDFS path-existence check through the Spark Hadoop conf
 *  - proxyUser:       HDFS access impersonating another user (proxy user)
 */
object App {

  def main(args: Array[String]): Unit = {
    testSpark()
    // authKrb5ToHive()
    // createFile()
    // proxyUser()
  }

  /**
   * Reads table `test` from MySQL over JDBC, registers it as a temp view,
   * then appends the same rows back into the table.
   *
   * NOTE(review): master("yarn") and the credentials are hard-coded; adjust
   * for your environment before running.
   */
  def testSpark(): Unit = {
    val spark = SparkSession.builder().appName("MysqlQueryDemo").master("yarn").getOrCreate()
    val jdbcDF = spark.read
      .format("jdbc")
      .option("url", "jdbc:mysql://localhost:3306/bigdata?useUnicode=true&characterEncoding=utf-8")
      .option("dbtable", "test")
      .option("user", "root")
      .option("password", "xxxx")
      .option("driver", "com.mysql.jdbc.Driver")
      .load()
    jdbcDF.createOrReplaceTempView("testm")
    jdbcDF.write
      .format("jdbc")
      .option("url", "jdbc:mysql://localhost:3306/bigdata?useUnicode=true&characterEncoding=utf-8")
      .option("dbtable", "test")
      .option("user", "root")
      .option("password", "xxxx")
      .option("driver", "com.mysql.jdbc.Driver")
      .mode("append")
      .save()
  }

  /**
   * Logs in to the Kerberos realm using the keytab bundled on the classpath.
   * Side effect: sets the current UserGroupInformation login user.
   */
  def authKrb5ToHive(): Unit = {
    val conf: Configuration = new Configuration()
    // Keytab is packaged as a classpath resource; resolve it to a file path.
    val keytab: URL = this.getClass.getClassLoader.getResource("keytab/eflow.keytab")
    val keytabFullPath = keytab.getFile
    UserGroupInformation.setConfiguration(conf)
    println("验证开始")
    UserGroupInformation.loginUserFromKeytab("eflow@SWAD.COM", keytabFullPath)
    println("结束")
  }

  /** Checks whether the HDFS path /apps exists, printing the boolean result. */
  def createFile(): Unit = {
    val spark = SparkSession.builder().appName("MysqlQueryDemo").master("local").getOrCreate()
    // Reuse Spark's Hadoop configuration so HDFS settings match the session.
    val conf = spark.sparkContext.hadoopConfiguration
    val hdfs = org.apache.hadoop.fs.FileSystem.get(conf)
    val hdfspath = new Path("/apps")
    val f = hdfs.exists(hdfspath)
    print(f)
  }

  /**
   * Demonstrates proxy-user (impersonation) access: after a Kerberos login,
   * performs HDFS operations as user "wangwwwww" via doAs.
   */
  def proxyUser(): Unit = {
    authKrb5ToHive()
    val conf: Configuration = new Configuration()
    // The Kerberos-authenticated login user acts as the super user.
    val superUser = UserGroupInformation.getCurrentUser
    // Create a proxy user that runs with the super user's credentials.
    val proxyUser = UserGroupInformation.createProxyUser("wangwwwww", superUser)
    // Access the cluster as the proxy user.
    proxyUser.doAs(new PrivilegedExceptionAction[Boolean]() {
      override def run(): Boolean = {
        val fs = FileSystem.get(conf)
        val t = fs.exists(new Path("/tmp/eflow_test/abc2"))
        val y = fs.mkdirs(new Path("/tmp/abc2222"))
        print(t + ".............t")
        print(y + ".............y")
        t
      }
    })
  }
}
②pom如下
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>sparkdemo</groupId>
  <artifactId>sparkdemo</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <properties>
    <scala.version>2.11.12</scala.version>
  </properties>

  <dependencies>
    <!-- Spark core and SQL are "provided": supplied by the cluster at runtime. -->
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>2.2.0</version>
      <scope>provided</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.11</artifactId>
      <version>2.2.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>com.jcraft</groupId>
      <artifactId>jsch</artifactId>
      <version>0.1.53</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-compiler</artifactId>
      <version>${scala.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-reflect</artifactId>
      <version>${scala.version}</version>
      <scope>provided</scope>
    </dependency>
    <!-- MySQL driver is compile-scoped so it lands in the fat jar. -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.23</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.3.2</version>
        <configuration>
          <verbose>true</verbose>
          <source>1.8</source>
          <target>1.8</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <version>2.15.2</version>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <!-- Build the fat jar with: mvn package assembly:assembly -->
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.6</version>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
          <archive>
            <manifest>
              <addClasspath>true</addClasspath>
              <mainClass>scalaApp.App</mainClass>
            </manifest>
          </archive>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
③将代码打包,打包命令如下:
mvn package assembly:assembly
④ 使用kettle 调用(注意打包时一定将依赖打全)
2、将打包好的jar包放到kettle的lib目录里,然后启动kettle
2、
注意:
// scalaApp.App is the class from the jar placed on Kettle's classpath (lib/),
// accessed through Rhino's Packages prefix; testSpark() is the method to invoke.
var re = new Packages.scalaApp.App();
var FamilyNames = re.testSpark();
代码参考下载:https://download.csdn.net/download/qingshu0400/11852022