Continued from the previous posts:
Part 1: https://my.oschina.net/u/988386/blog/802045
Part 2: https://my.oschina.net/u/988386/blog/802052
Part 3: https://my.oschina.net/u/988386/blog/802063
Part 4: https://my.oschina.net/u/988386/blog/802073
This part: remote debugging from Eclipse.
Environment: Windows 10 + Eclipse Neon + Maven
On Windows you need the winutils.exe file, which ships in a hadoop-common-bin package.
Download link (not uploaded by me; any download points it charges have nothing to do with me):
(http://download.csdn.net/detail/speedgoddeer/9347013)
- Copy the hbase-site.xml file from the server into the Eclipse project's src directory so it lands on the classpath (alternatively, set the connection properties in code, as in the sketch below).
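If copying the file around is inconvenient, the same settings can be supplied programmatically. A minimal sketch, assuming ZooKeeper runs on host d155 with the default client port 2181 (both values are assumptions; use your cluster's actual quorum):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class HBaseConfSketch {
    // Builds an HBase client Configuration without an hbase-site.xml on the classpath.
    public static Configuration create() {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "d155");               // assumed ZooKeeper host
        conf.set("hbase.zookeeper.property.clientPort", "2181");  // assumed ZooKeeper port
        return conf;
    }
}
```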
- Debugging from Eclipse runs into all sorts of jar version conflicts; read the log output carefully and keep the jar versions consistent with the cluster.
The debug code follows, for reference only:
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkHBaseDemo {

    private static final String master = "spark://d155:7077";

    // lib directory under the unpacked HBase distribution
    private static final String HBASE_PATH = "F:\\BACKUP_SOFTWARE\\hadoop\\hbase\\hbase-1.1.7\\lib\\";

    // Jars shipped to the Spark executors; versions must match the cluster.
    public static String[] sparkJars() {
        List<String> list = new ArrayList<String>();
        list.add(HBASE_PATH + "zookeeper-3.4.6.jar");
        list.add(HBASE_PATH + "hbase-common-1.1.7.jar");
        list.add(HBASE_PATH + "hbase-client-1.1.7.jar");
        list.add(HBASE_PATH + "hbase-server-1.1.7.jar");
        list.add(HBASE_PATH + "hbase-protocol-1.1.7.jar");
        list.add(HBASE_PATH + "htrace-core-3.1.0-incubating.jar");
        // jar of this program itself
        list.add("D:\\WIBZPLACE7\\hdp\\target\\hdp-0.0.1-SNAPSHOT.jar");
        return list.toArray(new String[list.size()]);
    }

    public static void debug() {
        // point Hadoop at the directory whose bin\ contains winutils.exe
        System.setProperty("hadoop.home.dir", "E:\\DEVELOP\\SOFTWARE\\hadoop-common-bin");
        SparkConf sparkConf = new SparkConf();
        sparkConf.setAppName("HBASE").setMaster(master);
        sparkConf.setJars(sparkJars());
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        // picks up the hbase-site.xml copied into src (now on the classpath)
        Configuration conf = HBaseConfiguration.create();
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("liezu1"));
        try {
            String tableName = "tb_demo1";
            conf.set(TableInputFormat.INPUT_TABLE, tableName);
            // TableInputFormat expects the Scan serialized to a Base64 string
            ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
            String scanToString = Base64.encodeBytes(proto.toByteArray());
            conf.set(TableInputFormat.SCAN, scanToString);
            JavaPairRDD<ImmutableBytesWritable, Result> myRDD = sc.newAPIHadoopRDD(conf, TableInputFormat.class,
                    ImmutableBytesWritable.class, Result.class);
            System.out.println(myRDD.count());
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            sc.close();
        }
    }

    public static void main(String[] args) {
        debug();
    }
}
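The demo only calls count(). As a minimal extension sketch for inspecting actual rows, the pairs can be mapped to strings; the qualifier "col1" below is a hypothetical column name in the "liezu1" family, so substitute a real one from your table:

```java
import java.util.List;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.JavaPairRDD;

public class RowDump {
    // Prints up to ten rows from the RDD produced in SparkHBaseDemo.debug().
    public static void dump(JavaPairRDD<ImmutableBytesWritable, Result> rdd) {
        List<String> rows = rdd.map(t -> {
            Result r = t._2();
            // "col1" is a hypothetical qualifier; replace with a real column
            byte[] v = r.getValue(Bytes.toBytes("liezu1"), Bytes.toBytes("col1"));
            return Bytes.toString(r.getRow()) + " => " + (v == null ? "<null>" : Bytes.toString(v));
        }).take(10);
        for (String row : rows) {
            System.out.println(row);
        }
    }
}
```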
- pom.xml. Two things to note: netty-all is pinned once at the top and netty is excluded from every Hadoop/HBase/Spark dependency, which is meant to sidestep the jar conflicts mentioned above; and <hbase.version> here is 1.2.4 while sparkJars() ships 1.1.7 jars, so align both with whatever your cluster actually runs.
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>net.ibizsys.hdp</groupId>
<artifactId>hdp</artifactId>
<version>0.0.1-SNAPSHOT</version>
<repositories>
<repository>
<id>cloudera-repo-releases</id>
<name>Spark-Support</name>
<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
</repositories>
<properties>
<jackson.version>2.6.7</jackson.version>
<scala.binary.version>2.11</scala.binary.version>
<spark.version>2.0.2</spark.version>
<scala.lang.version>2.11.8</scala.lang.version>
<hadoop.version>2.7.3</hadoop.version>
<hbase.version>1.2.4</hbase.version>
</properties>
<dependencies>
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
<version>4.0.29.Final</version>
</dependency>
<!-- <dependency> <groupId>io.netty</groupId> <artifactId>netty</artifactId>
</dependency> -->
<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.7</version>
<scope>system</scope>
<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<exclusions>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<exclusions>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.lang.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>
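After running `mvn package`, the jar at D:\WIBZPLACE7\hdp\target\hdp-0.0.1-SNAPSHOT.jar is the file that sparkJars() ships to the executors, so rebuild it before each debug run or the cluster will execute stale code.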