目录
# Files edited in these notes (table of contents):
vim /usr/local/hbase/conf/hbase-site.xml
vim /usr/local/hbase/conf/hbase-env.sh
vim /usr/local/hbase/conf/hbase-site.xml
<configuration>
<!-- HDFS directory where HBase stores its data; host:port must match
     fs.defaultFS in Hadoop's core-site.xml -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://localhost:9000/hbase</value>
</property>
<!-- true = run HBase daemons as separate processes ((pseudo-)distributed mode) -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- Use the plain FileSystem-based WAL instead of the default AsyncFSWAL;
     avoids WAL startup failures on some Hadoop/HBase version combinations -->
<property>
<name>hbase.wal.provider</name>
<value>filesystem</value>
</property>
<!-- Do not enforce hflush/hsync stream capabilities; needed when the
     underlying filesystem does not advertise them (single-node setups) -->
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
</configuration>
vim /usr/local/hbase/conf/hbase-env.sh
# JDK installation used by HBase
export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_371
# Put HBase's own conf directory on its classpath
export HBASE_CLASSPATH=/usr/local/hbase/conf
# Let HBase start and manage its bundled ZooKeeper instance
export HBASE_MANAGES_ZK=true
SLF4J-jar包多绑定冲突问题:
hadoop的slf4j 与hbase的slf4j jar包发生了冲突,移除其中一个即可(例如将 /usr/local/hbase/lib/client-facing-thirdparty/ 下的 slf4j-log4j12-*.jar 删除或改名)
hbase配置spark
# Copy the HBase jars Spark needs into a dedicated directory under Spark's jars/
cd /usr/local/spark/jars
mkdir hbase
cd hbase
cp /usr/local/hbase/lib/hbase*.jar ./
cp /usr/local/hbase/lib/guava-12.0.1.jar ./
cp /usr/local/hbase/lib/htrace-core-3.1.0-incubating.jar ./
cp /usr/local/hbase/lib/protobuf-java-2.5.0.jar ./
# The spark-examples jar provides the python converter classes used by the
# pyspark snippet below (~/下载 is the Downloads directory)
mv ~/下载/spark-examples* /usr/local/spark/jars/hbase/
cd /usr/local/spark/conf
vim spark-env.sh
# Put Hadoop's, HBase's and the copied jars on Spark's classpath
export SPARK_DIST_CLASSPATH=$(/usr/local/hadoop/bin/hadoop classpath):$(/usr/local/hbase/bin/hbase classpath):/usr/local/spark/jars/hbase/*
pyspark编写程序写入HBase数据
# Write rows into the HBase table 'student' from the pyspark shell
# (`sc` is the SparkContext provided by the shell).
host = 'localhost'
table = 'student'
# Converter classes from the spark-examples jar copied into /usr/local/spark/jars/hbase/
keyConv = "org.apache.spark.examples.pythonconverters.StringToImmutableBytesWritableConverter"
valueConv = "org.apache.spark.examples.pythonconverters.StringListToPutConverter"
conf = {
    "hbase.zookeeper.quorum": host,
    "hbase.mapred.outputtable": table,
    "mapreduce.outputformat.class": "org.apache.hadoop.hbase.mapreduce.TableOutputFormat",
    "mapreduce.job.output.key.class": "org.apache.hadoop.hbase.io.ImmutableBytesWritable",
    "mapreduce.job.output.value.class": "org.apache.hadoop.io.Writable",
    # FIX for "Can not create a Path from a null string": HBase 2.x's
    # TableOutputFormat runs a FileOutputCommitter that requires an output
    # directory to be set even though nothing is written there (HBASE-20295).
    "mapreduce.output.fileoutputformat.outputdir": "/tmp",
}
# Each record "rowkey,cf,qualifier,value" -> (rowkey, [rowkey, cf, qualifier, value]).
# Was `x[0]` (first CHARACTER of the line), which only works for
# single-character row keys; `x.split(',')[0]` handles keys of any length.
rawData = ['3,info,name,Rongcheng', '4,info,name,Guanhua']
sc.parallelize(rawData) \
  .map(lambda x: (x.split(',')[0], x.split(','))) \
  .saveAsNewAPIHadoopDataset(conf=conf, keyConverter=keyConv, valueConverter=valueConv)
运行结果:
pyspark.sql.utils.IllegalArgumentException: 'Can not create a Path from a null string'
原因:HBase 2.x 的 TableOutputFormat 内部使用 FileOutputCommitter,要求设置输出目录;在 conf 中加入 "mapreduce.output.fileoutputformat.outputdir": "/tmp" 即可解决(参见 HBASE-20295)。
常用命令行
hbase
# Start HDFS first (HBase stores its data in HDFS, see hbase.rootdir)
cd /usr/local/hadoop
./sbin/start-dfs.sh
# Then start HBase and open its interactive shell
cd /usr/local/hbase
bin/start-hbase.sh
bin/hbase shell
exit
# Stop just the HMaster daemon, or shut the whole HBase cluster down
hbase-daemon.sh stop master
bin/stop-hbase.sh
spark
# Launch the interactive pyspark shell
cd /usr/local/spark
bin/pyspark
exit()
cd /usr/local/spark/bin
# NOTE(review): the trailing '/' is almost certainly a typo for the
# line-continuation character '\'; the rest of the spark-submit command
# appears to be cut off here — confirm against the original notes.
/usr/local/spark/bin/spark-submit /