Standalone Sqoop setup and testing

Sqoop download: use the CDH software download site, version 5.3.6.
With all components taken from the same CDH 5.3.6 release, there is no need to worry about cross-version dependencies or compatibility, and nothing has to be recompiled:
        hadoop-2.5.0-cdh5.3.6
        hive-0.13.1-cdh5.3.6
        sqoop-1.4.5-cdh5.3.6
        zookeeper-3.4.5-cdh5.3.6


Hadoop configuration
Set JAVA_HOME in the env scripts:
[beifeng@manager hadoop]$ pwd
/opt/cdh-5.3.6/hadoop-2.5.0-cdh5.3.6/etc/hadoop
[beifeng@manager hadoop]$ vim hadoop-env.sh

export JAVA_HOME=/opt/modules/jdk1.7.0_67
vim yarn-env.sh
export JAVA_HOME=/opt/modules/jdk1.7.0_67
vim mapred-env.sh
export JAVA_HOME=/opt/modules/jdk1.7.0_67

vim core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://manager:8020</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/cdh-5.3.6/hadoop-2.5.0-cdh5.3.6/data/tmp</value>
</property>
</configuration>


vim hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>manager:50090</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>manager:50070</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>

vim slaves
manager

Format the NameNode (first run only):
bin/hdfs namenode -format
Start it to check:
sbin/hadoop-daemon.sh start namenode

vim yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>manager</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>4096</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>4</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>640800</value>
</property>
</configuration>

vim mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>manager:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>manager:19888</value>
</property>
</configuration>


Start the daemons
HDFS:
sbin/hadoop-daemon.sh start namenode
sbin/hadoop-daemon.sh start datanode

YARN:
sbin/yarn-daemon.sh start resourcemanager
sbin/yarn-daemon.sh start nodemanager

JobHistory:
sbin/mr-jobhistory-daemon.sh start historyserver
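
A quick sanity check: jps (ships with the JDK) should now list all five daemons.
jps   # expect NameNode, DataNode, ResourceManager, NodeManager, JobHistoryServer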

Hive configuration
[beifeng@manager conf]$ pwd
/opt/cdh-5.3.6/hive-0.13.1-cdh5.3.6/conf
mv hive-env.sh.template hive-env.sh
vim hive-env.sh
HADOOP_HOME=/opt/cdh-5.3.6/hadoop-2.5.0-cdh5.3.6
export HIVE_CONF_DIR=/opt/cdh-5.3.6/hive-0.13.1-cdh5.3.6/conf
mv hive-log4j.properties.template hive-log4j.properties
vim hive-log4j.properties
hive.log.dir=/opt/cdh-5.3.6/hive-0.13.1-cdh5.3.6/logs

touch hive-site.xml
vim hive-site.xml
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://manager:3306/metadata?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>Mima19870405</value>
</property>
</configuration>

Copy mysql-connector-java-5.1.27-bin.jar into Hive's lib directory, then start the CLI:
bin/hive

bin/hdfs dfs -mkdir -p /user/hive/warehouse
Grant group write permission:
bin/hdfs dfs -chmod g+w /user/hive/warehouse
Test:
create table student(id int,name string) row format delimited fields terminated by '\t';
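
A quick smoke test for the new table, assuming a tab-delimited sample file exists at /opt/datas/student.txt (the path is illustrative):
load data local inpath '/opt/datas/student.txt' into table student;
select * from student;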


Sqoop configuration
mv sqoop-env-template.sh sqoop-env.sh
[beifeng@manager conf]$ vim sqoop-env.sh

export HADOOP_COMMON_HOME=/opt/cdh-5.3.6/hadoop-2.5.0-cdh5.3.6
export HADOOP_MAPRED_HOME=/opt/cdh-5.3.6/hadoop-2.5.0-cdh5.3.6
export HIVE_HOME=/opt/cdh-5.3.6/hive-0.13.1-cdh5.3.6
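
A cheap way to confirm Sqoop picks up this environment is to print the version banner:
bin/sqoop version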


CREATE TABLE `my_user` (
`id` tinyint(4) NOT NULL AUTO_INCREMENT,
`account` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
`passwd` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=8 DEFAULT CHARSET=latin1;

-- ----------------------------
-- Records of my_user
-- ----------------------------
INSERT INTO `my_user` VALUES ('1', 'admin', 'admin');
INSERT INTO `my_user` VALUES ('2', 'pu', '12345');
INSERT INTO `my_user` VALUES ('3', 'system', 'system');
INSERT INTO `my_user` VALUES ('4', 'zxh', 'zxh');
INSERT INTO `my_user` VALUES ('5', 'test', 'test');
INSERT INTO `my_user` VALUES ('6', 'pudong', 'pudong');
INSERT INTO `my_user` VALUES ('7', 'qiqi', 'qiqi');

Copy mysql-connector-java-5.1.27-bin.jar into Sqoop's lib directory as well, then verify connectivity by listing the databases:
bin/sqoop list-databases \
--connect jdbc:mysql://manager:3306 \
--username root \
--password Mima19870405

Import with Sqoop's defaults (output goes to /user/<user>/<table> on HDFS, using 4 map tasks):
bin/sqoop import \
--connect jdbc:mysql://manager:3306/metadata \
--username root \
--password Mima19870405 \
--table my_user
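
With the defaults above, the output lands in the user's HDFS home directory under a directory named after the table; a quick look (path assumes user beifeng):
bin/hdfs dfs -cat /user/beifeng/my_user/part-m-*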


Import into an explicit target directory with a single mapper:
bin/sqoop import \
--connect jdbc:mysql://manager:3306/metadata \
--username root \
--password Mima19870405 \
--table my_user \
--target-dir /user/beifeng/sqoop/imp_my_user \
--num-mappers 1


Import as Parquet files:
bin/sqoop import \
--connect jdbc:mysql://manager:3306/metadata \
--username root \
--password Mima19870405 \
--table my_user \
--target-dir /user/beifeng/sqoop/imp_my_user_parquet \
--num-mappers 1 \
--as-parquetfile


Create a table in Hive and read the data:
drop table if exists default.hive_user_orc;
create table default.hive_user_orc(
id int,
username string,
password string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
STORED AS parquet;
Load the data from HDFS (/user/beifeng/sqoop/imp_my_user_parquet):
load data inpath '/user/beifeng/sqoop/imp_my_user_parquet' into table default.hive_user_orc;
Query the data:
select * from default.hive_user_orc;
Every column comes back NULL. Most likely cause: Sqoop wrote the Parquet files with the source column names (id, account, passwd), and Parquet-backed Hive tables resolve columns by name, so username and password match nothing in the files; note also that the ROW FORMAT DELIMITED clause has no effect on a Parquet table. Declaring the Hive columns with the same names as the MySQL table should fix it.


Import only the id and account columns:
bin/sqoop import \
--connect jdbc:mysql://manager:3306/metadata \
--username root \
--password Mima19870405 \
--table my_user \
--target-dir /user/beifeng/sqoop/imp_my_user_column \
--num-mappers 1 \
--columns id,account


Cleaning and filtering data at import time with a free-form query. Sqoop requires the literal token $CONDITIONS in the WHERE clause; it substitutes each mapper's split predicate there at run time:
bin/sqoop import \
--connect jdbc:mysql://manager:3306/metadata \
--username root \
--password Mima19870405 \
--query 'select id, account from my_user where $CONDITIONS' \
--target-dir /user/beifeng/sqoop/imp_my_user_query \
--num-mappers 1 
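
With more than one mapper, a free-form query also needs --split-by so Sqoop can partition the work across mappers; a sketch (the target directory name is illustrative):
bin/sqoop import \
--connect jdbc:mysql://manager:3306/metadata \
--username root \
--password Mima19870405 \
--query 'select id, account from my_user where $CONDITIONS' \
--split-by id \
--target-dir /user/beifeng/sqoop/imp_my_user_query2 \
--num-mappers 2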


Compression: import into HDFS as Snappy-compressed files:
bin/sqoop import \
--connect jdbc:mysql://manager:3306/metadata \
--username root \
--password Mima19870405 \
--table my_user \
--target-dir /user/beifeng/sqoop/imp_my_user_sna2 \
--delete-target-dir \
--num-mappers 1 \
--compress \
--compression-codec org.apache.hadoop.io.compress.SnappyCodec




At first the job fails because the stock native libraries do not support Snappy (verify with bin/hadoop checknative).
After swapping in recompiled native libraries, the import runs successfully.




Compiling the Hadoop source on CentOS (to get Snappy-enabled native libraries)
Install Maven:
wget http://mirrors.hust.edu.cn/apache/maven/maven-3/3.1.1/binaries/apache-maven-3.1.1-bin.tar.gz
tar zxf apache-maven-3.1.1-bin.tar.gz
Configure Maven in /etc/profile:
#MAVEN_HOME
export M2_HOME=/opt/apache-maven-3.1.1
export PATH=$PATH:$M2_HOME/bin


source /etc/profile
Check the version:
mvn -v
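
The native build itself is then run from the Hadoop source root; a sketch, assuming protobuf 2.5, cmake, and a C/C++ toolchain are already installed:
mvn package -Pdist,native -DskipTests -Dtar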
-----------------------------------------------------------




Load the Snappy import into a Hive table (default.hive_user_snappy must exist first; see the sketch after the workflow below):
load data inpath '/user/beifeng/sqoop/imp_my_user_snappy' into table default.hive_user_snappy;


Workflow
mysql table
->
hdfs(snappy)
->
hive create table
->
load data into table
->
query
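
Put together, the Hive side of that workflow might look like this (table name is illustrative; columns mirror my_user, the comma delimiter matches Sqoop's default output, and Hive reads the Snappy-compressed text files directly as long as the codec is available):
create table default.hive_user_snappy(
id int,
account string,
passwd string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
load data inpath '/user/beifeng/sqoop/imp_my_user_snappy' into table default.hive_user_snappy;
select * from default.hive_user_snappy;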



Export: HDFS data into a MySQL table


touch /opt/datas/user.txt
vim /opt/datas/user.txt
8,beifeng,beifeng
9,xuanyun,xuanyun


bin/hdfs dfs -mkdir -p /user/beifeng/sqoop/exp/user/
bin/hdfs dfs -put /opt/datas/user.txt /user/beifeng/sqoop/exp/user/


bin/sqoop export \
--connect jdbc:mysql://manager:3306/metadata \
--username root \
--password Mima19870405 \
--table my_user \
--export-dir /user/beifeng/sqoop/exp/user/ \
--num-mappers 1
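
To confirm the export, query MySQL; rows 8 and 9 from user.txt should now appear alongside the original seven:
select * from my_user;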


Importing MySQL data directly into Hive (Hive table data lives on HDFS)
use default;
drop table if exists user_hive;
create table user_hive(
id int,
account string,
password string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';


bin/sqoop import \
--connect jdbc:mysql://manager:3306/metadata \
--username root \
--password Mima19870405 \
--table my_user \
--fields-terminated-by '\t' \
--delete-target-dir \
--num-mappers 1 \
--hive-import \
--hive-database default \
--hive-table user_hive
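
Verify in Hive; the query should return the my_user rows:
select * from default.user_hive;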






Exporting Hive data to an RDBMS (MySQL)
In MySQL, create the target table:
create table my_user2(
id tinyint(4) NOT NULL AUTO_INCREMENT,
account varchar(255) DEFAULT NULL,
passwd varchar(255) DEFAULT NULL,
PRIMARY KEY(id)
);




bin/sqoop export \
--connect jdbc:mysql://manager:3306/metadata \
--username root \
--password Mima19870405 \
--table my_user2 \
--export-dir /user/hive/warehouse/user_hive \
--num-mappers 1 \
--input-fields-terminated-by '\t'
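
Verify in MySQL; my_user2 should now hold the same rows as user_hive:
select * from my_user2;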