[免验证]# service mysqld stop # mysqld_safe –skip-grant-tables &# mysql –uroot –p[初始密码]
初始密码:
# grep 'temporary password' /var/log/mysqld.log[root@jetflow205 mysql]# grep 'temporary password' /var/log/mysqld.log
2019-04-01T08:50:00.799172Z 1 [Note] A temporary password is generated for root@localhost: 1q+6p9OiO))p #密码为 1q+6p9OiO))p[修改密码]
mysql> use mysql;
mysql> update user set password=PASSWORD("root")where user="root";
mysql> flush privileges;
mysql> quit
[重启/检查]# service mysqld restart # mysql -uroot –proot[建库/用户]
mysql> create database hive;
mysql> GRANT ALL PRIVILEGES ON *.* TO hive@'10.47.85.%' IDENTIFIED BY 'hive' WITH GRANT OPTION;
mysql> flush privileges;
【报错】
[解决方案][由于mysql 默认的mysql.sock 是在/var/lib/mysql/mysql.sock,但linux系统总是去/tmp/mysql.sock查找]# mysql --socket=/var/lib/mysql/mysql.sock [临时]# ln -s /var/lib/mysql/mysql.sock /tmp/mysql.sock [永久]
【安装hive】
[解压]# mkdir /home/hadoop/hive# mv apache-hive-2.3.2-bin.tar.gz /home/hadoop/hive# tar -zxvf apache-hive-2.3.2-bin.tar.gz[修改配置]# cd /home/hadoop/hive/conf# cp hive-env.sh.template hive-env.sh# vi hive-env.sh
添加配置:
export JAVA_HOME=/home/java
export HIVE_HOME=/home/hive
export HADOOP_HOME=/home/hadoop
export HIVE_CONF_DIR=/home/hive/conf
# vi hive-site.xml
新增文件/配置:
<?xml version="1.0"?><?xml-stylesheet type="text/xsl" href="configuration.xsl"?><configuration><property><name>javax.jdo.option.ConnectionURL</name><value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value><description>JDBC connect string for a JDBC metastore </description></property><property><name>javax.jdo.option.ConnectionDriverName</name><value>com.mysql.jdbc.Driver</value></property><property><name>javax.jdo.option.ConnectionUserName</name><value>root</value></property><property><name>javax.jdo.option.ConnectionPassword</name><value>123456</value></property><property><name>hive.querylog.location</name><value>/home/hadoop/hive/tmp</value></property><property><name>hive.exec.local.scratchdir</name><value>/home/hadoop/hive/tmp</value></property><property><name>hive.downloaded.resources.dir</name><value>/home/hadoop/hive/tmp</value></property><property><name>datanucleus.schema.autoCreateAll</name><value>true</value></property></configuration>[MYSQL-JDBC驱动]
文件上传到:/home/hadoop/hive/lib/mysql-connector-java-5.1.32.jar
[环境变量]export HIVE_HOME=/home/hadoop/hive
export HIVE_CONF_DIR=$HIVE_HOME/conf
export PATH=$PATH:$HIVE_HOME/bin
[初始化]
首次执行初始化:
hive的bin目录下执行schematool脚本 :
[ ./schematool -dbType mysql -initSchema ]
查看初始化信息:
schematool -dbType mysql -info
【远程服务启动】【不需要】
# cd /home/hadoop/hive/bin# nohup hive --service hiveserver2 [调试模式][定位环境问题使用]
./hive -hiveconf hive.root.logger=DEBUG,console [非调试模式不加参数即可]
【hive建表】
[多字段]
create table sqldata_100m
(id int,
name string,
age int,
department string,
job_number bigint,
time date,
on_the_job boolean
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
location '/hive';[单字段]
create table S4M (line string) ROW FORMAT DELIMITED location '/flume';
【数据导入】
hadoop fs -put SQLData_100M.txt /hive/sqldata_100m
【查询SQL】
hive -e 'select * from sqldata_100m limit 10'
hive -e "use source;select * from sqldata_100m where id<10 order by id;">user_base.txt
【对比表数据】
select count(*) from sqldata_300m a left outer join sqldata_400m b on a.line=b.line where b.line is null; join左为样本数据,右为yita数据;
select a.* from sqldata_300m a left outer join sqldata_400m b on a.line=b.line where b.line is null;