3、Hive安装配置

3.1安装MySQL

datanode5上安装MySQL

# yum -y installmysql-server mysql

# mysql

mysql> grant all privileges on *.* tohive@'10.40.214.%' identified by "hive";

mysql> flush privileges;

3.2安装hive

# tar -zxf apache-hive-0.13.1-bin.tar.gz-C /var/data/; mv /var/data/apache-hive-0.13.1 /var/data/hive

# cd /var/data/hive

# vimbin/hive-config.sh    ##在脚本开头添加下面的内容

exportJAVA_HOME=/usr/java/jdk1.7.0_71

exportHIVE_HOME=/var/data/hive

exportHADOOP_HOME=/var/data/Hadoop

# vim hive-site.xml     ##配置hive

         <configuration>

                   <property>

                         <name>hive.exec.scratchdir</name>

                         <value>/tmp/hive-${user.name}</value>

                         <description>Scratchspace for Hive jobs</description>

</property>

<property>

                           <name>hive.exec.local.scratchdir</name>

                         <value>/tmp/${user.name}</value>

                         <description>Localscratch space for Hive jobs</description>

</property>

<property>

 <name>javax.jdo.option.ConnectionURL</name>

 <value>jdbc:mysql://10.40.214.9:3306/hive?createDatabaseIfNotExist=true</value>

  <description>JDBCconnect string for a JDBC metastore</description>

</property>

<property>

 <name>javax.jdo.option.ConnectionDriverName</name>

 <value>org.apache.derby.jdbc.EmbeddedDriver</value>

  <description>Driverclass name for a JDBC metastore</description>

</property>

<property>

 <name>javax.jdo.PersistenceManagerFactoryClass</name>

 <value>org.datanucleus.api.jdo.JDOPersistenceManagerFactory</value>

  <description>classimplementing the jdo persistence</description>

</property>

<property>

  <name>javax.jdo.option.DetachAllOnCommit</name>

 <value>true</value>

  <description>detachesall objects from session so that they can be used after transaction iscommitted</description>

</property>

<property>

 <name>javax.jdo.option.NonTransactionalRead</name>

 <value>true</value>

  <description>readsoutside of transactions</description>

</property>

<property>

 <name>javax.jdo.option.ConnectionUserName</name>

 <value>hive</value>

  <description>usernameto use against metastore database</description>

</property>

<property>

 <name>javax.jdo.option.ConnectionPassword</name>

 <value>hive</value>

  <description>passwordto use against metastore database</description>

</property>

<property>

 <name>hive.metastore.warehouse.dir</name>

  <value>/user/hive/warehouse</value>

  <description>locationof default database for the warehouse</description>

</property>

         </configuration>

# cpmysql-connector-java-5.1.18-bin.jar /var/data/hive/lib/     ##加入MySQL java驱动jar

# vim /etc/profile    ##配置hive环境变量

         #hivepath

exportHIVE_HOME=/var/data/hive

exportPATH=$PATH:$HIVE_HOME/bin

# source /etc/profile

# hive      ##启动hive

3.3hive的使用

##创建库

# hive -e "createdatabase maillog;"

##创建表

# hive -e"create table maillog.izhenxin(mail_time string, message_id string,mail_to string, mail_domain string, mail_relay string, mail_delay string,mail_delays string,mail_dsn string, mail_status string) ROW FORMAT DELIMITEDFIELDS TERMINATED BY '|' STORED AS TEXTFILE;"

##删除表

# hive -e "droptable maillog.izhenxin;"

##导入数据到hive表中

# hive -e "LOADDATA LOCAL INPATH '/opt/zhangdh/to_result.txt' OVERWRITE INTO TABLEmaillog.izhenxin;"

##简单的数据查询

# hive -e "usemaillog;select * from izhenxin_total limit 10;"

##hive统计数据,会执行mapreduce过程

# hive -e"select mail_domain,sum(case when mail_status='sent' then 1 else 0 end )sent,sum(case when mail_status='bounced' then 1 else 0 end ) bounced,sum(casewhen mail_status='deferred' then 1 else 0 end ) deferred from maillog.izhenxingroup by mail_domain order by sent desc;"

##hive表数据的查询结果,添加到一个新表中

# hive -e"create table maillog.izhenxin_total(mail_domain string, sent_number int,bounced_number int, deferred int) ROW FORMAT DELIMITED FIELDS TERMINATED BY'\t' STORED AS TEXTFILE;"

# hive -e "usemaillog; insert into table izhenxin_total select mail_domain,sum(case whenmail_status='sent' then 1 else 0 end ) sent,sum(case when mail_status='bounced'then 1 else 0 end ) bounced,sum(case when mail_status='deferred' then 1 else 0end ) deferred from maillog.izhenxin group by mail_domain order by sentdesc;"