-- Source notes: http://www.powerxing.com/hadoop-build-project-using-eclipse/
-- HiveQL examples transcribed from "Programming Hive", chapters 4-6,
-- with transcription typos corrected.

-----------------------------------------------------------------------
-- Chapter 4: HiveQL data definition
-----------------------------------------------------------------------

-- 4.1 Databases in Hive
CREATE DATABASE financials;
CREATE DATABASE IF NOT EXISTS financials;
SHOW DATABASES;
CREATE DATABASE human_resources;
SHOW DATABASES;
SHOW DATABASES LIKE 'h.*';

CREATE DATABASE financials LOCATION '/my/preferred/directory';
CREATE DATABASE financials COMMENT 'Holds all financial tables';
DESCRIBE DATABASE financials;
CREATE DATABASE financials
WITH DBPROPERTIES ('creator'='Mark Moneybags', 'date'='2012-01-02');
DESCRIBE DATABASE financials;
DESCRIBE DATABASE EXTENDED financials;

USE financials;                            -- "User" was a typo for USE
SET hive.cli.print.current.db=true;
DROP DATABASE IF EXISTS financials;        -- "Is Exists" was a typo for IF EXISTS
DROP DATABASE IF EXISTS financials CASCADE;

-- 4.2 Altering a database
ALTER DATABASE financials SET DBPROPERTIES ('edited-by'='Joe Dba');

-- 4.3 Creating tables
CREATE TABLE IF NOT EXISTS mydb.employees (
  name         STRING COMMENT 'Employee name',
  salary       FLOAT  COMMENT 'Employee salary',
  subordinates ARRAY<STRING> COMMENT 'Names of subordinates',
  deductions   MAP<STRING, FLOAT>
               COMMENT 'Keys are deduction names, values are percentages',
  address      STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
               COMMENT 'Home address')
COMMENT 'Description of the table'
TBLPROPERTIES ('creator'='me', 'created_at'='2012-01-02 10:00:00')
LOCATION '/user/hive/warehouse/mydb.db/employees';

CREATE TABLE IF NOT EXISTS mydb.employees2 LIKE mydb.employees;

-- 4.3.2 External tables
-- "exchange" is a reserved word in recent Hive versions, hence the backticks.
CREATE EXTERNAL TABLE IF NOT EXISTS stocks (
  `exchange`      STRING,
  symbol          STRING,
  ymd             STRING,
  price_open      FLOAT,
  price_high      FLOAT,
  price_low       FLOAT,
  price_close     FLOAT,
  volume          INT,
  price_adj_close FLOAT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/data/stocks';

CREATE EXTERNAL TABLE IF NOT EXISTS mydb.employees3
LIKE mydb.employees
LOCATION '/path/to/data';

-- 4.4 Partitioned, managed tables
CREATE TABLE employees (
  name         STRING,
  salary       FLOAT,
  subordinates ARRAY<STRING>,
  deductions   MAP<STRING, FLOAT>,
  address      STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>)
PARTITIONED BY (country STRING, state STRING);

-- In strict mode, queries on a partitioned table must filter on a partition key.
SET hive.mapred.mode=strict;
SELECT e.name, e.salary FROM employees e LIMIT 100;  -- rejected in strict mode
SET hive.mapred.mode=nonstrict;                      -- "nostrict" was a typo
SELECT e.name, e.salary FROM employees e LIMIT 100;

SHOW PARTITIONS employees;
SHOW PARTITIONS employees PARTITION (country='US');
SHOW PARTITIONS employees PARTITION (country='US', state='AK');
DESCRIBE EXTENDED employees;

LOAD DATA LOCAL INPATH '${env:HOME}/california-employees'
INTO TABLE employees
PARTITION (country='US', state='CA');

-- 4.4.1 External partitioned tables
CREATE EXTERNAL TABLE IF NOT EXISTS log_messages (
  hms        INT,
  severity   STRING,
  server     STRING,
  process_id INT,
  message    STRING)
PARTITIONED BY (year INT, month INT, day INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

ALTER TABLE log_messages ADD PARTITION (year=2012, month=1, day=2)
LOCATION 'hdfs://master_server/data/log_messages/2012/01/02';

-- 4.4.2 Customizing table storage formats
-- The \001, \002, \003 delimiters below are Hive's defaults, spelled out.
CREATE TABLE employees (
  name         STRING,
  salary       FLOAT,
  subordinates ARRAY<STRING>,
  deductions   MAP<STRING, FLOAT>,
  address      STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\001'
  COLLECTION ITEMS TERMINATED BY '\002'
  MAP KEYS TERMINATED BY '\003'
  LINES TERMINATED BY '\n'
STORED AS TEXTFILE;

-- 4.5 Dropping tables
DROP TABLE IF EXISTS employees;

-- 4.6 Altering tables
-- 4.6.1 Renaming a table
ALTER TABLE log_messages RENAME TO logmsgs;

-- 4.6.2 Adding, modifying, and dropping table partitions
-- (The transcription repeated day=1 three times; each partition spec differs.)
ALTER TABLE log_messages ADD IF NOT EXISTS
  PARTITION (year=2011, month=1, day=1) LOCATION '/logs/2011/01/01'
  PARTITION (year=2011, month=1, day=2) LOCATION '/logs/2011/01/02'
  PARTITION (year=2011, month=1, day=3) LOCATION '/logs/2011/01/03';

ALTER TABLE log_messages PARTITION (year=2011, month=12, day=2)
SET LOCATION 's3n://ourbucket/logs/2011/12/02';

ALTER TABLE log_messages DROP IF EXISTS PARTITION (year=2011, month=12, day=2);

-- 4.6.3 Changing columns
ALTER TABLE log_messages
CHANGE COLUMN hms hours_minutes_seconds INT
COMMENT 'The hours, minutes, and seconds part of the timestamp'
AFTER severity;

-- 4.6.4 Adding columns
ALTER TABLE log_messages ADD COLUMNS (
  app_name   STRING COMMENT 'Application name',
  session_id BIGINT COMMENT 'The current session id');  -- Hive has no LONG type

-- 4.6.5 Deleting or replacing columns
ALTER TABLE log_messages REPLACE COLUMNS (
  hours_mins_secs INT    COMMENT 'hour, minute, seconds from timestamp',
  severity        STRING COMMENT 'The message severity',
  message         STRING COMMENT 'The rest of the message');

-- 4.6.6 Altering table properties
ALTER TABLE log_messages SET TBLPROPERTIES (
  'notes'='The process id is no longer captured; this column is always NULL');

-- 4.6.7 Altering storage properties
ALTER TABLE log_messages
PARTITION (year=2012, month=1, day=1)
SET FILEFORMAT SEQUENCEFILE;

ALTER TABLE table_using_json_storage
SET SERDE 'com.example.JSONSerDe'
WITH SERDEPROPERTIES ('prop1'='value1', 'prop2'='value2');

ALTER TABLE table_using_json_storage
SET SERDEPROPERTIES ('prop3'='value3', 'prop4'='value4');

ALTER TABLE stocks
CLUSTERED BY (`exchange`, symbol)
SORTED BY (symbol)
INTO 48 BUCKETS;

-- 4.6.8 Miscellaneous alter-table statements
ALTER TABLE log_messages TOUCH PARTITION (year=2012, month=1, day=1);
-- From the OS shell:
--   hive -e 'ALTER TABLE log_messages TOUCH PARTITION (year=2012, month=1, day=1);'

-----------------------------------------------------------------------
-- Chapter 5: HiveQL data manipulation
-----------------------------------------------------------------------

-- 5.1 Loading data into managed tables
LOAD DATA LOCAL INPATH '${env:HOME}/california-employees'
OVERWRITE INTO TABLE employees
PARTITION (country='US', state='CA');

-- 5.2 Inserting data into tables from queries
-- (No semicolon between PARTITION and SELECT: it is one INSERT statement.)
INSERT OVERWRITE TABLE employees
PARTITION (country='US', state='OR')
SELECT * FROM staged_employees se
WHERE se.cnty='US' AND se.st='OR';

-- Multi-insert: scan staged_employees once, write several partitions.
FROM staged_employees se
INSERT OVERWRITE TABLE employees PARTITION (country='US', state='OR')
  SELECT * WHERE se.cnty='US' AND se.st='OR'
INSERT OVERWRITE TABLE employees PARTITION (country='US', state='CA')
  SELECT * WHERE se.cnty='US' AND se.st='CA'
INSERT OVERWRITE TABLE employees PARTITION (country='US', state='IL')
  SELECT * WHERE se.cnty='US' AND se.st='IL';

-- Dynamic partition insert: partition values come from the trailing
-- SELECT columns.  ("..." is the book's placeholder for the data columns.)
INSERT OVERWRITE TABLE employees
PARTITION (country, state)
SELECT ..., se.cnty, se.st
FROM staged_employees se;

-- Mixed static/dynamic partitions: static keys must precede dynamic ones.
INSERT OVERWRITE TABLE employees
PARTITION (country='US', state)
SELECT ..., se.cnty, se.st
FROM staged_employees se
WHERE se.cnty='US';

SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions.pernode=1000;

-- 5.3 Creating tables and loading them in one query (CTAS)
CREATE TABLE ca_employees AS
SELECT name, salary, address
FROM employees
WHERE state='CA';   -- no "se" alias exists here; bare column name is correct

-- 5.4 Exporting data
-- If the data files are already in the needed format, a filesystem copy works:
--   hadoop fs -cp source_path target_path
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/ca_employees'
SELECT name, salary, address
FROM employees
WHERE state='CA';
-- Inspecting the exported files from the Hive CLI:
--   hive> ! ls /tmp/ca_employees;
--   hive> ! cat /tmp/ca_employees/000000_0;

-----------------------------------------------------------------------
-- Chapter 6: HiveQL queries
-----------------------------------------------------------------------

-- 6.1 SELECT ... FROM
CREATE TABLE employees (
  name         STRING,
  salary       FLOAT,
  subordinates ARRAY<STRING>,
  deductions   MAP<STRING, FLOAT>,
  address      STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>)
PARTITIONED BY (country STRING, state STRING);

SELECT name, salary FROM employees;
SELECT e.name, e.salary FROM employees e;

SELECT upper(name), salary, deductions["Federal Taxes"],
       round(salary * (1 - deductions["Federal Taxes"]))
FROM employees;

-- 6.1.5 The LIMIT clause
SELECT upper(name), salary, deductions["Federal Taxes"],
       round(salary * (1 - deductions["Federal Taxes"]))
FROM employees
LIMIT 2;

-- 6.1.6 Column aliases
SELECT upper(name), salary, deductions["Federal Taxes"] AS fed_taxes,
       round(salary * (1 - deductions["Federal Taxes"])) AS salary_minus_fed_taxes
FROM employees
LIMIT 2;

-- 6.1.7 Nested SELECT statements
-- upper(name) must be aliased so the outer query can refer to e.name.
FROM (
  SELECT upper(name) AS name, salary,
         deductions["Federal Taxes"] AS fed_taxes,
         round(salary * (1 - deductions["Federal Taxes"])) AS salary_minus_fed_taxes
  FROM employees) e
SELECT e.name, e.salary_minus_fed_taxes
WHERE e.salary_minus_fed_taxes > 70000;

-- 6.1.8 CASE ... WHEN ... THEN
SELECT name, salary,
  CASE
    WHEN salary < 50000                      THEN 'low'
    WHEN salary >= 50000 AND salary < 70000  THEN 'middle'
    WHEN salary >= 70000 AND salary < 100000 THEN 'high'
    ELSE 'very high'
  END AS bracket
FROM employees;

-- 6.3 GROUP BY
SELECT year(ymd), avg(price_close)
FROM stocks
WHERE `exchange` = 'NASDAQ' AND symbol = 'AAPL'
GROUP BY year(ymd);

-- HAVING filters the aggregated groups after GROUP BY.
SELECT year(ymd), avg(price_close)
FROM stocks
WHERE `exchange` = 'NASDAQ' AND symbol = 'AAPL'
GROUP BY year(ymd)
HAVING avg(price_close) > 50.0;

-- 6.4 JOIN statements
-- 6.4.1 Inner joins: Hive only supports equality conditions in ON.
SELECT a.ymd, a.price_close, b.price_close
FROM stocks a JOIN stocks b ON a.ymd = b.ymd
WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM';

SELECT s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s JOIN dividends d
  ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';

-- Three-way self join.
SELECT a.ymd, a.price_close, b.price_close, c.price_close
FROM stocks a
JOIN stocks b ON a.ymd = b.ymd
JOIN stocks c ON a.ymd = c.ymd
WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM' AND c.symbol = 'GE';

-- STREAMTABLE hint: stream the named table through the reducers instead of
-- buffering it in memory.  (Must be /*+ ... */, not a plain comment.)
SELECT /*+ STREAMTABLE(s) */ s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s JOIN dividends d
  ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';
-- Hive script (study notes)
-- Blog footer: latest recommended article published 2024-05-05 20:10:38