# Load the retail_db dump into the MySQL `retail` database as root.
# NOTE(review): passing the password inline (-pok) leaks it into shell
# history and `ps` output; prefer an interactive prompt or ~/.my.cnf.
mysql -uroot -pok retail < /root/day01/retail_db.sql
# Incremental append import of `orders` into HDFS with 3 mappers.
# Fixes: --incremental append needs an explicit --last-value baseline
# (0 imports all rows on the first run; sqoop prints the new high-water
# mark afterwards), and the stray trailing `;` was dropped.
sqoop import --connect jdbc:mysql://hadoop001:3306/retail \
--username root --password ok \
--table orders --incremental append --check-column order_id \
--last-value 0 \
--target-dir /data1/retail_db/orders --m 3
-- External Hive table over the sqoop-imported order files.
CREATE EXTERNAL TABLE orders_ext (
    order_id          INT,
    order_date        STRING,
    order_customer_id INT,
    order_status      STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION "/data1/retail_db/orders";
-- MySQL DDL for the sqoop-export target table (database `sqoop`).
-- Fixes: the statement was unterminated (missing `;`) and declared no
-- primary key; order_id is the natural key in the source retail_db schema.
CREATE TABLE `orders` (
  `order_id` int(11) NOT NULL DEFAULT '0',
  `order_date` datetime NOT NULL,
  `order_customer_id` int(11) NOT NULL,
  `order_status` varchar(45) NOT NULL,
  PRIMARY KEY (`order_id`)
);
# Export the HDFS order files back into MySQL (database `sqoop`).
# Fix: the original lines had no `\` continuations, so the shell ran each
# line as a separate, broken command instead of one sqoop invocation.
sqoop export --connect jdbc:mysql://hadoop001:3306/sqoop \
--username root --password ok --table orders --m 1 \
--export-dir /data1/retail_db/orders \
--input-fields-terminated-by ",";
-- Managed copy of the order data, partitioned by calendar date.
-- NOTE(review): `date` is a reserved word in recent Hive releases;
-- consider renaming the partition column (e.g. order_dt) before upgrading.
CREATE TABLE orders_partition (
    order_id          INT,
    order_date        STRING,
    order_customer_id INT,
    order_status      STRING
)
PARTITIONED BY (date STRING);
-- Dynamic-partition insert: one partition per order date.
-- Fixes: dynamic partitioning must be enabled and set to nonstrict mode
-- (Hive's default strict mode rejects an insert with no static partition),
-- and the date-extraction function is to_date() — Hive has no date() UDF.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=10000;
set hive.exec.max.dynamic.partitions=50000;
insert into orders_partition partition(date)
select *,to_date(order_date) from orders_ext;
-- Student table from space-delimited text.
-- Fix: `if exists` makes the script idempotent on a fresh metastore.
-- NOTE(review): no LOCATION is given, so this external table still lands
-- in the warehouse directory — confirm that is intended.
drop table if exists ext_students;
create external
table ext_students(s_id int,s_name string,s_birth string,s_sex string)
row format delimited
fields terminated by ' ';
-- Course table from space-delimited text.
-- Fix: `if exists` makes the script idempotent on a fresh metastore.
drop table if exists ext_course;
create external
table ext_course(c_id int,c_name string,t_id int)
row format delimited
fields terminated by ' ';
-- Teacher table from space-delimited text.
-- Fix: `if exists` makes the script idempotent on a fresh metastore.
drop table if exists ext_teacher;
create external
table ext_teacher(t_id int,t_name string)
row format delimited
fields terminated by ' ';
-- Score fact table from space-delimited text.
-- Fix: `if exists` makes the script idempotent on a fresh metastore.
drop table if exists ext_score;
create external
table ext_score(s_id int,c_id int,s_score int)
row format delimited
fields terminated by ' ';
# Incremental append import of `customers` with 3 mappers.
# Fixes: the first line was missing its `\` continuation (the command ran
# truncated), and --incremental append needs a --last-value baseline.
sqoop import --connect jdbc:mysql://hadoop001:3306/retail \
--username root --password ok \
--table customers --incremental append --check-column customer_id \
--last-value 0 \
--target-dir /data1/retail_db/customers --m 3
-- External Hive table over the sqoop-imported customer files.
CREATE EXTERNAL TABLE customers_ext (
    customer_id       INT,
    customer_fname    STRING,
    customer_lname    STRING,
    customer_email    STRING,
    customer_password STRING,
    customer_street   STRING,
    customer_city     STRING,
    customer_state    STRING,
    customer_zipcode  STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION "/data1/retail_db/customers";
# Incremental append import of `departments` with 3 mappers.
# Fix: --incremental append needs an explicit --last-value baseline.
sqoop import --connect jdbc:mysql://hadoop001:3306/retail \
--username root --password ok \
--table departments --incremental append --check-column department_id \
--last-value 0 \
--target-dir /data1/retail_db/departments --m 3
-- External Hive table over the sqoop-imported department files.
-- Fix: added `stored as textfile` for consistency with the sibling DDLs
-- (textfile is the default, so behavior is unchanged).
create external table departments_ext(
department_id int,
department_name string)
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as textfile
location "/data1/retail_db/departments";
# Incremental append import of `products` with 3 mappers.
# Fix: --incremental append needs an explicit --last-value baseline.
sqoop import --connect jdbc:mysql://hadoop001:3306/retail \
--username root --password ok \
--table products --incremental append --check-column product_id \
--last-value 0 \
--target-dir /data1/retail_db/products --m 3
-- External Hive table over the sqoop-imported product files.
-- Fix: added `stored as textfile` for consistency with the sibling DDLs
-- (textfile is the default, so behavior is unchanged).
-- NOTE(review): float is a lossy type for prices (decimal would be exact),
-- and a comma inside product_description would break the ','-delimited
-- parse — confirm the source data is comma-free.
create external table products_ext(
product_id int,
product_category_id int,
product_name string,
product_description string,
product_price float,
product_image string)
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as textfile
location "/data1/retail_db/products";
查询顾客表中地区为“NY”且所在城市为'New York'的用户
-- Customers in state 'NY' whose city is 'New York'.
-- Fix: only customers_ext is created in this script; a bare `customers`
-- table does not exist in the Hive metastore at this point.
select * from customers_ext where customer_state='NY' and customer_city='New York';
查询订单表中共有多少不同顾客下过订单
-- Number of distinct customers that have placed at least one order.
select count(distinct order_customer_id) from orders_partition;
查询商品表中前5个商品
-- First 5 products (no ORDER BY, so the rows returned are arbitrary).
-- Fix: only products_ext is created in this script; a bare `products`
-- table does not exist in the Hive metastore at this point.
select * from products_ext limit 5;
使用关联查询获取没有订单的所有顾客
-- Anti-join: customers with no matching order (left join + IS NULL).
-- Fix: only customers_ext is created in this script; a bare `customers`
-- table does not exist in the Hive metastore at this point.
select * from customers_ext c left join orders_partition o
on c.customer_id=o.order_customer_id where o.order_customer_id is null;
将order_items.csv数据通过load方式加载到order_items表
-- CSV-backed order_items table using OpenCSVSerde (handles quoting and
-- escapes). NOTE: OpenCSVSerde exposes every column as STRING regardless
-- of the declared type, so numeric work on these columns needs a cast.
create table order_items(
order_item_id string,
order_item_order_id string,
order_item_product_id string,
order_item_quantity string,
order_item_subtotal string,
order_item_product_price string)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties(
"separatorChar"=",",
"quoteChar"="\"",
"escapeChar"="\\")
location '/data2/retail/order_items';
-- Load the local CSV into the table (the file is copied into the
-- table's location directory above).
load data local inpath '/root/retail/order_items.csv'
into table order_items;
将order_items表中数据加载到order_test2表
-- CTAS copy: order_test2 gets the data but Hive's default SerDe/format,
-- not the source table's OpenCSVSerde.
create table order_test2 as select * from order_items;
将order_items表中数据同时加载到o_01和o_02表
-- Clone the layout (LIKE copies schema and SerDe, not data), then use a
-- multi-insert so order_items is scanned only once for both targets.
create table o_01 like order_items;
create table o_02 like order_items;
-- Fix: the multi-insert form requires the TABLE keyword after INSERT INTO
-- on older Hive releases; adding it keeps the statement portable.
from order_items
insert into table o_01 select *
insert into table o_02 select *;
将order_items表中数据导出到本地以及hdfs
-- Export to the local filesystem, then to HDFS.
-- WARNING: INSERT OVERWRITE DIRECTORY deletes everything already in the
-- target directory. No ROW FORMAT is given, so fields are separated by
-- Hive's default ^A (\001) delimiter.
insert overwrite local directory '/root/data' select * from order_items;
insert overwrite directory '/root/data' select * from order_items;
统计order_items表中订单数量排行(取前10)
-- Top 10 orders by number of order lines.
select order_item_order_id,count(1) total from order_items group by
order_item_order_id order by total desc limit 10;
统计order_items表中销量最多的前10个商品
-- Top 10 products by units sold.
-- Fix: count(1) counts order *lines*; sales volume must sum the per-line
-- quantity (cast needed because OpenCSVSerde yields string columns).
select order_item_product_id,
       sum(cast(order_item_quantity as int)) total
from order_items
group by order_item_product_id
order by total desc
limit 10;
统计每个商品大类下的商品子类
-- Products listed under each category.
-- NOTE(review): neither `categories` nor `products` is created in this
-- script — they must exist in the metastore already; verify before running.
select category_name,product_name from categories c join products p
on c.category_id=p.product_category_id;
根据商品子类id大小，对每个商品大类下的子类进行排名（使用 ROW_NUMBER() 函数）
-- Rank each category's products by product id using ROW_NUMBER().
-- Fix: without ORDER BY inside the OVER clause the numbering is arbitrary
-- and does not satisfy the "rank by subclass id" requirement.
select category_name,
       product_name,
       ROW_NUMBER() over(partition by category_name order by p.product_id) rn
from categories c join products p
on c.category_id=p.product_category_id;
统计order_items表中各订单中不同商品总数、订单总金额、订单最高/最低/平均金额
-- Per order: distinct product count, order total, and the max/min/avg
-- line amount.
-- Fixes: the money aggregates must use order_item_subtotal — product_price
-- ignores the per-line quantity; values are cast to double because
-- OpenCSVSerde exposes every column as string; the statement was missing
-- its terminating semicolon.
select order_item_order_id,
       count(distinct order_item_product_id)    as product_cnt,
       sum(cast(order_item_subtotal as double)) as order_total,
       max(cast(order_item_subtotal as double)) as max_amount,
       min(cast(order_item_subtotal as double)) as min_amount,
       avg(cast(order_item_subtotal as double)) as avg_amount
from order_items
group by order_item_order_id;
hive数据仓库项目sql语句
最新推荐文章于 2023-04-21 15:10:43 发布