1.创建customers ;customers ;departments ;order_items ;orders ;products的数据库表格方法
CREATE EXTERNAL TABLE IF NOT EXISTS customers (
customer_id int,
customer_fname varchar(45),
customer_lname varchar(45),
customer_email varchar(45),
customer_password varchar(45),
customer_street varchar(255),
customer_city varchar(45),
customer_state varchar(45),
customer_zipcode varchar(45)
)
ROW FORMAT serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")
LOCATION '/data/retail_db/customers';
CREATE EXTERNAL TABLE IF NOT EXISTS customers (
category_id int,
category_department_id int,
category_name varchar(45)
)
ROW FORMAT serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")
LOCATION '/data/retail_db/categories';
CREATE EXTERNAL TABLE IF NOT EXISTS departments (
department_id int,
department_name varchar(45)
)
ROW FORMAT serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")
LOCATION '/data/retail_db/departments';
CREATE EXTERNAL TABLE IF NOT EXISTS order_items (
order_item_id int,
order_item_order_id int,
order_item_product_id int,
order_item_quantity int,
order_item_subtotal float,
order_item_product_price float)
ROW FORMAT serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")
LOCATION '/data/retail_db/order_items';
CREATE EXTERNAL TABLE IF NOT EXISTS orders (
order_id int,
order_date date,
order_customer_id int,
order_status varchar(45)
)
ROW FORMAT serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")
LOCATION '/data/retail_db/orders';
CREATE EXTERNAL TABLE IF NOT EXISTS products (
product_id int,
product_category_id int,
product_name varchar(45),
product_description varchar(255),
product_price float,
product_image varchar(255))
ROW FORMAT serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")
LOCATION '/data/retail_db/products';
2.创建一个新的表 order_details
,同时使用 SELECT
语句来选择特定的列和行
create table order_details as
select order_items.order_item_order_id,
orders.order_date,
customers.customer_lname,
customers.customer_city,
products.product_name,
categories.category_name,
order_items.order_item_quantity,
order_items.order_item_product_price
from
orders join customers on orders.order_customer_id=customers.customer_id
join order_items on orders.order_id=order_items.order_item_order_id
join products on order_items.order_item_product_id=products.product_id
join categories on products.product_category_id=categories.category_id
limit 10;
3.零售商品业务数据查询
select a.id,sum(a.subtotal) total
from
(select orders.order_customer_id id,order_items.order_item_subtotal subtotal
from
orders join customers on orders.order_customer_id=customers.customer_id
join order_items on orders.order_id=order_items.order_item_order_id) a
group by a.id order by total desc;
4.统计每日订单量
select substring(orders.order_date,0,10) daystr,
count(order_id) over(partition by substring(orders.order_date,0,10)) as row_count
from orders order by row_count desc limit 10;
5.统计每日销售额排名
select b.*from(
select a.daystr days,sum(a.subtotal) total,
ROW_NUMBER() over(partition by a.daystr order by sum(a.subtotal) desc) as rn
from
(select substring(orders.order_date,0,10) daystr,
order_items.order_item_subtotal subtotal
from orders join order_items on orders.order_id=order_items.order_item_order_id) a
group by a.daystr) b
where b.rn <=10;
6.在商品表products中,使用字符函数对商品名称product_name进行截取并输出;
要求商品名称不高于x个字符,其中x取第10个字符向后最近空格的位置
关键步骤:substr()可以实现字符截取;locate()可以实现x值的获取
select
product_id,
substr(product_name, 1, locate(' ', product_name, 10) - 10) as truncated_product_name
from
products;
//另一种方法
select substr(a.product_name,0,a.loc)
from
(select product_name,locate(' ',product_name,10) as loc
from products) a limit 5;
//另一种方法
select substr(product_name,0,locate(" ",product_name,10)) as
product_name from products limit 5;