数据仓库与应用(创建数据库及相关应用学习)

1.创建customers ;customers ;departments ;order_items ;orders ;products的数据库表格方法

CREATE EXTERNAL TABLE IF NOT EXISTS customers (  
customer_id int,  
customer_fname varchar(45),  
customer_lname varchar(45),  
customer_email varchar(45),  
customer_password varchar(45),  
customer_street varchar(255),   
customer_city varchar(45),
customer_state varchar(45),  
customer_zipcode varchar(45) 
)
ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/customers';


CREATE EXTERNAL TABLE IF NOT EXISTS customers (  
category_id int,  
category_department_id int,  
category_name varchar(45)

ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/categories';

CREATE EXTERNAL TABLE IF NOT EXISTS departments (  
department_id int,
department_name varchar(45)

ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/departments';

CREATE EXTERNAL TABLE IF NOT EXISTS order_items (  
order_item_id int,
order_item_order_id int,  
order_item_product_id int,  
order_item_quantity int,  
order_item_subtotal float,  
order_item_product_price float)
ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/order_items';

CREATE EXTERNAL TABLE IF NOT EXISTS orders (  
order_id int,  
order_date date,  
order_customer_id int,  
order_status varchar(45)
)
ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/orders';

CREATE EXTERNAL TABLE IF NOT EXISTS products (  
product_id int,  
product_category_id int,
product_name varchar(45),  
product_description varchar(255),  
product_price float,
product_image varchar(255)) 
ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/products';

2.创建一个新的表 order_details,同时使用 SELECT 语句来选择特定的列和行

create table order_details as 
                 select order_items.order_item_order_id,
                 orders.order_date,
                 customers.customer_lname,
                 customers.customer_city,
                 products.product_name,
                 categories.category_name,
                 order_items.order_item_quantity,
                 order_items.order_item_product_price
                 from
                 orders join customers on orders.order_customer_id=customers.customer_id
                 join order_items on orders.order_id=order_items.order_item_order_id
                 join products on order_items.order_item_product_id=products.product_id
                 join categories on products.product_category_id=categories.category_id
                 limit 10;

3.零售商品业务数据查询

select a.id,sum(a.subtotal) total
                 from
                 (select orders.order_customer_id id,order_items.order_item_subtotal subtotal
                 from 
                 orders join customers on orders.order_customer_id=customers.customer_id
                 join order_items on orders.order_id=order_items.order_item_order_id) a
                 group by a.id order by total desc;

4.统计每日订单量

select substring(orders.order_date,0,10) daystr,
                 count(order_id) over(partition by substring(orders.order_date,0,10)) as row_count
                 from orders order by row_count desc limit 10;

5.统计每日销售额排名

select b.*from(
                 select a.daystr days,sum(a.subtotal) total,
                 ROW_NUMBER() over(partition by a.daystr order by sum(a.subtotal) desc) as rn
                 from
                 (select substring(orders.order_date,0,10) daystr,
                 order_items.order_item_subtotal subtotal
                 from orders join order_items on orders.order_id=order_items.order_item_order_id) a
                 group by a.daystr) b
                 where b.rn <=10;

6.在商品表products中,使用字符函数对商品名称product_name进行截取并输出;
要求商品名称不高于x个字符,其中x取第10个字符向后最近空格的位置
关键步骤:substr()可以实现字符截取;locate()可以实现x值的获取

select
    product_id,
    substr(product_name, 1, locate(' ', product_name, 10) - 10) as truncated_product_name
from
    products;
//另一种方法
select substr(a.product_name,0,a.loc)
from 
(select product_name,locate(' ',product_name,10) as loc
from products) a limit 5;

//另一种方法
select substr(product_name,0,locate(" ",product_name,10)) as 
product_name from products limit 5;

  • 13
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值