Popularity Percentage
Find the popularity percentage for each user on Facebook. The popularity percentage is defined as the total number of friends the user has divided by the total number of users on the platform, then converted into a percentage by multiplying by 100. Output each user along with their popularity percentage. Order records in ascending order by user id. The 'user1' and 'user2' columns are pairs of friends.
Table: facebook_friends
-- Create the table: comma-delimited text rows, one friend pair (user1, user2) per row.
CREATE TABLE `facebook_friends` (
    `user1` int,
    `user2` int
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
    'field.delim' = ',',
    'serialization.format' = ','
)
STORED AS
    INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://nameservice1/user/hive/warehouse/strata.db/facebook_friends';
-- Preview the data
hive> select * from `facebook_friends` limit 5;
OK
2 1
1 3
4 1
1 5
1 6
-- Solution: popularity percentage per user = friend count / total users * 100,
-- returned in ascending user id order.
--
-- friend_pairs : each friendship listed in both directions, because a row
--                (user1, user2) makes each user a friend of the other.
--                UNION (not UNION ALL) removes pairs that would appear twice.
-- friend_counts: number of friends per user (one row per user).
-- user_total   : number of users. Every user found in either column of
--                facebook_friends appears as user_id in friend_pairs, so
--                counting friend_counts rows counts users. Users with no
--                row in facebook_friends are not counted.
--
-- The ORDER BY sits on the outermost select: an ORDER BY inside a CTE does
-- not guarantee the order of the final result once it is joined.
with friend_pairs as (
    select user1 as user_id, user2 as friend_id
    from facebook_friends
    union
    select user2 as user_id, user1 as friend_id
    from facebook_friends
),
friend_counts as (
    select user_id, count(friend_id) as friend_cnt
    from friend_pairs
    group by user_id
),
user_total as (
    select count(user_id) as user_cnt
    from friend_counts
)
select
    fc.user_id as user1,
    fc.friend_cnt / ut.user_cnt * 100 as popularity_percent
from friend_counts fc
cross join user_total ut
order by user1
;
OK
1 55.55555555555556
2 33.33333333333333
3 33.33333333333333
4 11.11111111111111
5 11.11111111111111
6 22.22222222222222
7 11.11111111111111
8 11.11111111111111
9 11.11111111111111