1、某游戏的客户端每隔5分钟会向服务端报告一次玩家的账户积分,如果两次报告的时间间隔不大于5分钟,认为该玩家在这5分钟内在线,假设报告数据的格式如下:
IP Datetime Score
223.152.112.238 2014-08-22 12:01:35 54232
IP Datetime Score
223.152.112.238 2014-08-22 12:01:35 54232
现有一天的数据,按时间顺序保存,粗略估计玩家数在百万左右。请使用尽量少的硬件资源完成以下需求:(1)统计在线时长最长的十个玩家;(2)如果玩家两次提交的积分相同,则认为玩家在这5分钟的在线时间内不活跃,请统计一天内一直处于不活跃状态的玩家的百分比。
#!/bin/bash
# Analyse one day of player score reports.
#
# Each data line has the form "IP YYYY-MM-DD HH:MM:SS Score". Lines that do
# not start with a digit (such as a repeated "IP Datetime Score" header) are
# skipped.
#
# stderr is deliberately NOT redirected to /dev/null any more: a missing or
# unreadable data file should be reported instead of silently producing
# empty results.

# Location of the report log. May be overridden from the environment; the
# default is the original fixed path.
DATA_FILE=${DATA_FILE:-/root/tkp/score.dat}
# NOTE(review): TEMP_FILE is not referenced by any code visible here; kept
# unchanged in case something else relies on it.
TEMP_FILE=/root/tkp/temp.dat

# Distinct player IPs (one per line) and how many there are, kept as
# script-level globals. The file is scanned once; the count is derived from
# the list instead of running the same pipeline a second time.
list=$(awk '/^[0-9]/ { print $1 }' "$DATA_FILE" | sort -u)
TOTAL_LINES=$(printf '%s\n' "$list" | grep -c .)
unlive_count=0

#######################################
# Print the ten players with the longest online time, longest first.
# A player is credited 5 minutes for every pair of consecutive reports that
# are more than 0 and at most 300 seconds apart. A lone report, or a report
# that follows a longer gap, earns nothing, so simply counting reports would
# overstate the online time.
# Globals:   DATA_FILE (read)
# Arguments: none
# Outputs:   up to ten "IP N minutes" lines on stdout
#######################################
top_online_users() {
  awk '
    # Seconds on a continuous scale for a date and a clock time; only the
    # difference between two results is meaningful. Uses a March-based
    # calendar so that month lengths and leap years are handled.
    function stamp(date, clock,    d, t, y, m, days) {
      split(date, d, "-")
      split(clock, t, ":")
      y = d[1] + 0
      m = d[2] + 0
      if (m <= 2) { y -= 1; m += 12 }
      days = 365 * y + int(y / 4) - int(y / 100) + int(y / 400)
      days += int((153 * (m - 3) + 2) / 5) + d[3]
      return days * 86400 + t[1] * 3600 + t[2] * 60 + t[3]
    }
    /^[0-9]/ {
      now = stamp($2, $3)
      if ($1 in last) {
        gap = now - last[$1]
        if (gap > 0 && gap <= 300) online[$1] += 5
      }
      last[$1] = now
    }
    END {
      # Iterate over every IP seen, so players with no credited interval
      # are still listed (with 0 minutes).
      for (ip in last) printf("%s %d minutes\n", ip, online[ip])
    }
  ' "$DATA_FILE" | sort -k2,2nr | head -10
}

#Phase1 output the top 10 online users
echo "TOP 10 online users"
top_online_users
echo "----------------------------------------------------------"
#Phase2 output the percent of users that were always inactive
#######################################
# Tell whether a player stayed inactive for the whole log, i.e. every one of
# the player's reports carries the same score as the first one.
#
# The IP is compared exactly against the first field and the score against
# the fourth field. Substring/regex matching would confuse 1.2.3.4 with
# 11.2.3.4, and would find a score such as "5" inside IPs or timestamps.
# The file is read once, stopping at the first score change.
#
# Globals:   DATA_FILE (read; falls back to /root/tkp/score.dat when unset)
# Arguments: $1 - player IP
# Outputs:   "1" on stdout if the score never changed (also when the IP has
#            no reports at all), otherwise "0"
#######################################
live() {
  local ip=$1
  awk -v ip="$ip" '
    $1 == ip {
      if (!seen) {
        first = $4
        seen = 1
      } else if ($4 != first) {
        changed = 1
        exit            # END still runs and prints the verdict
      }
    }
    END { print (changed ? "0" : "1") }
  ' "${DATA_FILE:-/root/tkp/score.dat}"
}
#######################################
# Print the percentage of players whose score never changed across all of
# their reports.
#
# Both the number of inactive players and the total number of players are
# gathered in a single pass over the data, rather than rescanning the whole
# file once per player, which does not scale to around a million players.
# A player is identified by the first field; the score is the fourth field.
# Lines that do not start with a digit are ignored.
#
# Globals:   DATA_FILE (read; falls back to /root/tkp/score.dat when unset)
#            unlive_count, percent (written)
# Arguments: none
# Outputs:   "the percentage of unlive-user is N%" on stdout, N being the
#            integer (truncated) percentage; 0 when no player was found
#######################################
report_unlive_percent() {
  local stats total
  stats=$(awk '
    /^[0-9]/ {
      if (!($1 in first)) {
        first[$1] = $4
        total++
      } else if (!($1 in active) && $4 != first[$1]) {
        # Count each player at most once, on the first score change.
        active[$1] = 1
        changed++
      }
    }
    END { printf("%d %d\n", total - changed, total) }
  ' "${DATA_FILE:-/root/tkp/score.dat}")
  read -r unlive_count total <<<"$stats"
  # awk may print nothing if the file could not be opened.
  unlive_count=${unlive_count:-0}
  total=${total:-0}
  if (( total > 0 )); then
    percent=$(( unlive_count * 100 / total ))
  else
    # Avoid dividing by zero when the log holds no player records.
    percent=0
  fi
  printf '%s\n' "the percentage of unlive-user is ${percent}%"
}

report_unlive_percent