Python进行数据的Group by、sort、limit及从分表取数据一例

1、在Mysql单表上操作
# mysql 单表
select * from 
(select uid,sum(gift_point) gift_point 
from gift_record_61 where gift_id in(28,29) group by uid)x 
order by 2 desc
limit 10

2、在Hive汇总表上计算
hive上的表,是将Mysql上的各分表数据进行合并;数据体量大。
#hive
select uid,gift_point from 
(select uid,sum(gift_point) gift_point 
from data_chushou_gift_record where gift_id in(28,29) group by uid)x 
order by gift_point desc
limit 10;

-- resualt:
uid,gift_point 
178236720 11013400
577484623 5305200
367625864 3402100
1126170673 2917400
282417747 2567800
1036752959 1876800
406271 1809900
1094468906 1648500
995497862 1623800
53104325 1592100

3、利用Python从Mysql各分表取数据再进行合并计算
/Users/nisj/PycharmProjects/EsDataProc/Mysql_submete_Data_Group_sort_Limit.py
# -*- coding=utf-8 -*-
import smtplib
import MySQLdb
import warnings
import datetime
import os
import os.path
import shutil
from itertools import groupby
from operator import itemgetter


warnings.filterwarnings("ignore")


db_config = {
    'host': 'MysqlIp',
    'user': 'MysqlUser',
    'passwd': 'MysqlPasswd',
    'port': 3306,
    'db': 'jellyfish_server'
}






def getDB():
    try:
        conn = MySQLdb.connect(host=db_config['host'], user=db_config['user'], passwd=db_config['passwd'],
                               port=db_config['port'])
        conn.autocommit(True)
        curr = conn.cursor()
        curr.execute("SET NAMES utf8");
        curr.execute("USE %s" % db_config['db']);


        return conn, curr
    except MySQLdb.Error, e:
        print "Mysql Error %d: %s" % (e.args[0], e.args[1])
        return None, None




conn, curr = getDB()


today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
tomorrow = today + datetime.timedelta(days=1)




sql_text = "select table_name from information_schema.tables where table_schema='jellyfish_server' and table_name like 'gift_record%';"
curr.execute(sql_text)
tab_list = curr.fetchall()


col_datas_union = []
for tab_name in tab_list:
    tab_name = tab_name[0]


    sql_text = "select uid,gift_point from jellyfish_server.%s where gift_id in(28,29);" % (tab_name)
    curr.execute(sql_text)
    col_datas = curr.fetchall()
    # col_datas_union = list(set(col_datas_union).(set(col_datas))) #去重UNION


    col_datas_union.extend(col_datas)
    # col_datas_union.append(col_datas)  #append与extend的区别


group_col_datas = groupby(sorted(col_datas_union, key=itemgetter(0)), itemgetter(0))
# group_col_datas = groupby(sorted(col_datas, key=itemgetter(0)), itemgetter(0))




result_list = []
rl = []
for key, item in group_col_datas:
    i = 0
    j = 0
    for jtem in item:
        i += float(jtem[1])
        j += 1
    # print key, i, j
    result_list = (key, i, j)
    rl.append(result_list)
rl.sort(key=lambda x:x[1],reverse=True)
print rl[0:10]


curr.close()
conn.close()

数据结果:
[(178236720L, 11013400.0, 28486), (577484623L, 5305200.0, 7764), (367625864L, 3402100.0, 9820), (1126170673L, 2917400.0, 12218), (282417747L, 2567800.0, 4852), (1036752959L, 1876800.0, 18165), (406271L, 1809900.0, 4680), (1094468906L, 1648500.0, 3903), (995497862L, 1623800.0, 14015), (53104325L, 1592100.0, 3699)]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值