一直在写pandas,都没记录下来,下面的语言是工作中用pandas生成了一个透视表,pandas比较灵活,具体代码如下:
#ecoding:utf-8
'''
Created on 2015年9月7日
@author: ZHOUMEIXU204
'''
import MySQLdb
import pandas as pd
import numpy as np
import datetime
con=MySQLdb.connect(host="10.10.109.62", port=1333, user="zhoumeixu204", \
passwd="zhoumeixu204@123456!", db="tracker", use_unicode=True, charset="utf8")
result=pd.read_sql("select * from( \
SELECT left(insert_time,8) as insert_date, option_label as position_name , count(*) as cnt FROM tracker.hbase_event \
where site = 'pc' and category = 'hp' and action = 'click' and insert_time between 20150831000000000 and 20150907000000000 \
group by 1,2)a union(SELECT left(insert_time,8) as insert_date, position_name ,count(*) as cnt FROM tracker.hbase_campaign \
where site = 'pc' and position = 'hp' \
and insert_time between 20150831000000000 and 20150907000000000 group by 1,2)",con)
result_pivot=pd.pivot_table(result, index=['position_name'],columns=["insert_date"],values=['cnt'], aggfunc=np.sum)
print(type(result_pivot))
result_pivot[u'总计']=result_pivot.sum(axis=1)
result_sum=pd.DataFrame(result_pivot.sum()).T
result_pivot_sum=result_pivot.append(result_sum)
result_pivot_sum=result_pivot_sum.rename(index={0:u'总计'})
time_day=(datetime.datetime.now()-datetime.timedelta(days=1)).strftime("%Y.%m.%d")
time_day_week=(datetime.datetime.now()-datetime.timedelta(days=8)).strftime("%Y.%m.%d")
result='result'+time_day+"--"+time_day_week
# result_pivot=result_pivot.append(result_pivot.sum(axis=0),ignore_index=True)
result_pivot_sum.to_excel("D:\\Users\\zhoumeixu204\\Desktop\\%s.xls"%(result),encoding="utf-8")
con.commit()
con.close()
print("sucess")
# result_pivot=pd.pivot_table(result, index=['position_name'],columns=["insert_date"],values=['cnt'], aggfunc=np.sum,margins=True) 有margins自动会all