pandas groupby


#!/usr/bin/env python
# coding: utf-8
import numpy as np
import pandas as pd
## Aggregate per-movie box-office totals from ./result.csv and export
## (1) a per-movie summary CSV and (2) a movie_name -> movie_id mapping.

## Load the raw records; the CSV has no header row.
movie_box_df = pd.read_csv('./result.csv', header=None)

## Name the four columns.
movie_box_df.columns = ['movie_name', 'movie_id', 'time', 'box_value']

## Group by movie name and sum every remaining column, then discard the
## summed movie_id, which is meaningless as a total.
## NOTE(review): 'time' is summed too and ends up in the exported CSV --
## confirm that this is intended.
movie_whole_box = (
    movie_box_df.groupby('movie_name').sum().drop(columns='movie_id')
)

## One (movie_name, movie_id) row per movie name, keeping the first id seen.
## The original notes recorded 3448 distinct names against 3490 distinct
## ids, i.e. some names carry more than one id.
## drop_duplicates returns a new frame, so no in-place mutation of a
## selection of movie_box_df is needed.
movie_id = (
    movie_box_df[['movie_name', 'movie_id']]
    .drop_duplicates(subset='movie_name')
)

## Attach the per-movie totals to the id table. 'movie_name' is a column
## on the left and the index level name on the right.
movie_all_box = pd.merge(
    movie_id, movie_whole_box, how='left', on='movie_name'
)
movie_all_box.to_csv('movie_all_box_office.csv', index=False)

## Build the name -> id mapping before writing it out. .tolist() converts
## the values to plain Python scalars so str() renders them cleanly.
movie2dict = dict(
    zip(movie_id['movie_name'].tolist(), movie_id['movie_id'].tolist())
)
with open('movie2index.txt', 'w', encoding='utf-8') as f:
    f.write(str(movie2dict))

## Same mapping derived through groupby: collect the ids of each name into
## a list and keep the first one.
data_dict = movie_id.groupby('movie_name').movie_id.apply(list).to_dict()
x = {name: ids[0] for name, ids in data_dict.items()}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值