#!/usr/bin/env python
# coding: utf-8
import numpy as np
import pandas as pd
## Aggregate per-movie box office from the raw records and export two files:
## a per-movie table (CSV) and a movie-name -> movie-id lookup (text).

# Column names assigned to the header-less input CSV, in file order.
_BOX_OFFICE_COLUMNS = ['movie_name', 'movie_id', 'time', 'box_value']


def _load_box_office(path):
    """Read the header-less box-office CSV at *path* and name its columns."""
    movie_box_df = pd.read_csv(path, header=None)
    movie_box_df.columns = _BOX_OFFICE_COLUMNS
    return movie_box_df


def _total_box_by_movie(movie_box_df):
    """Return one row per ``movie_name`` with the sum of its ``box_value``."""
    # Only box_value is summed. Summing every column (as a bare
    # ``groupby(...).sum()`` does) would also add up ``movie_id`` and ``time``,
    # and the result differs between pandas versions for non-numeric columns.
    # NOTE(review): assumes no per-movie ``time`` aggregate is needed
    # downstream -- confirm.
    return movie_box_df.groupby('movie_name', as_index=False)['box_value'].sum()


def _first_id_by_movie(movie_box_df):
    """Return the ``(movie_name, movie_id)`` pairs, one row per movie name."""
    # The original author's notes record 3448 distinct names versus 3490
    # distinct ids, i.e. some names carry several ids; the first id seen for
    # each name is kept.  drop_duplicates returns a new frame, so the source
    # frame is never mutated through a slice.
    pairs = movie_box_df[['movie_name', 'movie_id']]
    return pairs.drop_duplicates(subset='movie_name')


def _build_movie_table(movie_box_df):
    """Join each movie's first id with its total box office."""
    movie_id = _first_id_by_movie(movie_box_df)
    movie_whole_box = _total_box_by_movie(movie_box_df)
    return pd.merge(movie_id, movie_whole_box, how='left', on='movie_name')


def _build_movie2index(movie_table):
    """Return a plain ``{movie_name: movie_id}`` dict from the movie table."""
    names = movie_table['movie_name'].tolist()
    ids = movie_table['movie_id'].tolist()
    return dict(zip(names, ids))


def main(input_path='./result.csv',
         table_path='movie_all_box_office.csv',
         index_path='movie2index.txt'):
    """Build and write the per-movie box-office table and the id lookup.

    Args:
        input_path: Header-less CSV whose columns are, in order,
            movie_name, movie_id, time, box_value.
        table_path: Destination CSV with columns movie_name, movie_id,
            box_value (no index column).
        index_path: Destination text file receiving ``str()`` of the
            ``{movie_name: movie_id}`` dict.

    Returns:
        A ``(movie_table, movie2dict)`` tuple: the merged DataFrame and the
        name-to-id dict that were written out.
    """
    movie_box_df = _load_box_office(input_path)

    movie_table = _build_movie_table(movie_box_df)
    movie_table.to_csv(table_path, index=False)

    movie2dict = _build_movie2index(movie_table)
    # Explicit UTF-8 so non-ASCII movie names are written the same way on
    # every platform (to_csv above already defaults to UTF-8).
    with open(index_path, 'w', encoding='utf-8') as f:
        f.write(str(movie2dict))

    return movie_table, movie2dict


if __name__ == '__main__':
    main()
# pandas groupby
# Latest recommended article, published 2023-10-09 15:16:55