wide_to_long同时stack多列

import pandas as pd
# 读取movie数据集,选取所有演员名和其Facebook likes
movie = pd.read_csv('data/movie.csv')
actor = movie[['movie_title', 'actor_1_name', 'actor_2_name', 'actor_3_name', 
               'actor_1_facebook_likes', 'actor_2_facebook_likes', 'actor_3_facebook_likes']]
actor.head()
movie_titleactor_1_nameactor_2_nameactor_3_nameactor_1_facebook_likesactor_2_facebook_likesactor_3_facebook_likes
0AvatarCCH PounderJoel David MooreWes Studi1000.0936.0855.0
1Pirates of the Caribbean: At World's EndJohnny DeppOrlando BloomJack Davenport40000.05000.01000.0
2SpectreChristoph WaltzRory KinnearStephanie Sigman11000.0393.0161.0
3The Dark Knight RisesTom HardyChristian BaleJoseph Gordon-Levitt27000.023000.023000.0
4Star Wars: Episode VII - The Force AwakensDoug WalkerRob WalkerNaN131.012.0NaN
# 创建一个自定义函数,用来改变列名。wide_to_long要求分组的变量要有相同的数字结尾:
# 使用wide_to_long函数时,要求 1,2,3 这样的顺序信息在列名的最后,并用分隔符隔开。
# 加载数据,调整名字:
def change_col_name(col_name):
    col_name = col_name.replace('_name', '')#把_name换成空白
    if 'facebook' in col_name:
        fb_idx = col_name.find('facebook')
        col_name = col_name[:5] + col_name[fb_idx -1:] + col_name[5:fb_idx-1]
        #print(fb_idx -1) #7,第二个数值从第7位开始取值最后。
    return col_name
actor2 = actor.rename(columns=change_col_name)
actor2.head()
movie_titleactor_1actor_2actor_3actor_facebook_likes_1actor_facebook_likes_2actor_facebook_likes_3
0AvatarCCH PounderJoel David MooreWes Studi1000.0936.0855.0
1Pirates of the Caribbean: At World's EndJohnny DeppOrlando BloomJack Davenport40000.05000.01000.0
2SpectreChristoph WaltzRory KinnearStephanie Sigman11000.0393.0161.0
3The Dark Knight RisesTom HardyChristian BaleJoseph Gordon-Levitt27000.023000.023000.0
4Star Wars: Episode VII - The Force AwakensDoug WalkerRob WalkerNaN131.012.0NaN
# 使用wide_to_long函数,同时stack两列actor和Facebook
stubs = ['actor', 'actor_facebook_likes']
actor2_tidy = pd.wide_to_long(actor2, stubnames=stubs, i=['movie_title'],j='actor_num',sep='_').reset_index()
actor2_tidy.head()                 
 movie_titleactor_numactoractor_facebook_likes
0Avatar1CCH Pounder1000.0
1Pirates of the Caribbean: At World's End1Johnny Depp40000.0
2Spectre1Christoph Waltz11000.0
3The Dark Knight Rises1Tom Hardy27000.0
4Star Wars: Episode VII - The Force Awakens1Doug Walker131.0
#wide_to_long()基本语法如下:
'''
pd.wide_to_long(df, stubnames(提取以指定字符串开头的列),
                   i(用作索引的列),
                   j(提取开头后剩余的部分会成一列,在此指定列名),
                   sep(分隔符),
                   suffix(捕获正则表达式匹配的后缀)
)
'''
# 加载数据
df = pd.read_csv('data/stackme.csv')
df
 StateCountrya1b2Testde
0TXUS0.450.3Test126
1MAUS0.031.2Test297
2ONCAN0.704.2Test342
# 对列重命名
df2 = df.rename(columns = {'a1':'group1_a1', 'b2':'group1_b2',
                                    'd':'group2_a1', 'e':'group2_b2'})
df2
 StateCountrygroup1_a1group1_b2Testgroup2_a1group2_b2
0TXUS0.450.3Test126
1MAUS0.031.2Test297
2ONCAN0.704.2Test342
# 设定stubnames=['group1', 'group2'],对任何数字都起作用
pd.wide_to_long(df2, stubnames=['group1', 'group2'], 
                         i=['State', 'Country', 'Test'], 
                         j='Label', 
                         suffix='.+', 
                         sep='_')
    group1group2
StateCountryTestLabel  
TXUSTest1a10.452
b20.306
MAUSTest2a10.039
b21.207
ONCANTest3a10.704
b24.202
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

缘 源 园

你的鼓励将是我创造的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值