pandas——常用操作

常用操作

import pandas as pd
import numpy as np
# Sample expense records: one row per (Category, Month) pair with its Amount.
# NOTE(review): 'Marth' looks like a typo for 'March'; left unchanged here
# because the printed outputs that follow use the same spelling.
example = pd.DataFrame({'Amount': [74., 235., 175., 100., 115., 245., 180., 90., 88., 129., 273., 300.],
              'Category': ['Transportation', 'Grocery', 'Household', 'Entertainment', 
                           'Transportation', 'Grocery', 'Household', 'Entertainment', 
                           'Transportation', 'Grocery', 'Household', 'Entertainment'],
              'Month': ['January', 'January', 'January', 'January', 
                        'February', 'February', 'February', 'February', 
                        'Marth', 'Marth', 'Marth', 'Marth']})
example
AmountCategoryMonth
074.0TransportationJanuary
1235.0GroceryJanuary
2175.0HouseholdJanuary
3100.0EntertainmentJanuary
4115.0TransportationFebruary
5245.0GroceryFebruary
6180.0HouseholdFebruary
790.0EntertainmentFebruary
888.0TransportationMarth
9129.0GroceryMarth
10273.0HouseholdMarth
11300.0EntertainmentMarth
1. 排序操作
# `by` lists the sort keys in priority order: rows are sorted by Month
# ascending first, then by Amount descending within each Month.
example.sort_values(by=['Month', 'Amount'], ascending=[True, False])
AmountCategoryMonth
5245.0GroceryFebruary
6180.0HouseholdFebruary
4115.0TransportationFebruary
790.0EntertainmentFebruary
1235.0GroceryJanuary
2175.0HouseholdJanuary
3100.0EntertainmentJanuary
074.0TransportationJanuary
11300.0EntertainmentMarth
10273.0HouseholdMarth
9129.0GroceryMarth
888.0TransportationMarth
# Small two-column frame that contains repeated rows, e.g. (5, 4) and (2, 18),
# used for the sorting and de-duplication examples.
data = pd.DataFrame({'k1':[1,2,3,5,5,4,6,2,2], 'k2':[12,8,11,4,4,52,30,18,18]})
data
k1k2
0112
128
2311
354
454
5452
6630
7218
8218
data.sort_values(by='k2')    # Sort rows by column k2 (ascending is the default)
k1k2
354
454
128
2311
0112
7218
8218
6630
5452
data.drop_duplicates()    # Drop rows that exactly duplicate an earlier row (all columns equal)
k1k2
0112
128
2311
354
5452
6630
7218
data.drop_duplicates(subset=['k1'])    # Compare only column k1: rows whose k1 repeats an earlier row's value are dropped
k1k2
0112
128
2311
354
5452
6630
# Frame whose k2 column holds category codes ('A1', 'A2', 'B2') for the mapping example.
data2 = pd.DataFrame({'k1':[1,5,3,16,19], 'k2':['A1','A2', 'A1', 'B2', 'A2']})
data2
k1k2
01A1
15A2
23A1
316B2
419A2
如果希望将 A1 和 A2 都归为 A 类,可以定义一个映射函数,再用 apply 按行应用:
def map(series):
    """Collapse the 'A1' and 'A2' codes in a row's 'k2' field into 'A'.

    Intended for row-wise use, e.g. ``DataFrame.apply(map, axis=1)``.
    NOTE: the name shadows the built-in ``map``; it is kept because callers
    refer to this function by that name.

    Args:
        series: A row-like object indexable by ``'k2'``.

    Returns:
        ``'A'`` when ``series['k2']`` equals ``'A1'`` or ``'A2'``;
        otherwise ``None``.
    """
    code = series['k2']
    # Both sub-codes belong to the same top-level class; anything else is unmapped.
    return 'A' if code == 'A1' or code == 'A2' else None
# axis=1 passes each row to map; the returned values become the new column k2_map.
data2['k2_map'] = data2.apply(map, axis=1)
data2
k1k2k2_map
01A1A
15A2A
23A1A
316B2None
419A2A
# Two columns of 5 random normal samples each; values differ on every run (no seed is set).
df = pd.DataFrame({'t1':np.random.randn(5), 't2':np.random.randn(5)})
df
t1t2
00.909904-0.394096
10.9431650.480281
2-1.6162222.153146
3-0.403424-1.160060
40.8500741.280206
# Add a new column 'ration' whose value is t1 / t2; the result is bound to df2.
# NOTE(review): 'ration' is presumably meant to be 'ratio' — confirm before renaming.
df2 = df.assign(ration = df['t1'] / df['t2'])
df2
t1t2ration
00.909904-0.394096-2.308840
10.9431650.4802811.963779
2-1.6162222.153146-0.750633
3-0.403424-1.1600600.347762
40.8500741.2802060.664013
df2.drop('ration', axis=1)     # Drop the 'ration' column (axis=1 selects columns); the result is only displayed, not assigned back
t1t2
00.909904-0.394096
10.9431650.480281
2-1.6162222.153146
3-0.403424-1.160060
40.8500741.280206
将数据按指定区间切分
age = pd.Series([18, 9, 6, 20, 26, 67, 52, 38, 40, 81, 36, 74, 88])
bins = [10, 40, 60, 90]     # Bin edges: values are cut into the intervals (10, 40], (40, 60], (60, 90]
res = pd.cut(age, bins)
res
0     (10.0, 40.0]
1              NaN
2              NaN
3     (10.0, 40.0]
4     (10.0, 40.0]
5     (60.0, 90.0]
6     (40.0, 60.0]
7     (10.0, 40.0]
8     (10.0, 40.0]
9     (60.0, 90.0]
10    (10.0, 40.0]
11    (60.0, 90.0]
12    (60.0, 90.0]
dtype: category
Categories (3, interval[int64, right]): [(10, 40] < (40, 60] < (60, 90]]
不落在任何区间内的数据(例如这里的 9 和 6,都不大于 10)会被设置为 NaN
# Count how many values fall into each interval. Series.value_counts() is used
# because the top-level pandas.value_counts function is deprecated (pandas 2.1+).
res.value_counts()
(10, 40]    6
(60, 90]    4
(40, 60]    1
dtype: int64
# Give each interval a name; the number of labels must equal the number of intervals.
w = pd.cut(age, bins, labels=['youth', 'mid', 'old'])
# Series.value_counts() replaces the deprecated top-level pandas.value_counts (pandas 2.1+).
w.value_counts()
youth    6
old      4
mid      1
dtype: int64
res.isnull()      # isnull() flags each element that is NaN; DataFrame offers the same method
0     False
1      True
2      True
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
dtype: bool
qw = pd.Series([np.nan, 1, 2, 3])  # Series with one missing value, for the fillna example
qw
0    NaN
1    1.0
2    2.0
3    3.0
dtype: float64
qw.fillna(5)   # Replace every NaN with the given value
0    5.0
1    1.0
2    2.0
3    3.0
dtype: float64

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值