4.机器学习-pandas基本数据操作

# Display the DataFrame. `data` is created outside this excerpt; the string
# below appears to be a sample of the printed output (middle columns elided).
print(data)
"""
           open   high  close  ...  price_change  p_change  turnover
2018-02-27  23.53  25.88  24.16  ...          0.63      2.68      2.39
2018-02-26  22.80  23.78  23.53  ...          0.69      3.02      1.53
2018-02-23  22.88  23.37  22.82  ...          0.54      2.42      1.32
2018-02-22  22.25  22.76  22.28  ...          0.36      1.64      0.90
2018-02-14  21.49  21.99  21.92  ...          0.44      2.05      0.58
2018-02-13  21.40  21.90  21.48  ...          0.28      1.32      0.77
2018-02-12  20.70  21.40  21.19  ...          0.82      4.03      0.81
"""

索引——先列后行

# Chained indexing: select the column first, then the row label.
print(data['open']['2018-02-27'])
# 23.53

# loc: label-based indexing, rows first and then columns.
# As the sample output shows, both slice endpoints are included.
print(data.loc['2018-02-27':'2018-02-14', 'open':'close'])
"""
             open   high  close
2018-02-27  23.53  25.88  24.16
2018-02-26  22.80  23.78  23.53
2018-02-23  22.88  23.37  22.82
2018-02-22  22.25  22.76  22.28
2018-02-14  21.49  21.99  21.92
"""

# iloc: position-based indexing (end of each slice is excluded).
print(data.iloc[0:5, 0:3])
"""
             open   high  close
2018-02-27  23.53  25.88  24.16
2018-02-26  22.80  23.78  23.53
2018-02-23  22.88  23.37  22.82
2018-02-22  22.25  22.76  22.28
2018-02-14  21.49  21.92
"""

# ix: combined positional/label indexer. It was deprecated in pandas 0.20 and
# removed in pandas 1.0, so data.ix[...] raises AttributeError on current
# versions and is intentionally not called here.
# The same selection (first five rows, columns "open" and "close") is
# implemented with loc and iloc instead:
print(data.loc[data.index[0:5], ["open", "close"]])
print(data.iloc[0:5, data.columns.get_indexer(["open", "close"])])
"""
             open  close
2018-02-27  23.53  24.16
2018-02-26  22.80  23.53
2018-02-23  22.88  22.82
2018-02-22  22.25  22.28
2018-02-14  21.49  21.92
"""

赋值,修改某列的值

# Overwrite every value of the "close" column with a scalar.
# Form 1: attribute-style assignment. The sample output below shows the
# existing "close" column being updated.
data.close = 10
print(data.head(2))
"""
          open   high  close  ...  price_change  p_change  turnover
2018-02-27  23.53  25.88     10  ...          0.63      2.68      2.39
2018-02-26  22.80  23.78     10  ...          0.69      3.02      1.53
"""
# Form 2: item-style assignment by column name.
data["close"] = 20
print(data.head(2))
"""
          open   high  close  ...  price_change  p_change  turnover
2018-02-27  23.53  25.88     20  ...          0.63      2.68      2.39
2018-02-26  22.80  23.78     20  ...          0.69      3.02      1.53
"""

排序

# DataFrame 排序
# A DataFrame can be sorted in two ways: by its index or by its values.

# Sorting by values.
#   by:        the key(s) the ordering is based on
#   ascending: whether the result is in ascending order
sort_keys = ["open", "high"]
value_sort = data.sort_values(by=sort_keys, ascending=True)
print(value_sort.head())

# Sorting by index.
index_sort = data.sort_index()
print(index_sort.head())

# Series sorting, demonstrated on the "high" column.
high_prices = data["high"]

# Ordered by value.
high_by_value = high_prices.sort_values(ascending=True)
print(high_by_value.head())
"""
2015-03-02    12.67
2015-03-04    12.92
2015-03-03    13.06
2015-09-07    13.38
2015-03-05    13.45
"""

# Ordered by index label.
high_by_index = high_prices.sort_index(ascending=True)
print(high_by_index.head())
"""
2015-03-02    12.67
2015-03-03    13.06
2015-03-04    12.92
2015-03-05    13.45
2015-03-06    14.48
Name: high, dtype: float64
"""

算术运算

# Element-wise arithmetic on the "open" column.
open_prices = data["open"]

# Addition: add 10 to every value.
plus_ten = open_prices.add(10)
print(plus_ten.head())
"""
2018-02-27    33.53
2018-02-26    32.80
2018-02-23    32.88
2018-02-22    32.25
2018-02-14    31.49
Name: open, dtype: float64
"""
# Note from the original text: the operator form data["open"] + 10 is
# generally not used directly; the .add() method is shown instead.

# Subtraction: subtract 10 from every value.
minus_ten = open_prices.sub(10)
print(minus_ten.head())
"""
2018-02-27    13.53
2018-02-26    12.80
2018-02-23    12.88
2018-02-22    12.25
2018-02-14    11.49
Name: open, dtype: float64
"""

逻辑运算

# Work on the first rows of the "open" column for the comparison examples.
open_head = data["open"].head()
print(open_head)
"""
2018-02-27    23.53
2018-02-26    22.80
2018-02-23    22.88
2018-02-22    22.25
2018-02-14    21.49
Name: open, dtype: float64
"""

# A comparison yields a boolean Series aligned with the original index.
above_22 = open_head > 22
print(above_22)
"""
2018-02-27     True
2018-02-26     True
2018-02-23     True
2018-02-22     True
2018-02-14    False
Name: open, dtype: bool
"""

# Combine two conditions with & (each side parenthesised), printing the mask.
below_23 = open_head < 23
print(above_22 & below_23)

# The same range condition expressed as a query string; this prints the
# matching "open" values rather than a boolean mask.
in_range = data.head().query("open<23 & open>22")
print(in_range["open"])
"""
2018-02-26    22.80
2018-02-23    22.88
2018-02-22    22.25
Name: open, dtype: float64
"""

# Keep only rows whose "open" value is one of the listed values.
wanted_opens = [22.25, 22.88, 22.80]
matches = data[data["open"].isin(wanted_opens)]
print(matches["open"].head())
"""
2018-02-26    22.80
2018-02-23    22.88
2018-02-22    22.25
2018-02-06    22.80
2017-12-27    22.25
Name: open, dtype: float64
"""

统计运算

# Summary statistics in one call (mean, standard deviation, min, max, ...).
summary = data.describe()
print(summary)

# Maximum of each column; axis 0 is also what is used when axis is omitted.
print(data.max(axis=0))
# Minimum of each column.
print(data.min(axis=0))
# Element-wise absolute value.
print(data.abs())
# Median.
print(data.median())
# Mean.
print(data.mean())
# Sum.
print(data.sum())
# Mode (most frequent value).
print(data.mode())
# Product along an axis (here: across the columns of each row).
print(data.prod(axis=1))
# Standard deviation along an axis.
print(data.std(axis=1))
# Variance along an axis.
print(data.var(axis=1))
# Index label at which each column reaches its maximum.
print(data.idxmax())
# Index label at which each column reaches its minimum.
print(data.idxmin())


# Cumulative statistics, computed on the "p_change" column after sorting the
# rows by index.
index_sort = data.sort_index()
index_sort_change = index_sort["p_change"]

# Running sum of the first 1, 2, 3, ..., n values.
# Bound to cumulative_sum rather than `sum` so the builtin sum() is not
# shadowed for the rest of the script.
cumulative_sum = index_sort_change.cumsum()
print(cumulative_sum)
"""
2015-03-02      2.62
2015-03-03      4.06
2015-03-04      5.63
2015-03-05      7.65
"""
# Running maximum of the first 1, 2, 3, ..., n values.
print(index_sort_change.cummax())
"""
2015-03-02     2.62
2015-03-03     2.62
2015-03-04     2.62
2015-03-05     2.62
2015-03-06     8.51
"""
# Running minimum of the first 1, 2, 3, ..., n values.
print(index_sort_change.cummin())

# Running product of the first 1, 2, 3, ..., n values.
print(index_sort_change.cumprod())

自定义运算

def _value_range(column):
    """Return the spread (maximum minus minimum) of one column."""
    return column.max() - column.min()


# Apply the custom reduction to each selected column (axis=0).
price_columns = data[["open", "close"]]
print(price_columns.apply(_value_range, axis=0))
"""
open     22.74
close    22.85
dtype: float64
"""
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值