量化交易之python篇 - pandas库 - 调研

最新推荐文章于 2024-02-05 10:33:24 发布

_Erwin_

最新推荐文章于 2024-02-05 10:33:24 发布

阅读量281

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/Michael_234198652/article/details/111304219

版权

python 专栏收录该内容

71 篇文章 1 订阅

订阅专栏

import pandas
import numpy


"""
    0    丁一
    1    王二
    2    张三
    dtype: object
"""
foo1 = pandas.Series(["丁一", "王二", "张三"])
# print(foo1)


"""
       0  1
    0  1  2
    1  3  4
    2  5  6
"""
foo2 = pandas.DataFrame([[1, 2], [3, 4], [5, 6]])
# print(foo2)


"""
       date  score
    A     1      2
    B     3      4
    C     5      6
"""
foo3 = pandas.DataFrame([[1, 2], [3, 4], [5, 6]], columns=['date', 'score'], index=['A', 'B', 'C'])
# print(foo3)


"""
       date  score
    0     1      2
    1     3      4
    2     5      6
"""
foo4 = pandas.DataFrame()
date = [1, 3, 5]
score = [2, 4, 6]
foo4['date'] = date
foo4['score'] = score
# print(foo4)


"""
       date  score
    x     1      2
    y     3      4
    z     5      6
"""
foo5 = pandas.DataFrame({'date': [1, 3, 5], 'score': [2, 4, 6]}, index=['x', 'y', 'z'])
# print(foo5)


"""
       0  1  2
    a  1  3  5
    b  2  4  6
"""
foo6 = pandas.DataFrame.from_dict({'a': [1, 3, 5], 'b': [2, 4, 6]}, orient='index')  # 参数orient: 可以是index/columns, 表示'a'、'b'是列数据
# print(foo6)


"""
       A  B   C   D
    1  0  1   2   3
    2  4  5   6   7
    3  8  9  10  11
"""
array_3_4 = numpy.arange(12).reshape(3, 4)
foo7 = pandas.DataFrame(array_3_4, index=[1, 2, 3], columns=['A', 'B', 'C', 'D'])
# print(foo7)


"""
             date  score
    company             
    A           1      2
    B           3      4
    C           5      6
"""
foo8 = pandas.DataFrame([[1, 2], [3, 4], [5, 6]], columns=['date', 'score'], index=['A', 'B', 'C'])
foo8.index.name = "company"
# print(foo8)


"""
               日期  分数
    company          
    Tencent     1   2
    ByteDance   3   4
    Google      5   6
"""
foo9 = foo8.rename(index={'A': 'Tencent', 'B': 'ByteDance', 'C': 'Google'}, columns={'date': '日期', 'score': '分数'})
# print(foo9)


"""
               日期  分数
    company          
    Tencent     1   2
    ByteDance   3   4
    Google      5   6
"""
foo8.rename(index={'A': 'Tencent', 'B': 'ByteDance', 'C': 'Google'}, columns={'date': '日期', 'score': '分数'}, inplace=True)  # 设置 inplace=True, 直接修改数据, 不需要返回值
# print(foo8)


"""
         company  日期  分数
    0    Tencent   1   2
    1  ByteDance   3   4
    2     Google   5   6
"""
foo10 = foo8.reset_index()
# print(foo10)


"""
          company  分数
    日期               
    1     Tencent   2
    3   ByteDance   4
    5      Google   6
"""
foo11 = foo10.set_index('日期')
# print(foo11)

"""
             columns1  columns2  columns3
    index_1         1         2         3
    index_2         4         5         6
    index_3         7         8         9
"""
data4 = pandas.DataFrame(numpy.arange(1, 10).reshape(3, 3),
                         index=['index_1', 'index_2', 'index_3'],
                         columns=['columns1', 'columns2', 'columns3'])
# print(data4)


"""
    index_1    1
    index_2    4
    index_3    7
    Name: columns1, dtype: int32
"""
foo_data1 = data4['columns1']  # 取 columns1 这一列的数据;
# print(foo_data1)


"""
             columns1
    index_1         1
    index_2         4
    index_3         7
"""
foo_data2 = data4[['columns1']]
# print(foo_data2)


"""
             columns1  columns2
    index_1         1         2
    index_2         4         5
    index_3         7         8
"""
foo_data3 = data4[['columns1', 'columns2']]  # 取['columns1', 'columns2']这两列 的数据
# print(foo_data3)


"""
             columns1  columns2  columns3
    index_2         4         5         6
    index_3         7         8         9
"""
foo_data4 = data4[1:3]  # 取下标为[1:3]的行 的数据(不推荐使用)
# print(foo_data4)


"""
             columns1  columns2  columns3
    index_2         4         5         6
    index_3         7         8         9
"""
foo_data5 = data4.iloc[1:3]  # 取下标为[1:3]的行 的数据
# print(foo_data5)


"""
    columns1    7
    columns2    8
    columns3    9
    Name: index_3, dtype: int32
"""
foo_data6 = data4.iloc[-1]  # 取 最后一行的数据
# print(foo_data6)


"""
             columns1  columns2  columns3
    index_1         1         2         3
    index_2         4         5         6
"""
foo_data7 = data4.loc[['index_1', 'index_2']]  # 取 ['index_1', 'index_2'] 这2行的数据
# print(foo_data7)


"""
             columns1  columns2  columns3
    index_1         1         2         3
    index_2         4         5         6
"""
foo_data8 = data4.head(n=2)  # 从开始 取 2行数据
# print(foo_data8)





# 取某一区域的数据
"""
             columns1  columns3
    index_1         1         3
    index_2         4         6
"""
# foo_data9 = data4[['columns1', 'columns3']][0:2]
# foo_data9 = data4[0:2][['columns1', 'columns3']]
foo_data9 = data4.iloc[0:2][['columns1', 'columns3']]
# print(foo_data9)


"""
    3
"""
foo_data10 = data4.iloc[0]['columns3']  # 索引
# print(foo_data10)


"""
             columns1  columns3
    index_1         1         3
    index_2         4         6
"""
foo_data11 = data4.loc[['index_1', 'index_2'], ['columns1', 'columns3']]
# foo_data11 = data4.iloc[0:2, [0, 2]]
# print(foo_data11)


"""
             columns1  columns2  columns3
    index_2         4         5         6
    index_3         7         8         9
"""
foo_data12 = data4[data4['columns1'] > 1]  # 取行数据, 需满足条件: 'columns1'列的数据大于1
# print(foo_data12)


"""
             columns1  columns2  columns3
    index_2         4         5         6
"""
foo_data13 = data4[(data4['columns1'] > 1) & (data4['columns2'] == 5)]  # 取行数据, 需满足条件: 'columns1'列的数据大于1, 'columns2'列的数据为5
# print(foo_data13)



"""
             columns1  columns2  columns3
    index_3         7         8         9
    index_2         4         5         6
    index_1         1         2         3
"""
foo_data14 = data4.sort_values(by='columns2', ascending=False)  # 'columns2'列 倒序排列
# print(foo_data14)


"""
             columns1  columns2  columns3
    index_3         7         8         9
    index_2         4         5         6
    index_1         1         2         3
"""
foo_data15 = data4.sort_index(ascending=False)  # 所有行倒序
# print(foo_data15)


"""
             columns1  columns2  columns3  columns4
    index_1         1         2         3         2
    index_2         4         5         6         2
    index_3         7         8         9         2
"""
data4['columns4'] = data4['columns3'] - data4['columns1']  # 添加 新列'columns4'
# print(data4)


"""
             columns1  columns2  columns3  columns4
    index_3         7         8         9         2
"""
# foo_data16 = data4.drop(columns='columns4')  # 删除'columns4'列数据: 返回删减过后的数据, 原数据不变
# foo_data16 = data4.drop(columns=['columns1', 'columns4'])
foo_data16 = data4.drop(index=['index_1', 'index_2'])
# print(foo_data16)


"""
             columns1  columns2  columns3  columns4
    index_3         7         8         9         2
"""
data4.drop(index=['index_1', 'index_2'], inplace=True)  # inplace=True: 直接修改原数据
# print(data4)

"""
              公司  分数   股价
    0    Tencent  90   20
    1  ByteDance  95  180
"""
data_frame1 = pandas.DataFrame({'公司': ['Tencent', 'ByteDance', 'Google'], '分数': [90, 95, 85]})
data_frame2 = pandas.DataFrame({'公司': ['Tencent', 'ByteDance', 'Apple'], '股价': [20, 180, 30]})
data_frame3 = pandas.merge(left=data_frame1, right=data_frame2, on='公司')  # 合并共有数据
# print(data_frame3)


"""
              公司    分数     股价
    0    Tencent  90.0   20.0
    1  ByteDance  95.0  180.0
    2     Google  85.0    NaN
    3      Apple   NaN   30.0
"""
data_frame4 = pandas.merge(left=data_frame1, right=data_frame2, how='outer')  # 合并所有数据
# print(data_frame4)


"""
              公司  分数     股价
    0    Tencent  90   20.0
    1  ByteDance  95  180.0
    2     Google  85    NaN
"""
data_frame5 = pandas.merge(left=data_frame1, right=data_frame2, how='left')  # 合并后保留左边表的数据
# print(data_frame5)


"""
            公司_x  分数       公司_y   股价
    0    Tencent  90    Tencent   20
    1  ByteDance  95  ByteDance  180
    2     Google  85      Apple   30
"""
data_frame6 = pandas.merge(left=data_frame1, right=data_frame2, left_index=True, right_index=True)
# print(data_frame6)


"""
              公司  分数         公司   股价
    0    Tencent  90    Tencent   20
    1  ByteDance  95  ByteDance  180
    2     Google  85      Apple   30
"""
data_frame7 = pandas.concat([data_frame1, data_frame2], axis=1)  # 参数: axis=1, 横向拼接
# print(data_frame7)


"""
              公司    分数     股价
    0    Tencent  90.0    NaN
    1  ByteDance  95.0    NaN
    2     Google  85.0    NaN
    0    Tencent   NaN   20.0
    1  ByteDance   NaN  180.0
    2      Apple   NaN   30.0
"""
data_frame8 = pandas.concat([data_frame1, data_frame2], axis=0)  # 参数: axis=0, 纵向拼接
# print(data_frame8)


"""
              公司    分数     股价
    0    Tencent  90.0    NaN
    1  ByteDance  95.0    NaN
    2     Google  85.0    NaN
    3    Tencent   NaN   20.0
    4  ByteDance   NaN  180.0
    5      Apple   NaN   30.0
"""
data_frame9 = pandas.concat([data_frame1, data_frame2], ignore_index=True)
# print(data_frame9)


"""
              公司    分数     股价
    0    Tencent  90.0    NaN
    1  ByteDance  95.0    NaN
    2     Google  85.0    NaN
    3    Tencent   NaN   20.0
    4  ByteDance   NaN  180.0
    5      Apple   NaN   30.0
"""
data_frame10 = data_frame1.append(data_frame2, ignore_index=True)
# print(data_frame10)


"""
              公司  分数
    0    Tencent  90
    1  ByteDance  95
    2     Google  85
    3      Yahoo  95
"""
data_frame11 = data_frame1.append({'公司': 'Yahoo', '分数': 95}, ignore_index=True)
# print(data_frame11)

import pandas
import numpy
import xlwt


# Reading Excel / CSV files with pandas.
# NOTE(review): this read runs before the write further down, so foo_1.xls
# (with a sheet named "test_sheet") must already exist, e.g. from an earlier
# run — confirm the intended order.
data1 = pandas.read_excel('foo_1.xls', sheet_name="test_sheet")
# data2 = pandas.read_csv('foo_1.csv')


# Writing a DataFrame to an Excel file.
# NOTE(review): the `encoding` keyword of to_excel was deprecated in pandas
# 1.5 and removed in 2.0, and writing legacy .xls files (xlwt engine) is no
# longer supported in pandas 2.x — confirm against the installed pandas
# version before relying on this call.
data3 = pandas.DataFrame([[1, 2], [3, 4], [5, 6]], columns=['A列', 'B列'])
data3.to_excel('foo_1.xls', sheet_name="test_sheet", encoding="utf-8")


# Alternative kept from the original: write only column 'A列' of data3 to
# foo_1.xls, without the row index, into the sheet "test_sheet2".
# data3.to_excel('foo_1.xls', columns=['A列'], index=False, sheet_name="test_sheet2")

import pandas

# --- Series basics: statistics, element access and aligned arithmetic -----

# Share prices labelled by company name.
stocks = pandas.Series(
    data=[54.74, 190.9, 173.14, 1050.3, 181.86, 1139.49],
    index=['腾讯', '阿里巴巴', '苹果', '谷歌', 'Facebook', '亚马逊'],
)

# Summary statistics of the prices.
print(stocks.describe())

# Element access: by position with iloc, by label with loc.
print(stocks.iloc[0])
print(stocks.loc['Facebook'])


# Two Series whose labels only partly overlap ('a' and 'b' are shared).
series_1 = pandas.Series(data=[1, 2, 3, 4], index=list('abcd'))
series_2 = pandas.Series(data=[10, 20, 30, 40], index=list('abef'))

# Addition aligns on labels; a label missing from either side gives NaN.
series_3 = series_1 + series_2
print(series_3)
# dropna() removes the NaN entries from the printed result.
print(series_3.dropna())

# add() with fill_value=0 treats a missing label as 0 instead of giving NaN.
series_3 = series_1.add(series_2, fill_value=0)
print(series_3)

import pandas
from collections import OrderedDict


# --- Indexing a DataFrame of pharmacy sales records -----------------------
# The comment above each print shows the output it produces.

# Raw records, one list per column.
sales_dictionary = {
    '购药时间': ['2018-08-01 星期五', '2018-08-02 星期六', '2018-08-03 星期三'],
    '社保卡号': ['001616528', '001616528', '0012602828'],
    '商品编码': [2366701, 236701, 236701],
    '商品名称': ['最强 VC 银胶片', '清热解毒口服液', '感康'],
    '销售数量': [6, 1, 2],
    '应收金额': [82.8, 28, 16.8],
    '实收金额': [69.00, 24.64, 15.00],
}

# Ordered copy of the mapping; the key order is the column order of the frame.
sales_order_dictionary = OrderedDict(sales_dictionary)

#                  购药时间        社保卡号     商品编码       商品名称  销售数量  应收金额   实收金额
#     0  2018-08-01 星期五   001616528  2366701  最强 VC 银胶片     6  82.8  69.00
#     1  2018-08-02 星期六   001616528   236701    清热解毒口服液     1  28.0  24.64
#     2  2018-08-03 星期三  0012602828   236701         感康     2  16.8  15.00
sales_order_data_frame = pandas.DataFrame(data=sales_order_dictionary)
print(sales_order_data_frame)

# Single cells by (row position, column position).
print(sales_order_data_frame.iloc[0, 0])  # 2018-08-01 星期五
print(sales_order_data_frame.iloc[0, 3])  # 最强 VC 银胶片

# The whole first row, by position.
#     购药时间    2018-08-01 星期五
#     社保卡号         001616528
#     商品编码           2366701
#     商品名称         最强 VC 银胶片
#     销售数量                 6
#     应收金额              82.8
#     实收金额              69.0
#     Name: 0, dtype: object
print(sales_order_data_frame.iloc[0, :])


# The whole first column, by position.
#     0    2018-08-01 星期五
#     1    2018-08-02 星期六
#     2    2018-08-03 星期三
#     Name: 购药时间, dtype: object
print(sales_order_data_frame.iloc[:, 0])

# A single cell by (row label, column label).
print(sales_order_data_frame.loc[0, '商品名称'])  # 最强 VC 银胶片


# The whole column, by label.
#     0    最强 VC 银胶片
#     1      清热解毒口服液
#     2           感康
#     Name: 商品名称, dtype: object
print(sales_order_data_frame.loc[:, '商品名称'])


# Several columns, chosen with a list of labels.
#             商品名称  应收金额
#     0  最强 VC 银胶片  82.8
#     1    清热解毒口服液  28.0
#     2         感康  16.8
print(sales_order_data_frame[['商品名称', '应收金额']])


# A label slice of columns; both end labels are included in the result.
#             商品名称  销售数量  应收金额   实收金额
#     0  最强 VC 银胶片     6  82.8  69.00
#     1    清热解毒口服液     1  28.0  24.64
#     2         感康     2  16.8  15.00
print(sales_order_data_frame.loc[:, '商品名称':'实收金额'])


# Boolean mask: True for the rows whose quantity sold is greater than 1.
#     0     True
#     1    False
#     2     True
#     Name: 销售数量, dtype: bool
query_series = sales_order_data_frame.loc[:, '销售数量'].gt(1)
print(query_series)


# Use the mask to keep only the matching rows (all columns).
#                  购药时间        社保卡号     商品编码       商品名称  销售数量  应收金额  实收金额
#     0  2018-08-01 星期五   001616528  2366701  最强 VC 银胶片     6  82.8  69.0
#     2  2018-08-03 星期三  0012602828   236701         感康     2  16.8  15.0
print(sales_order_data_frame.loc[query_series, :])

_Erwin_

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
量化交易之python篇 - pandas库 - 调研

import pandas import numpy """ 0 丁一 1 王二 2 张三 dtype: object """ foo1 = pandas.Series(["丁一", "王二", "张三"]) # print(foo1) """ 0 1 0 1 2 1 3 4 2 5 6 """ foo2 = pandas.DataFrame([[1, 2], [3, 4], [5, 6]]) # …
复制链接

扫一扫

专栏目录