量化交易之Python篇 - 统计学习导论Python版(第二章习题)

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston

from public_module.tqz_extern.tools.pandas_operator.pandas_operator import pandas

# Chapter 2, exercise 8
def charpter_2_test8():
    """Bin three College columns and show their bin counts as bar charts.

    Reads ``data/College.csv``, appends the ``Unnamed: 0`` column to the
    index as a level named ``Name``, adds an ``Elite`` column, replaces
    ``Enroll``, ``PhD`` and ``Terminal`` with equal-width bins, and displays
    one bar chart of bin counts per binned column.
    """
    frame = pd.read_csv(r'data/College.csv')

    # Keep the default integer level and append 'Unnamed: 0' as a second
    # index level; the appended level is then named 'Name'.
    frame = frame.set_index(['Unnamed: 0'], append=True, verify_integrity=True)
    frame = frame.rename_axis([None, 'Name'])

    # Elite is 'Yes' when Top10perc is above 50, otherwise 'No'.
    is_elite = frame['Top10perc'] > 50
    frame['Elite'] = np.where(is_elite, 'Yes', 'No')

    # One equal-width bin per label, in the order the charts are drawn.
    bin_labels = {
        'Enroll': ['Low', 'Medium', 'High'],
        'PhD': ['Very low', 'Low', 'Medium', 'High', 'Very High'],
        'Terminal': ['Very low', 'Low', 'High', 'Very High'],
    }
    for column, labels in bin_labels.items():
        frame[column] = pd.cut(frame[column], bins=len(labels), labels=labels)

    # Bar charts of the bin counts on a 2x2 grid (last cell stays empty).
    figure = plt.figure()
    for position, column in enumerate(bin_labels, start=1):
        plt.subplot(2, 2, position)
        counts = frame[column].value_counts()
        counts.plot.bar(title=column)

    # Extra vertical spacing between the subplot rows.
    figure.subplots_adjust(hspace=1)

    plt.show()


def charpter_2_test9(path=r'data/Auto.csv'):
    """Summarise the numeric columns of the Auto data set.

    Args:
        path: CSV file path or file-like object handed to ``pd.read_csv``.
            Defaults to ``data/Auto.csv``.

    Returns:
        A DataFrame indexed by numeric column name with the columns
        ``mean``, ``range`` (``max - min``) and ``std``, computed after
        dropping every row that contains a missing value.
    """
    # '?' marks a missing horsepower value. Parsing it as NaN while reading
    # keeps the column numeric; replacing it afterwards would leave the
    # column as strings and describe() would silently skip it.
    auto = pd.read_csv(path, na_values={'horsepower': ['?']})
    auto = auto.dropna()

    # describe() reports one column per variable; transpose so that each
    # variable becomes a row and the statistics become columns.
    info = auto.describe().T
    info['range'] = info['max'] - info['min']
    return info[['mean', 'range', 'std']]


def charpter_2_test10():
    """Build the Boston housing frame and compute two exploratory views.

    Returns:
        A tuple ``(corr_matrix, ranked)`` where ``corr_matrix`` is the
        pairwise correlation of all columns (pandas default method) and
        ``ranked`` is the frame sorted in descending order by ``CRIM``,
        then ``TAX``, then ``PTRATIO``.
    """
    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and
    # removed in 1.2 -- confirm the pinned scikit-learn version.
    dataset = load_boston()  # load once instead of once per attribute

    boston = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    boston['target'] = dataset.target
    # boston.shape is (rows, columns): shape[0] rows, shape[1] columns.

    corr_matrix = boston.corr()
    ranked = boston.sort_values(by=['CRIM', 'TAX', 'PTRATIO'], ascending=False)
    return corr_matrix, ranked


if __name__ == '__main__':
    # No exercise runs by default; call one of the charpter_2_test*
    # functions here to execute it.
    pass

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值