数据科学导论——数据预处理

第2关:数据清理-查漏补缺

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def student():
    """Clean the Task1 diabetes data and plot Age against Pregnancies.

    Reads ``Task1/diabetes_null.csv`` (``#NAME?`` cells are parsed as
    missing), fills the missing values of five columns, removes every row
    whose ``Age`` is 80 or more, then saves a scatter plot of ``Age`` versus
    ``Pregnancies`` to ``Task1/img/T1.png`` and shows it.

    Returns:
        None. The function is called for its side effects only.
    """
    train = pd.read_csv('Task1/diabetes_null.csv', na_values=['#NAME?'])
    # Missing-value imputation: a fixed value for Insulin, the column median
    # for SkinThickness / BloodPressure, the column mean for BMI / Glucose.
    train['Insulin'] = train['Insulin'].fillna(100)
    train['SkinThickness'] = train['SkinThickness'].fillna(train['SkinThickness'].median())
    train['BloodPressure'] = train['BloodPressure'].fillna(train['BloodPressure'].median())
    train['BMI'] = train['BMI'].fillna(train['BMI'].mean())
    train['Glucose'] = train['Glucose'].fillna(train['Glucose'].mean())
    #********* Begin *********#
    # Drop rows with Age >= 80 (presumably treated as outliers -- confirm
    # against the exercise statement). Dropping by index label keeps rows
    # whose Age is missing, because NaN >= 80 evaluates to False.
    train = train.drop(train[train['Age'] >= 80].index)
    plt.figure(figsize=(10, 10))
    plt.scatter(x=train['Age'], y=train['Pregnancies'])
    # Save before show(): some backends clear the figure once it is shown.
    plt.savefig("Task1/img/T1.png")
    plt.show()
    #********* End *********#

第3关:数据集成-海纳百川

import numpy as np
import pandas as pd

def student():
    """Stack the two Task2 diabetes CSV files and print the combined shape.

    Both files are read with ``#NAME?`` parsed as a missing value and are
    concatenated row-wise with ``pd.concat`` (original row labels are kept).
    The ``(rows, columns)`` tuple of the result is printed.

    Returns:
        None.
    """
    #********* Begin *********#
    missing_markers = ['#NAME?']
    frames = [
        pd.read_csv(csv_path, na_values=missing_markers)
        for csv_path in ('Task2/diabetes_null.csv', 'Task2/diabetes_zero.csv')
    ]
    combined = pd.concat(frames)
    print(combined.shape)
    #********* End *********#

第4关:数据变换-同源共流

import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize,MinMaxScaler

def student():
    """Impute the Task3 diabetes data and print two rescaled versions of it.

    Reads ``Task3/diabetes_null.csv`` (``#NAME?`` cells are parsed as
    missing), fills the missing values of five columns, then prints the
    result of ``normalize(..., axis=0)`` followed by the result of
    ``MinMaxScaler().fit_transform(...)`` on the same frame.

    Returns:
        None.
    """
    train = pd.read_csv('Task3/diabetes_null.csv', na_values=['#NAME?'])
    # Imputation: fixed value for Insulin, median for the next two columns,
    # mean for the last two.
    train['Insulin'] = train['Insulin'].fillna(100)
    for column in ('SkinThickness', 'BloodPressure'):
        train[column] = train[column].fillna(train[column].median())
    for column in ('BMI', 'Glucose'):
        train[column] = train[column].fillna(train[column].mean())
    #********* Begin *********#
    # NOTE(review): the printed label says "z-score", but sklearn's
    # normalize() rescales by a vector norm (L2 unless `norm` is given) --
    # here per column because axis=0 -- which is not mean/std
    # standardization. Left unchanged because the exercise's expected output
    # is not visible here; confirm which one is intended.
    column_normalized = normalize(train, axis=0)
    print("z-score规范化:\n", column_normalized)
    min_max_scaled = MinMaxScaler().fit_transform(train)
    print("\n最小-最大规范化:\n", min_max_scaled)
    #********* End *********#
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值