使用 Python 进行数据分析和特征获取的常用函数

13 篇文章 0 订阅
3 篇文章 0 订阅
import pandas as pd
import numpy as np

def draw_missing_data_table(df):
    """Summarise how much data is missing in each column of ``df``.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        A DataFrame indexed by the column names of ``df`` with two columns:
        ``Total`` (number of null entries in that column) and ``Percent``
        (null entries divided by the number of rows, i.e. a fraction of the
        total rather than a value scaled to 100). Rows are ordered by
        ``Total``, largest first.
    """
    total = df.isnull().sum().sort_values(ascending=False)
    # Divide by the row count, not by df.count(): count() only tallies the
    # non-null entries, which would produce a missing/present ratio (and
    # infinity for a fully-null column) instead of the share of all rows.
    percent = total / len(df)
    missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
    return missing_data
def plot_learning_curve(
    estimator,
    title,
    X,
    y,
    ylim=None,
    cv=None,
    n_jobs=1,
    train_sizes=np.linspace(.1, 1.0, 5),
):
    """Plot training and validation scores against the number of training examples.

    A new figure is opened, ``learning_curve`` is run with the given
    arguments, and for each of the two score sets the per-size mean is drawn
    as a line with a shaded band of one standard deviation either side.

    NOTE(review): ``plt`` and ``learning_curve`` are not imported in the
    visible part of the file; presumably ``matplotlib.pyplot`` and
    ``sklearn.model_selection.learning_curve`` -- confirm.

    Args:
        estimator: Model forwarded unchanged to ``learning_curve``.
        title: Text used as the figure title.
        X: Feature data forwarded to ``learning_curve``.
        y: Target data forwarded to ``learning_curve``.
        ylim: Optional pair unpacked into ``plt.ylim``; skipped when ``None``.
        cv: Forwarded to ``learning_curve`` as ``cv``.
        n_jobs: Forwarded to ``learning_curve`` as ``n_jobs``.
        train_sizes: Forwarded to ``learning_curve`` as ``train_sizes``.

    Returns:
        The ``plt`` module object, so the caller can show or save the figure.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    sizes, fit_scores, held_out_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes
    )

    # One entry per curve: (raw scores, colour, legend label).
    curves = (
        (fit_scores, 'r', "Training Score"),
        (held_out_scores, "g", "Validation Score"),
    )
    # Collapse axis 1 of each score matrix into a mean and a spread.
    summaries = [
        (np.mean(scores, axis=1), np.std(scores, axis=1), colour, label)
        for scores, colour, label in curves
    ]

    plt.grid()
    # Bands first, then lines, in the same order for both curves.
    for centre, spread, colour, _label in summaries:
        plt.fill_between(sizes, centre - spread, centre + spread, alpha=0.1, color=colour)
    for centre, _spread, colour, label in summaries:
        plt.plot(sizes, centre, 'o-', color=colour, label=label)
    plt.legend(loc="best")
    return plt
def plot_validation_curve(estimator, title, X, y,param_name, param_range, ylim=None, cv=None, n_jobs=1, train_sizes=np.linspace(.1,1.0,5)):
    """Plot training and validation scores against the values of one hyper-parameter.

    Runs ``validation_curve`` and draws, on the current axes, the mean score
    for each value in ``param_range`` with a shaded band of one standard
    deviation either side. The x axis is switched to a log scale.

    NOTE(review): ``plt`` and ``validation_curve`` are not imported in the
    visible part of the file; presumably ``matplotlib.pyplot`` and
    ``sklearn.model_selection.validation_curve`` -- confirm.

    Args:
        estimator: Model forwarded unchanged to ``validation_curve``.
        title: Text used as the plot title.
        X: Feature data forwarded to ``validation_curve``.
        y: Target data forwarded to ``validation_curve``.
        param_name: Name of the parameter being varied.
        param_range: Values of that parameter; also used as the x coordinates.
        ylim: Optional y-axis limits passed to ``plt.ylim``; skipped when ``None``.
        cv: Forwarded to ``validation_curve`` as ``cv``.
        n_jobs: Forwarded to ``validation_curve`` as ``n_jobs``.
        train_sizes: Unused; kept so existing callers keep working.

    Returns:
        None. The plot is drawn as a side effect.
    """
    # Pass everything after y by keyword: handed over positionally, ``cv``
    # would land in whatever parameter follows ``param_range`` in the callee's
    # signature rather than in ``cv``. ``n_jobs`` is forwarded as well, the
    # same way plot_learning_curve forwards it.
    train_scores, test_scores = validation_curve(
        estimator, X, y,
        param_name=param_name, param_range=param_range, cv=cv, n_jobs=n_jobs,
    )
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)
    # The title argument was previously accepted but never shown.
    plt.title(title)
    plt.plot(param_range, train_mean, color='r', marker='o', markersize=5, label='Training Score')
    plt.fill_between(param_range, train_mean + train_std, train_mean - train_std, alpha=0.15, color='r')
    plt.plot(param_range, test_mean, color='g', linestyle='--', marker='s', markersize=5, label='ValidationScore')
    plt.fill_between(param_range, test_mean + test_std, test_mean - test_std, alpha=0.15, color='g')
    plt.grid()
    plt.xscale('log')
    plt.legend(loc='best')
    plt.xlabel('Parameter')
    plt.ylabel('Score')
    # Only override the y limits when the caller actually supplied some.
    if ylim is not None:
        plt.ylim(ylim)

来源

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值