# Import the linear regression class
from sklearn.linear_model import LinearRegression
# Sklearn also has a helper that makes it easy to do cross-validation
from sklearn.model_selection import KFold
# The columns we'll use to predict the target (the classifier's features)
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
# Initialize our algorithm class
alg = LinearRegression()
# Generate cross-validation folds for the titanic dataset. KFold returns the
# row indices corresponding to the train and test sets.
# We set random_state to ensure we get the same splits every time we run this.
kf = KFold(n_splits=3, random_state=1)
print(kf)
predictions = []
# Collect the out-of-fold predictions from each cross-validation split
for train, test in kf.split(titanic):
    # The predictors we're using to train the algorithm. Note how we only take
    # the rows in the train folds.
    train_predictors = titanic[predictors].iloc[train, :]
    # The target we're using to train the algorithm.
    train_target = titanic["Survived"].iloc[train]
    # Training the algorithm using the predictors and target.
    alg.fit(train_predictors, train_target)
    # We can now make predictions on the test fold
    test_predictions = alg.predict(titanic[predictors].iloc[test, :])
    predictions.append(test_predictions)
KFold(n_splits=3, random_state=1, shuffle=False)
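# As an aside (a sketch, not part of the original run): the manual loop above
# can be collapsed into a single call with cross_val_predict, which also
# returns the out-of-fold predictions. The variable name oof_predictions is
# illustrative.
from sklearn.model_selection import cross_val_predict
oof_predictions = cross_val_predict(alg, titanic[predictors], titanic["Survived"], cv=kf)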
import numpy as np

# The predictions are in three separate numpy arrays. Concatenate them into one.
# We concatenate them on axis 0, as they only have one axis.
predictions = np.concatenate(predictions, axis=0)
# Map predictions to outcomes (the only possible outcomes are 1 and 0)
predictions[predictions > .5] = 1
predictions[predictions <= .5] = 0
# Accuracy is the fraction of predictions that match the known labels
accuracy = sum(predictions == titanic["Survived"]) / len(predictions)
print(accuracy)
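# As a cross-check (a sketch, not in the original notebook): sklearn's
# accuracy_score computes the same fraction of matching labels and guards
# against hand-rolled indexing mistakes.
from sklearn.metrics import accuracy_score
print(accuracy_score(titanic["Survived"], predictions))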
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression

# Initialize our algorithm
alg = LogisticRegression(random_state=1)
# Compute the accuracy score for all the cross-validation folds
# (much simpler than what we did before!)
scores = model_selection.cross_val_score(alg, titanic[predictors], titanic["Survived"], cv=3)
# Take the mean of the scores (because we have one for each fold)
print(scores.mean())
0.7901234567901234
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
FutureWarning)
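# The FutureWarning above comes from LogisticRegression's default solver.
# A minimal way to silence it (a sketch; 'liblinear' was the pre-0.22 default,
# so the scores should be unchanged):
alg = LogisticRegression(random_state=1, solver='liblinear')
scores = model_selection.cross_val_score(alg, titanic[predictors], titanic["Survived"], cv=3)
print(scores.mean())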
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier

predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
# Initialize our algorithm with the default parameters:
# n_estimators is the number of trees we want to make
# min_samples_split is the minimum number of rows we need to make a split
# min_samples_leaf is the minimum number of samples we can have at the place
# where a tree branch ends (the bottom points of the tree)
alg = RandomForestClassifier(random_state=1, n_estimators=10, min_samples_split=2, min_samples_leaf=1)
# Compute the accuracy score for all the cross-validation folds
kf = model_selection.KFold(n_splits=3, random_state=1)
scores = model_selection.cross_val_score(alg, titanic[predictors], titanic["Survived"], cv=kf)
# Take the mean of the scores (because we have one for each fold)
print(scores.mean())
# More trees plus stricter split/leaf minimums to reduce overfitting
alg = RandomForestClassifier(random_state=1, n_estimators=100, min_samples_split=4, min_samples_leaf=2)
# Compute the accuracy score for all the cross-validation folds
kf = model_selection.KFold(n_splits=3, random_state=1)
scores = model_selection.cross_val_score(alg, titanic[predictors], titanic["Survived"], cv=kf)
# Take the mean of the scores (because we have one for each fold)
print(scores.mean())
0.8305274971941637
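# The two parameter settings above were picked by hand. A small grid search is
# the systematic version (a sketch, not from the original notebook; the grid
# values are illustrative):
from sklearn.model_selection import GridSearchCV
param_grid = {
    "n_estimators": [10, 50, 100],
    "min_samples_split": [2, 4, 8],
    "min_samples_leaf": [1, 2, 4],
}
grid = GridSearchCV(RandomForestClassifier(random_state=1), param_grid, cv=3)
grid.fit(titanic[predictors], titanic["Survived"])
print(grid.best_params_, grid.best_score_)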
# Generating a family size column
titanic["FamilySize"] = titanic["SibSp"] + titanic["Parch"]
# The .apply method generates a new series
titanic["NameLength"] = titanic["Name"].apply(lambda x: len(x))
import re

# A function to get the title from a name.
def get_title(name):
    # Use a regular expression to search for a title. Titles always consist of
    # capital and lowercase letters, and end with a period.
    title_search = re.search(r' ([A-Za-z]+)\.', name)
    # If the title exists, extract and return it.
    if title_search:
        return title_search.group(1)
    return ""

# Get all the titles and print how often each one occurs.
titles = titanic["Name"].apply(get_title)
print(pd.value_counts(titles))

# Map each title to an integer. Some titles are very rare, and are compressed
# into the same codes as other titles.
title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Dr": 5, "Rev": 6,
                 "Major": 7, "Col": 7, "Mlle": 8, "Mme": 8, "Don": 9, "Lady": 10,
                 "Countess": 10, "Jonkheer": 10, "Sir": 9, "Capt": 7, "Ms": 2}
for k, v in title_mapping.items():
    titles[titles == k] = v
# Verify that we converted everything.
print(pd.value_counts(titles))
# Add in the title column.
titanic["Title"] = titles
Mr 517
Miss 182
Mrs 125
Master 40
Dr 7
Rev 6
Col 2
Major 2
Mlle 2
Ms 1
Sir 1
Don 1
Capt 1
Countess 1
Mme 1
Lady 1
Jonkheer 1
Name: Name, dtype: int64
1 517
2 183
3 125
4 40
5 7
6 6
7 5
10 3
8 3
9 2
Name: Name, dtype: int64
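# A more idiomatic alternative to the assignment loop above (a sketch reusing
# the same get_title function and title_mapping dict): Series.map applies the
# dict in one vectorized pass and yields the same codes.
titanic["Title"] = titanic["Name"].apply(get_title).map(title_mapping)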
import numpy as np
from sklearn.feature_selection import SelectKBest, f_classif
import matplotlib.pyplot as plt

predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked",
              "FamilySize", "Title", "NameLength"]
# Perform feature selection
selector = SelectKBest(f_classif, k=5)
selector.fit(titanic[predictors], titanic["Survived"])
# Get the raw p-values for each feature, and transform the p-values into scores
scores = -np.log10(selector.pvalues_)
# Plot the scores. See how "Pclass", "Sex", "Title", and "Fare" are the best?
plt.bar(range(len(predictors)), scores)
plt.xticks(range(len(predictors)), predictors, rotation='vertical')
plt.show()

# Pick only the four best features.
predictors = ["Pclass", "Sex", "Fare", "Title"]
alg = RandomForestClassifier(random_state=1, n_estimators=50, min_samples_split=8, min_samples_leaf=4)
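# To see what the pruned feature set buys, the reduced model can be scored the
# same way as the earlier cells (a sketch reusing the cv=3 convention):
scores = model_selection.cross_val_score(alg, titanic[predictors], titanic["Survived"], cv=3)
print(scores.mean())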
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np

# The algorithms we want to ensemble.
# We're using the more linear predictors for the logistic regression, and
# everything with the gradient boosting classifier.
algorithms = [
    [GradientBoostingClassifier(random_state=1, n_estimators=25, max_depth=3),
     ["Pclass", "Sex", "Age", "Fare", "Embarked", "FamilySize", "Title"]],
    [LogisticRegression(random_state=1),
     ["Pclass", "Sex", "Fare", "FamilySize", "Title", "Age", "Embarked"]],
]

# Initialize the cross-validation folds
kf = KFold(n_splits=3, random_state=1)
predictions = []
for train, test in kf.split(titanic):
    train_target = titanic["Survived"].iloc[train]
    full_test_predictions = []
    # Make predictions for each algorithm on each fold
    for alg, predictors in algorithms:
        # Fit the algorithm on the training data.
        alg.fit(titanic[predictors].iloc[train, :], train_target)
        # Select and predict on the test fold.
        # The .astype(float) is necessary to convert the dataframe to all floats
        # and avoid an sklearn error.
        test_predictions = alg.predict_proba(titanic[predictors].iloc[test, :].astype(float))[:, 1]
        full_test_predictions.append(test_predictions)
    # Use a simple ensembling scheme -- just average the predictions to get the
    # final classification.
    test_predictions = (full_test_predictions[0] + full_test_predictions[1]) / 2
    # Any value over .5 is mapped to a 1 prediction, and .5 or below to a 0 prediction.
    test_predictions[test_predictions <= .5] = 0
    test_predictions[test_predictions > .5] = 1
    predictions.append(test_predictions)

# Put all the predictions together into one array.
predictions = np.concatenate(predictions, axis=0)
# Compute accuracy by comparing to the training data.
accuracy = sum(predictions == titanic["Survived"]) / len(predictions)
print(accuracy)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
FutureWarning)
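# For reference (a sketch, not from the original notebook): when both models
# share a single feature set, sklearn's VotingClassifier implements the same
# probability-averaging idea ('soft' voting) without the manual loop. It cannot
# reproduce the per-model feature lists used above, so this is only an analogue.
from sklearn.ensemble import VotingClassifier
voter = VotingClassifier(estimators=[
    ("gbc", GradientBoostingClassifier(random_state=1, n_estimators=25, max_depth=3)),
    ("lr", LogisticRegression(random_state=1)),
], voting='soft')
shared = ["Pclass", "Sex", "Age", "Fare", "Embarked", "FamilySize", "Title"]
print(model_selection.cross_val_score(voter, titanic[shared].astype(float), titanic["Survived"], cv=3).mean())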
titles = titanic_test["Name"].apply(get_title)
# We're adding the Dona title to the mapping, because it's in the test set,
# but not the training set
title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Dr": 5, "Rev": 6,
                 "Major": 7, "Col": 7, "Mlle": 8, "Mme": 8, "Don": 9, "Lady": 10,
                 "Countess": 10, "Jonkheer": 10, "Sir": 9, "Capt": 7, "Ms": 2,
                 "Dona": 10}
for k, v in title_mapping.items():
    titles[titles == k] = v
titanic_test["Title"] = titles
# Check the counts of each unique title.
print(pd.value_counts(titanic_test["Title"]))
# Now, we add the family size column.
titanic_test["FamilySize"] = titanic_test["SibSp"] + titanic_test["Parch"]
predictors = ["Pclass", "Sex", "Age", "Fare", "Embarked", "FamilySize", "Title"]
algorithms = [
    [GradientBoostingClassifier(random_state=1, n_estimators=25, max_depth=3),
     predictors],
    [LogisticRegression(random_state=1),
     ["Pclass", "Sex", "Fare", "FamilySize", "Title", "Age", "Embarked"]],
]

full_predictions = []
for alg, predictors in algorithms:
    # Fit the algorithm using the full training data.
    alg.fit(titanic[predictors], titanic["Survived"])
    # Predict using the test dataset. We have to convert all the columns to
    # floats to avoid an error.
    predictions = alg.predict_proba(titanic_test[predictors].astype(float))[:, 1]
    full_predictions.append(predictions)
# The gradient boosting classifier generates better predictions, so we weight it higher.
predictions = (full_predictions[0] * 3 + full_predictions[1]) / 4
predictions
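# To turn the averaged probabilities into a Kaggle submission (a sketch,
# assuming titanic_test carries the usual PassengerId column; the filename is
# illustrative):
predictions[predictions <= .5] = 0
predictions[predictions > .5] = 1
submission = pd.DataFrame({
    "PassengerId": titanic_test["PassengerId"],
    "Survived": predictions.astype(int),
})
submission.to_csv("kaggle.csv", index=False)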
# Ensemble / random forest implementation: Titanic survival prediction
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn

data = pd.read_csv('data/data2138/train.csv')
data.head()
PassengerId  Survived  Pclass  Name  Sex