学习记录Task03:数据重构

本文记录了使用Python和scikit-learn建立模型的过程,包括读取清洗后的数据、划分训练集与测试集、训练逻辑回归和随机森林模型,以及使用交叉验证评估模型,并记录了训练逻辑回归模型时遇到的报错信息。
摘要由CSDN通过智能技术生成
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import Image
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
%matplotlib inline
# Apply all plotting defaults in a single call.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font used so that Chinese labels render
    'axes.unicode_minus': False,    # make the minus sign render correctly
    'figure.figsize': (10, 6),      # default size of output figures
})

# Load the raw training table and take a first look at it.
train = pd.read_csv('train (2).csv')
train.shape
train.head()
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
from sklearn.model_selection import train_test_split

# Features come from the cleaned data file; labels are the `Survived` column
# of the raw training file. Each file is read exactly once (the original cell
# loaded both files and rebuilt X / y twice).
# NOTE(review): pairing X with y this way assumes both files list the same
# rows in the same order - confirm against how the cleaned file was produced.
data = pd.read_csv('clear_data (2).csv')
train = pd.read_csv('train (2).csv')
X = data
y = train['Survived']

# Stratify on y so the class balance is kept in both splits; the fixed
# random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=0
)
X_train.shape, X_test.shape
((668, 11), (223, 11))

# Fit a logistic-regression baseline on the training split.
#
# With the default solver this cell raised
# "AttributeError: 'str' object has no attribute 'decode'": the traceback shows
# the failure inside scikit-learn's `_check_optimize_result`, while it formats
# the optimizer's status message. The liblinear solver does not go through
# that code path, so the fit completes on the installed versions.
# NOTE(review): upgrading scikit-learn should also remove the crash; scaling
# the features or raising `max_iter` is then the way to help the default
# solver converge - confirm which option fits this environment.
lr = LogisticRegression(solver='liblinear')
lr.fit(X_train, y_train)
---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-13-d1d3c051dba4> in <module>
      1 lr = LogisticRegression()
----> 2 lr.fit(X_train, y_train)


~\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py in fit(self, X, y, sample_weight)
   1405         else:
   1406             prefer = 'processes'
-> 1407         fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
   1408                                **_joblib_parallel_args(prefer=prefer))(
   1409             path_func(X, y, pos_class=class_, Cs=[C_],


~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1039             # remaining jobs.
   1040             self._iterating = False
-> 1041             if self.dispatch_one_batch(iterator):
   1042                 self._iterating = self._original_iterator is not None
   1043 


~\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
    857                 return False
    858             else:
--> 859                 self._dispatch(tasks)
    860                 return True
    861 


~\anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
    775         with self._lock:
    776             job_idx = len(self._jobs)
--> 777             job = self._backend.apply_async(batch, callback=cb)
    778             # A job can complete so quickly than its callback is
    779             # called before we get here, causing self._jobs to


~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
    206     def apply_async(self, func, callback=None):
    207         """Schedule a func to be run"""
--> 208         result = ImmediateResult(func)
    209         if callback:
    210             callback(result)


~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
    570         # Don't delay the application, to avoid keeping the input
    571         # arguments in memory
--> 572         self.results = batch()
    573 
    574     def get(self):


~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
    260         # change the default number of processes to -1
    261         with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262             return [func(*args, **kwargs)
    263                     for func, args, kwargs in self.items]
    264 


~\anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
    260         # change the default number of processes to -1
    261         with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262             return [func(*args, **kwargs)
    263                     for func, args, kwargs in self.items]
    264 


~\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py in _logistic_regression_path(X, y, pos_class, Cs, fit_intercept, max_iter, tol, verbose, solver, coef, class_weight, dual, penalty, intercept_scaling, multi_class, random_state, check_input, max_squared_sum, sample_weight, l1_ratio)
    760                 options={"iprint": iprint, "gtol": tol, "maxiter": max_iter}
    761             )
--> 762             n_iter_i = _check_optimize_result(
    763                 solver, opt_res, max_iter,
    764                 extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)


~\anaconda3\lib\site-packages\sklearn\utils\optimize.py in _check_optimize_result(solver, result, max_iter, extra_warning_msg)
    241                 "    https://scikit-learn.org/stable/modules/"
    242                 "preprocessing.html"
--> 243             ).format(solver, result.status, result.message.decode("latin1"))
    244             if extra_warning_msg is not None:
    245                 warning_msg += "\n" + extra_warning_msg


AttributeError: 'str' object has no attribute 'decode'
# Fit a random forest with default hyper-parameters on the training split.
# The trailing expression is what the notebook echoes as the cell output.
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)
RandomForestClassifier()
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of a weakly regularised (C=100) logistic regression
# on the training split.
#
# With the default solver every fold failed to fit (see the FitFailedWarning
# in the cell output: the fit raises inside scikit-learn while handling the
# optimizer result), which sets the fold scores to NaN. The liblinear solver
# avoids that code path so real scores are produced.
# NOTE(review): upgrading scikit-learn is the alternative fix - confirm.
lr = LogisticRegression(C=100, solver='liblinear')
scores = cross_val_score(lr, X_train, y_train, cv=10)
C:\Users\hp\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:548: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\hp\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\hp\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\hp\anaconda3\lib\site-packages\joblib\parallel.py", line 1041, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\hp\anaconda3\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\hp\anaconda3\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\hp\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值