import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib. pyplot as plt
from IPython. display import Image
from sklearn. linear_model import LogisticRegression
from sklearn. ensemble import RandomForestClassifier
% matplotlib inline
# Plot configuration applied in one call:
#   - 'SimHei' as the sans-serif font (a CJK-capable font, so Chinese labels render),
#   - ASCII minus signs on the axes (the Unicode minus is missing from that font),
#   - a default figure size of 10 x 6 inches.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
    'figure.figsize': (10, 6),
})

# Load the raw training table and take a first look at its size and leading rows.
train = pd.read_csv('train (2).csv')
train.shape
train.head()
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
0
1
0
3
Braund, Mr. Owen Harris
male
22.0
1
0
A/5 21171
7.2500
NaN
S
1
2
1
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38.0
1
0
PC 17599
71.2833
C85
C
2
3
1
3
Heikkinen, Miss. Laina
female
26.0
0
0
STON/O2. 3101282
7.9250
NaN
S
3
4
1
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35.0
1
0
113803
53.1000
C123
S
4
5
0
3
Allen, Mr. William Henry
male
35.0
0
0
373450
8.0500
NaN
S
from sklearn.model_selection import train_test_split

# Load each CSV exactly once (the files were previously read twice back to back).
# Features come from the cleaned table, the target from the raw training table.
# NOTE(review): this assumes both files hold the same rows in the same order,
# since X and y are paired purely by position -- confirm.
data = pd.read_csv('clear_data (2).csv')
train = pd.read_csv('train (2).csv')
X = data
y = train['Survived']

# Stratify on y so both partitions keep the same class ratio as the full set;
# random_state=0 makes the split reproducible. No test_size is given, so the
# library default proportion is used.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=0
)
X_train.shape, X_test.shape
((668, 11), (223, 11))
# Fit a baseline logistic-regression classifier on the training split.
#
# With the default solver this call crashed in this environment with
# "AttributeError: 'str' object has no attribute 'decode'", raised inside
# sklearn's _check_optimize_result while it was building a solver-status
# warning message. That check belongs to the L-BFGS code path, so selecting
# the 'liblinear' solver (suitable for a small, binary problem) sidesteps it.
# NOTE(review): the crash looks like a scikit-learn/SciPy version mismatch;
# upgrading scikit-learn is the longer-term fix -- confirm the installed versions.
lr = LogisticRegression(solver='liblinear')
lr.fit(X_train, y_train)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-13-d1d3c051dba4> in <module>
1 lr = LogisticRegression()
----> 2 lr.fit(X_train, y_train)
~\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py in fit(self, X, y, sample_weight)
1405 else:
1406 prefer = 'processes'
-> 1407 fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
1408 **_joblib_parallel_args(prefer=prefer))(
1409 path_func(X, y, pos_class=class_, Cs=[C_],
~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1039 # remaining jobs.
1040 self._iterating = False
-> 1041 if self.dispatch_one_batch(iterator):
1042 self._iterating = self._original_iterator is not None
1043
~\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
857 return False
858 else:
--> 859 self._dispatch(tasks)
860 return True
861
~\anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
775 with self._lock:
776 job_idx = len(self._jobs)
--> 777 job = self._backend.apply_async(batch, callback=cb)
778 # A job can complete so quickly than its callback is
779 # called before we get here, causing self._jobs to
~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~\anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py in _logistic_regression_path(X, y, pos_class, Cs, fit_intercept, max_iter, tol, verbose, solver, coef, class_weight, dual, penalty, intercept_scaling, multi_class, random_state, check_input, max_squared_sum, sample_weight, l1_ratio)
760 options={"iprint": iprint, "gtol": tol, "maxiter": max_iter}
761 )
--> 762 n_iter_i = _check_optimize_result(
763 solver, opt_res, max_iter,
764 extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
~\anaconda3\lib\site-packages\sklearn\utils\optimize.py in _check_optimize_result(solver, result, max_iter, extra_warning_msg)
241 " https://scikit-learn.org/stable/modules/"
242 "preprocessing.html"
--> 243 ).format(solver, result.status, result.message.decode("latin1"))
244 if extra_warning_msg is not None:
245 warning_msg += "\n" + extra_warning_msg
AttributeError: 'str' object has no attribute 'decode'
# Random forest baseline with library-default hyperparameters,
# trained on the same training split as the other models.
rfc = RandomForestClassifier()
rfc.fit(X=X_train, y=y_train)
RandomForestClassifier()
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of a weakly regularised logistic regression
# (C is the inverse regularisation strength, so C=100 means little shrinkage).
#
# With the default solver every fold failed to fit in this environment
# (FitFailedWarning, score set to NaN) because sklearn's solver-status check
# raised while formatting its message. The 'liblinear' solver does not go
# through that check, so the folds produce real scores.
# NOTE(review): upgrading scikit-learn is the longer-term fix -- confirm versions.
lr = LogisticRegression(C=100, solver='liblinear')
scores = cross_val_score(lr, X_train, y_train, cv=10)
C:\Users\hp\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:548: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "C:\Users\hp\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "C:\Users\hp\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
File "C:\Users\hp\anaconda3\lib\site-packages\joblib\parallel.py", line 1041, in __call__
if self.dispatch_one_batch(iterator):
File "C:\Users\hp\anaconda3\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
self._dispatch(tasks)
File "C:\Users\hp\anaconda3\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\Users\hp\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_