%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
import zipfile
import re
import numpy as np
import torch
# Define the one-hot encoding helper.
def oneHotEncode(df, colNames):
    """One-hot encode the given columns of *df* and drop the originals.

    For each column name in *colNames*, new dummy columns named
    ``<col>_<value>`` are appended (via ``pd.get_dummies``) and the source
    column is removed.

    Parameters
    ----------
    df : pd.DataFrame
        Input frame; not mutated in place for the concat step, but note the
        returned frame is a new object — callers must use the return value.
    colNames : iterable of str
        Names of categorical columns to encode.

    Returns
    -------
    pd.DataFrame
        Frame with the listed columns replaced by their dummy indicators.
    """
    for col in colNames:
        dummies = pd.get_dummies(df[col], prefix=col)
        df = pd.concat([df, dummies], axis=1)
        df.drop([col], axis=1, inplace=True)
    return df
# Handle categorical (discrete) columns: fill missing values with the
# sentinel string '-1', then one-hot encode them.
# NOTE(review): `data`, `cate_cols` and `num_cols` are defined in an earlier
# cell not shown here — confirm they are DataFrame / list-of-column-name.
for col in cate_cols:
    data[col] = data[col].fillna('-1')
data = oneHotEncode(data, cate_cols)

# Handle numeric (continuous) columns: fill missing values with 0, then
# min-max scale each column into [0, 1].
# NOTE(review): a constant column would give max == min here and divide by
# zero (yielding NaN) — verify no num_col is constant.
for col in num_cols:
    data[col] = data[col].fillna(0)
    data[col] = (data[col] - data[col].min()) / (data[col].max() - data[col].min())

# Drop columns presumed irrelevant to the model.
data.drop(['name', 'regionCode'], axis=1, inplace=True)

# Inspect the remaining columns (notebook cell expression).
data.columns
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In[12], line 1
----> 1 lr1=RandomForestRegressor().fit(X_train,y_train)
File ~\anaconda3\envs\pytorch\lib\site-packages\sklearn\base.py:1152, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
1145 estimator._validate_params()
1147 with config_context(
1148 skip_parameter_validation=(
1149 prefer_skip_nested_validation or global_skip_validation
1150 )
1151 ):
-> 1152 return fit_method(estimator, *args, **kwargs)
File ~\anaconda3\envs\pytorch\lib\site-packages\sklearn\ensemble\_forest.py:456, in BaseForest.fit(self, X, y, sample_weight)
445 trees = [
446 self._make_estimator(append=False, random_state=random_state)
447 for i in range(n_more_estimators)
448 ]
450 # Parallel loop: we prefer the threading backend as the Cython code
451 # for fitting the trees is internally releasing the Python GIL
452 # making threading more efficient than multiprocessing in
453 # that case. However, for joblib 0.12+ we respect any
454 # parallel_backend contexts set at a higher level,
455 # since correctness does not rely on using threads.
--> 456 trees = Parallel(
457 n_jobs=self.n_jobs,
458 verbose=self.verbose,
459 prefer="threads",
460 )(
461 delayed(_parallel_build_trees)(
462 t,
463 self.bootstrap,
464 X,
465 y,
466 sample_weight,
467 i,
468 len(trees),
469 verbose=self.verbose,
470 class_weight=self.class_weight,
471 n_samples_bootstrap=n_samples_bootstrap,
472 )
473 for i, t in enumerate(trees)
474 )
476 # Collect newly grown trees
477 self.estimators_.extend(trees)
File ~\anaconda3\envs\pytorch\lib\site-packages\sklearn\utils\parallel.py:65, in Parallel.__call__(self, iterable)
60 config = get_config()
61 iterable_with_config = (
62 (_with_config(delayed_func, config), args, kwargs)
63 for delayed_func, args, kwargs in iterable
64 )
---> 65 return super().__call__(iterable_with_config)
File ~\anaconda3\envs\pytorch\lib\site-packages\joblib\parallel.py:1918, in Parallel.__call__(self, iterable)
1916 output = self._get_sequential_output(iterable)
1917 next(output)
-> 1918 return output if self.return_generator else list(output)
1920 # Let's create an ID that uniquely identifies the current call. If the
1921 # call is interrupted early and that the same instance is immediately
1922 # re-used, this id will be used to prevent workers that were
1923 # concurrently finalizing a task from the previous call to run the
1924 # callback.
1925 with self._lock:
File ~\anaconda3\envs\pytorch\lib\site-packages\joblib\parallel.py:1847, in Parallel._get_sequential_output(self, iterable)
1845 self.n_dispatched_batches += 1
1846 self.n_dispatched_tasks += 1
-> 1847 res = func(*args, **kwargs)
1848 self.n_completed_tasks += 1
1849 self.print_progress()
File ~\anaconda3\envs\pytorch\lib\site-packages\sklearn\utils\parallel.py:127, in _FuncWrapper.__call__(self, *args, **kwargs)
125 config = {}
126 with config_context(**config):
--> 127 return self.function(*args, **kwargs)
File ~\anaconda3\envs\pytorch\lib\site-packages\sklearn\ensemble\_forest.py:188, in _parallel_build_trees(tree, bootstrap, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight, n_samples_bootstrap)
185 elif class_weight == "balanced_subsample":
186 curr_sample_weight *= compute_sample_weight("balanced", y, indices=indices)
--> 188 tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)
189 else:
190 tree.fit(X, y, sample_weight=sample_weight, check_input=False)
File ~\anaconda3\envs\pytorch\lib\site-packages\sklearn\base.py:1152, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
1145 estimator._validate_params()
1147 with config_context(
1148 skip_parameter_validation=(
1149 prefer_skip_nested_validation or global_skip_validation
1150 )
1151 ):
-> 1152 return fit_method(estimator, *args, **kwargs)
File ~\anaconda3\envs\pytorch\lib\site-packages\sklearn\tree\_classes.py:1320, in DecisionTreeRegressor.fit(self, X, y, sample_weight, check_input)
1290 @_fit_context(prefer_skip_nested_validation=True)
1291 def fit(self, X, y, sample_weight=None, check_input=True):
1292 """Build a decision tree regressor from the training set (X, y).
1293
1294 Parameters
(...)
1317 Fitted estimator.
1318 """
-> 1320 super()._fit(
1321 X,
1322 y,
1323 sample_weight=sample_weight,
1324 check_input=check_input,
1325 )
1326 return self
File ~\anaconda3\envs\pytorch\lib\site-packages\sklearn\tree\_classes.py:443, in BaseDecisionTree._fit(self, X, y, sample_weight, check_input, missing_values_in_feature_mask)
432 else:
433 builder = BestFirstTreeBuilder(
434 splitter,
435 min_samples_split,
(...)
440 self.min_impurity_decrease,
441 )
--> 443 builder.build(self.tree_, X, y, sample_weight, missing_values_in_feature_mask)
445 if self.n_outputs_ == 1 and is_classifier(self):
446 self.n_classes_ = self.n_classes_[0]
KeyboardInterrupt: