机器学习第1章-CSDN博客

本文链接：https://blog.csdn.net/m0_37351072/article/details/105673299

import pandas as pd

import numpy as np

from pandas import Series,DataFrame

import matplotlib.pyplot as plt

%matplotlib inline

np.random.seed(123456789)

[对random.seed的理解](https://blog.csdn.net/weixin_43901998/article/details/101602411)

random.seed()（本文使用的是 NumPy 的 np.random.seed()）用于设置随机数生成器的种子：传入的整数决定了伪随机数算法的初始状态。如果使用相同的 seed() 值，则每次生成的随机数序列都相同；如果不设置这个值，系统会自行选择初始值（例如依据当前时间或操作系统提供的随机源），此时每次运行生成的随机数会有所不同。

x = np.random.rand(500000,3)

array([[0.27544024, 0.37170215, 0.31980024],
       [0.81093632, 0.25564292, 0.50185747],
       [0.14973265, 0.31575346, 0.88972494],
       ...,
       [0.15939898, 0.81425114, 0.35042418],
       [0.38048023, 0.79058944, 0.73762802],
       [0.28124613, 0.24244432, 0.82073114]])

y = x.dot(np.array([4.2,5.7,10.8]))

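dot：x.dot(w) 计算矩阵乘积。这里 x 的形状为 (500000, 3)，权重向量为 [4.2, 5.7, 10.8]，因此 y[i] = 4.2*x[i,0] + 5.7*x[i,1] + 10.8*x[i,2]，结果 y 的形状为 (500000,)。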

from sklearn.linear_model import LinearRegression

lr = LinearRegression()

lr.fit(x,y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

lr.score(x,y)

1.0

lr.coef_

array([ 4.2,  5.7, 10.8])

x_test=np.array([2,4,6]).reshape(1,-1)

y_hat = lr.predict(x_test)

y_hat

array([96.])

x_test

array([[2, 4, 6]])

help(LinearRegression)

Help on class LinearRegression in module sklearn.linear_model.base:

class LinearRegression(LinearModel, sklearn.base.RegressorMixin, sklearn.base.MultiOutputMixin)
 |  LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=None)
 |  
 |  Ordinary least squares Linear Regression.
 |  
 |  Parameters
 |  ----------
 |  fit_intercept : boolean, optional, default True
 |      whether to calculate the intercept for this model. If set
 |      to False, no intercept will be used in calculations
 |      (e.g. data is expected to be already centered).
 |  
 |  normalize : boolean, optional, default False
 |      This parameter is ignored when ``fit_intercept`` is set to False.
 |      If True, the regressors X will be normalized before regression by
 |      subtracting the mean and dividing by the l2-norm.
 |      If you wish to standardize, please use
 |      :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on
 |      an estimator with ``normalize=False``.
 |  
 |  copy_X : boolean, optional, default True
 |      If True, X will be copied; else, it may be overwritten.
 |  
 |  n_jobs : int or None, optional (default=None)
 |      The number of jobs to use for the computation. This will only provide
 |      speedup for n_targets > 1 and sufficient large problems.
 |      ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
 |      ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
 |      for more details.
 |  
 |  Attributes
 |  ----------
 |  coef_ : array, shape (n_features, ) or (n_targets, n_features)
 |      Estimated coefficients for the linear regression problem.
 |      If multiple targets are passed during the fit (y 2D), this
 |      is a 2D array of shape (n_targets, n_features), while if only
 |      one target is passed, this is a 1D array of length n_features.
 |  
 |  intercept_ : array
 |      Independent term in the linear model.
 |  
 |  Examples
 |  --------
 |  >>> import numpy as np
 |  >>> from sklearn.linear_model import LinearRegression
 |  >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
 |  >>> # y = 1 * x_0 + 2 * x_1 + 3
 |  >>> y = np.dot(X, np.array([1, 2])) + 3
 |  >>> reg = LinearRegression().fit(X, y)
 |  >>> reg.score(X, y)
 |  1.0
 |  >>> reg.coef_
 |  array([1., 2.])
 |  >>> reg.intercept_ # doctest: +ELLIPSIS
 |  3.0000...
 |  >>> reg.predict(np.array([[3, 5]]))
 |  array([16.])
 |  
 |  Notes
 |  -----
 |  From the implementation point of view, this is just plain Ordinary
 |  Least Squares (scipy.linalg.lstsq) wrapped as a predictor object.
 |  
 |  Method resolution order:
 |      LinearRegression
 |      LinearModel
 |      sklearn.base.BaseEstimator
 |      sklearn.base.RegressorMixin
 |      sklearn.base.MultiOutputMixin
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, fit_intercept=True, normalize=False, copy_X=True, n_jobs=None)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  fit(self, X, y, sample_weight=None)
 |      Fit linear model.
 |      
 |      Parameters
 |      ----------
 |      X : array-like or sparse matrix, shape (n_samples, n_features)
 |          Training data
 |      
 |      y : array_like, shape (n_samples, n_targets)
 |          Target values. Will be cast to X's dtype if necessary
 |      
 |      sample_weight : numpy array of shape [n_samples]
 |          Individual weights for each sample
 |      
 |          .. versionadded:: 0.17
 |             parameter *sample_weight* support to LinearRegression.
 |      
 |      Returns
 |      -------
 |      self : returns an instance of self.
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __abstractmethods__ = frozenset()
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from LinearModel:
 |  
 |  predict(self, X)
 |      Predict using the linear model
 |      
 |      Parameters
 |      ----------
 |      X : array_like or sparse matrix, shape (n_samples, n_features)
 |          Samples.
 |      
 |      Returns
 |      -------
 |      C : array, shape (n_samples,)
 |          Returns predicted values.
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from sklearn.base.BaseEstimator:
 |  
 |  __getstate__(self)
 |  
 |  __repr__(self, N_CHAR_MAX=700)
 |      Return repr(self).
 |  
 |  __setstate__(self, state)
 |  
 |  get_params(self, deep=True)
 |      Get parameters for this estimator.
 |      
 |      Parameters
 |      ----------
 |      deep : boolean, optional
 |          If True, will return the parameters for this estimator and
 |          contained subobjects that are estimators.
 |      
 |      Returns
 |      -------
 |      params : mapping of string to any
 |          Parameter names mapped to their values.
 |  
 |  set_params(self, **params)
 |      Set the parameters of this estimator.
 |      
 |      The method works on simple estimators as well as on nested objects
 |      (such as pipelines). The latter have parameters of the form
 |      ``<component>__<parameter>`` so that it's possible to update each
 |      component of a nested object.
 |      
 |      Returns
 |      -------
 |      self
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from sklearn.base.BaseEstimator:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from sklearn.base.RegressorMixin:
 |  
 |  score(self, X, y, sample_weight=None)
 |      Returns the coefficient of determination R^2 of the prediction.
 |      
 |      The coefficient R^2 is defined as (1 - u/v), where u is the residual
 |      sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
 |      sum of squares ((y_true - y_true.mean()) ** 2).sum().
 |      The best possible score is 1.0 and it can be negative (because the
 |      model can be arbitrarily worse). A constant model that always
 |      predicts the expected value of y, disregarding the input features,
 |      would get a R^2 score of 0.0.
 |      
 |      Parameters
 |      ----------
 |      X : array-like, shape = (n_samples, n_features)
 |          Test samples. For some estimators this may be a
 |          precomputed kernel matrix instead, shape = (n_samples,
 |          n_samples_fitted], where n_samples_fitted is the number of
 |          samples used in the fitting for the estimator.
 |      
 |      y : array-like, shape = (n_samples) or (n_samples, n_outputs)
 |          True values for X.
 |      
 |      sample_weight : array-like, shape = [n_samples], optional
 |          Sample weights.
 |      
 |      Returns
 |      -------
 |      score : float
 |          R^2 of self.predict(X) wrt. y.
 |      
 |      Notes
 |      -----
 |      The R2 score used when calling ``score`` on a regressor will use
 |      ``multioutput='uniform_average'`` from version 0.23 to keep consistent
 |      with `metrics.r2_score`. This will influence the ``score`` method of
 |      all the multioutput regressors (except for
 |      `multioutput.MultiOutputRegressor`). To specify the default value
 |      manually and avoid the warning, please either call `metrics.r2_score`
 |      directly or make a custom scorer with `metrics.make_scorer` (the
 |      built-in scorer ``'r2'`` uses ``multioutput='uniform_average'``).