特征工程4

import tqdm
import pandas as pd
import numpy as np
import pprint
# Lay the integers 0..9 out as a 2x5 table and pretty-print the resulting frame.
df = pd.DataFrame(np.reshape(np.arange(10), (2, 5)))
pprint.pprint(df)
   0  1  2  3  4
0  0  1  2  3  4
1  5  6  7  8  9
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.snowball import SnowballStemmer
# Build an English Snowball stemmer and stem a sample word
# (the recorded result for 'interesting' is 'interest').
stemmer = SnowballStemmer('english')
stemmer.stem('interesting')
'interest'
import numpy as np
# One row per start value (2, 4, 6); each row holds three consecutive integers.
x = np.array([list(range(first, first + 3)) for first in (2, 4, 6)])
print(x)
[[2 3 4]
 [4 5 6]
 [6 7 8]]
?np.linspace
Signature:
np.linspace(
    start,
    stop,
    num=50,
    endpoint=True,
    retstep=False,
    dtype=None,
    axis=0,
)
Docstring:
Return evenly spaced numbers over a specified interval.

Returns `num` evenly spaced samples, calculated over the
interval [`start`, `stop`].

The endpoint of the interval can optionally be excluded.

.. versionchanged:: 1.16.0
    Non-scalar `start` and `stop` are now supported.

Parameters
----------
start : array_like
    The starting value of the sequence.
stop : array_like
    The end value of the sequence, unless `endpoint` is set to False.
    In that case, the sequence consists of all but the last of ``num + 1``
    evenly spaced samples, so that `stop` is excluded.  Note that the step
    size changes when `endpoint` is False.
num : int, optional
    Number of samples to generate. Default is 50. Must be non-negative.
endpoint : bool, optional
    If True, `stop` is the last sample. Otherwise, it is not included.
    Default is True.
retstep : bool, optional
    If True, return (`samples`, `step`), where `step` is the spacing
    between samples.
dtype : dtype, optional
    The type of the output array.  If `dtype` is not given, infer the data
    type from the other input arguments.

    .. versionadded:: 1.9.0

axis : int, optional
    The axis in the result to store the samples.  Relevant only if start
    or stop are array-like.  By default (0), the samples will be along a
    new axis inserted at the beginning. Use -1 to get an axis at the end.

    .. versionadded:: 1.16.0

Returns
-------
samples : ndarray
    There are `num` equally spaced samples in the closed interval
    ``[start, stop]`` or the half-open interval ``[start, stop)``
    (depending on whether `endpoint` is True or False).
step : float, optional
    Only returned if `retstep` is True

    Size of spacing between samples.

See Also
--------
arange : Similar to `linspace`, but uses a step size (instead of the
         number of samples).
geomspace : Similar to `linspace`, but with numbers spaced evenly on a log
            scale (a geometric progression).
logspace : Similar to `geomspace`, but with the end points specified as
           logarithms.

Examples
--------
>>> np.linspace(2.0, 3.0, num=5)
array([2.  , 2.25, 2.5 , 2.75, 3.  ])
>>> np.linspace(2.0, 3.0, num=5, endpoint=False)
array([2. ,  2.2,  2.4,  2.6,  2.8])
>>> np.linspace(2.0, 3.0, num=5, retstep=True)
(array([2.  ,  2.25,  2.5 ,  2.75,  3.  ]), 0.25)

Graphical illustration:

>>> import matplotlib.pyplot as plt
>>> N = 8
>>> y = np.zeros(N)
>>> x1 = np.linspace(0, 10, N, endpoint=True)
>>> x2 = np.linspace(0, 10, N, endpoint=False)
>>> plt.plot(x1, y, 'o')
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.plot(x2, y + 0.5, 'o')
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.ylim([-0.5, 1])
(-0.5, 1)
>>> plt.show()
File:      d:\anaconda3\lib\site-packages\numpy\core\function_base.py
Type:      function
np.random.normal(0,2,(3,3))  # 3x3 array of normally distributed random numbers with mean 0 and standard deviation 2 (the second argument is the standard deviation, not the variance)
array([[-0.97665752, -0.94550985, -2.76966276],
       [-5.28791998,  3.79721287, -1.31434729],
       [-4.02718968,  0.16028551, -1.02969409]])
np.random.randint(0,10,(3,3))  # 3x3 array of random integers drawn from the half-open interval [0, 10)
array([[4, 0, 4],
       [4, 3, 0],
       [1, 3, 0]])
np.eye(3)  # 3x3 identity matrix
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])
from matplotlib import pyplot as plt
import numpy as np
# Sample 100 evenly spaced points on [0, 10] and draw sin and cos on one figure.
X = np.linspace(0,10,100)
plt.plot(X,np.sin(X))
plt.plot(X,np.cos(X))
# Save BEFORE showing: once plt.show() has displayed the figure it is no longer
# the current one, so a later savefig() writes a new, empty figure (this is the
# "Figure ... with 0 Axes" output that was recorded for this cell).
plt.savefig('my_figure.png')
plt.show()

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-g1x4cXTf-1614244742236)(output_15_0.png)]

<Figure size 432x288 with 0 Axes>
# 两种风格的画图接口
## MATLAB 风格接口
# MATLAB-style (stateful) interface: every plt.* call acts on the current
# figure and the current axes.
plt.figure()

plt.subplot(2,1,1)   # (rows, columns, subplot index)
plt.plot(X,np.sin(X))

plt.subplot(2,1,2)
# plt.gca() returns the current axes; the return value is not used here.
plt.gca()
plt.plot(X,np.cos(X));

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-NeAKyOyU-1614244742242)(output_18_0.png)]

  • 这种接口最重要的特性是有状态的:它会持续跟踪‘当前的’图形和坐标轴,所以plt 命令都可以应用。
    可以使用plt.gcf() (获取当前图形) 和plt.gca() (获取当前坐标轴)。
# 面向对象接口
# Object-oriented interface: create the grid of plots first; ax is an array
# holding two Axes objects. (The original trailing comment had been split
# across bare lines, which made this cell a syntax error.)
fig,ax = plt.subplots(2)

# Call plot() on the Axes object that belongs to each panel.
ax[0].plot(X,np.sin(X))
ax[1].plot(X,np.cos(X));

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-6EpRdqQe-1614244742244)(output_21_0.png)]

# 简易线形图
%matplotlib inline 
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use the new name when it is
# available and fall back to the original one on older installations.
plt.style.use('seaborn-v0_8-whitegrid'
              if 'seaborn-v0_8-whitegrid' in plt.style.available
              else 'seaborn-whitegrid')
import numpy as np
# Create a figure and a set of axes on it; nothing is plotted in this cell.
fig = plt.figure()
ax = plt.axes()

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-K6H95aYc-1614244742248)(output_24_0.png)]

# Fresh figure and axes, then draw a sine curve through the Axes object.
fig = plt.figure()
ax =plt.axes()

# 100 evenly spaced sample points over [0, 10].
x = np.linspace(0,10,100)
ax.plot(x,np.sin(x));

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-WDXSVAMG-1614244742251)(output_25_0.png)]

# Seeded generator so the random scatter data are reproducible.
rng = np.random.RandomState(0)
x = rng.randn(100)
y = rng.randn(100)
# One colour value and one marker size per point.
colors = rng.rand(100)
sizes = 1000 * rng.rand(100)

plt.scatter(x,y,c = colors, s = sizes, alpha = 0.3, cmap = 'viridis') # the alpha argument adjusts the transparency
plt.colorbar(); # show the colour bar

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-E31aXr60-1614244742253)(output_26_0.png)]



from sklearn.datasets import load_iris

iris = load_iris()
# NOTE: the import and load below repeat the two statements above.
from sklearn.datasets import load_iris

iris = load_iris()
# Transpose so that features[k] is the k-th feature across all samples.
features = iris.data.T

# Plot feature 0 against feature 1; marker size scales with feature 3 and
# marker colour encodes iris.target.
plt.scatter(features[0],features[1],alpha = 0.2,
           s = 100 * features[3], c = iris.target,cmap = 'viridis')
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.colorbar();

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-EcradQaU-1614244742254)(output_30_0.png)]


已标记关键词 清除标记
相关推荐
©️2020 CSDN 皮肤主题: 大白 设计师:CSDN官方博客 返回首页