# 特征工程4

import tqdm
import pandas as pd
import numpy as np
import pprint
# Build a 2x5 DataFrame holding the integers 0..9 in row-major order and
# pretty-print it.
df = pd.DataFrame(data=np.arange(0, 10).reshape((2, 5)))
pprint.pprint(df)
0  1  2  3  4
0  0  1  2  3  4
1  5  6  7  8  9
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.snowball import SnowballStemmer
# Build an English Snowball stemmer and stem a sample word.
stemmer = SnowballStemmer(language='english')
stemmer.stem('interesting')
'interest'
import numpy as np
# Each row is three consecutive integers starting at 2, 4 and 6 respectively.
x = np.array([list(range(start, start + 3)) for start in (2, 4, 6)])
print(x)
[[2 3 4]
[4 5 6]
[6 7 8]]
?np.linspace
Signature:
np.linspace(
    start,
    stop,
    num=50,
    endpoint=True,
    retstep=False,
    dtype=None,
    axis=0,
)
Docstring:
Return evenly spaced numbers over a specified interval.

Returns num evenly spaced samples, calculated over the
interval [start, stop].

The endpoint of the interval can optionally be excluded.

.. versionchanged:: 1.16.0
Non-scalar start and stop are now supported.

Parameters
----------
start : array_like
The starting value of the sequence.
stop : array_like
The end value of the sequence, unless endpoint is set to False.
In that case, the sequence consists of all but the last of num + 1
evenly spaced samples, so that stop is excluded.  Note that the step
size changes when endpoint is False.
num : int, optional
Number of samples to generate. Default is 50. Must be non-negative.
endpoint : bool, optional
If True, stop is the last sample. Otherwise, it is not included.
Default is True.
retstep : bool, optional
If True, return (samples, step), where step is the spacing
between samples.
dtype : dtype, optional
The type of the output array.  If dtype is not given, infer the data
type from the other input arguments.

axis : int, optional
The axis in the result to store the samples.  Relevant only if start
or stop are array-like.  By default (0), the samples will be along a
new axis inserted at the beginning. Use -1 to get an axis at the end.

Returns
-------
samples : ndarray
There are num equally spaced samples in the closed interval
[start, stop] or the half-open interval [start, stop)
(depending on whether endpoint is True or False).
step : float, optional
Only returned if retstep is True

Size of spacing between samples.

See Also
--------
arange : Similar to linspace, but uses a step size (instead of the
number of samples).
geomspace : Similar to linspace, but with numbers spaced evenly on a log
scale (a geometric progression).
logspace : Similar to geomspace, but with the end points specified as
logarithms.

Examples
--------
>>> np.linspace(2.0, 3.0, num=5)
array([2.  , 2.25, 2.5 , 2.75, 3.  ])
>>> np.linspace(2.0, 3.0, num=5, endpoint=False)
array([2. ,  2.2,  2.4,  2.6,  2.8])
>>> np.linspace(2.0, 3.0, num=5, retstep=True)
(array([2.  ,  2.25,  2.5 ,  2.75,  3.  ]), 0.25)

Graphical illustration:

>>> import matplotlib.pyplot as plt
>>> N = 8
>>> y = np.zeros(N)
>>> x1 = np.linspace(0, 10, N, endpoint=True)
>>> x2 = np.linspace(0, 10, N, endpoint=False)
>>> plt.plot(x1, y, 'o')
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.plot(x2, y + 0.5, 'o')
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.ylim([-0.5, 1])
(-0.5, 1)
>>> plt.show()
File:      d:\anaconda3\lib\site-packages\numpy\core\function_base.py
Type:      function
# Draw a 3x3 array of normally distributed random numbers with mean 0 (loc)
# and standard deviation 2 (scale is the standard deviation, not the variance).
np.random.normal(loc=0, scale=2, size=(3, 3))
array([[-0.97665752, -0.94550985, -2.76966276],
[-5.28791998,  3.79721287, -1.31434729],
[-4.02718968,  0.16028551, -1.02969409]])
# Draw a 3x3 array of random integers from the half-open interval [0, 10).
np.random.randint(low=0, high=10, size=(3, 3))
array([[4, 0, 4],
[4, 3, 0],
[1, 3, 0]])
# Create the 3x3 identity matrix.
np.eye(N=3)
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
from matplotlib import pyplot as plt
import numpy as np
# Plot sin(X) and cos(X) over 100 evenly spaced points in [0, 10].
X = np.linspace(0, 10, 100)
plt.plot(X, np.sin(X))
plt.plot(X, np.cos(X))
# Save BEFORE showing: once plt.show() has run, the current figure is gone and
# savefig would write a fresh, empty figure ("Figure ... with 0 Axes").
plt.savefig('my_figure.png')
plt.show()

<Figure size 432x288 with 0 Axes>
# Two styles of plotting interface
## MATLAB-style interface
# Start a new figure; the pyplot calls below act on it.
plt.figure()

# Top panel. The three digits are (rows, columns, subplot number).
plt.subplot(211)
plt.plot(X, np.sin(X))

# Bottom panel.
plt.subplot(212)
plt.gca()
plt.plot(X, np.cos(X));

• The most important property of this interface is that it is stateful: it keeps track of the "current" figure and axes, and every plt command is applied to them.
You can get hold of them with plt.gcf() (get current figure) and plt.gca() (get current axes).
# Object-oriented interface
# Create the grid of plots first; ax is an array containing two Axes objects.
# (This comment used to trail the call and had been split across three lines,
# which left bare, non-comment text in the code.)
fig, ax = plt.subplots(2)

# Call plot() on the specific Axes object instead of relying on pyplot state.
ax[0].plot(X, np.sin(X))
ax[1].plot(X, np.cos(X));

# Simple line plots
%matplotlib inline
import matplotlib.pyplot as plt
# The bundled seaborn styles were renamed to 'seaborn-v0_8-*' in Matplotlib 3.6
# and the old names were removed in 3.8, so use whichever name this
# installation provides.
plt.style.use(
    'seaborn-v0_8-whitegrid'
    if 'seaborn-v0_8-whitegrid' in plt.style.available
    else 'seaborn-whitegrid'
)
import numpy as np

# An empty figure containing a single empty axes.
fig = plt.figure()
ax = plt.axes()

# A second figure/axes pair; this is the one the sine curve is drawn on.
fig = plt.figure()
ax = plt.axes()

x = np.linspace(0, 10, 100)
ax.plot(x, np.sin(x));

# Seeded generator so the scatter data is reproducible. The draws must stay in
# this order (x, y, colors, sizes) to reproduce the same values.
rng = np.random.RandomState(seed=0)
x, y = rng.randn(100), rng.randn(100)
colors = rng.rand(100)
sizes = rng.rand(100) * 1000

# The alpha argument adjusts the marker transparency.
plt.scatter(x, y, s=sizes, c=colors, cmap='viridis', alpha=0.3)
# Show the colour bar.
plt.colorbar();

# Load the iris dataset here: `iris` is used below but was never defined in
# this file, so the cell raised NameError.
from sklearn.datasets import load_iris
iris = load_iris()

# Transpose so that features[i] selects the i-th column of iris.data.
features = iris.data.T

# Column 0 against column 1; marker size scales with column 3 and the colour
# follows iris.target.
plt.scatter(features[0], features[1], alpha=0.2,
            s=100 * features[3], c=iris.target, cmap='viridis')
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.colorbar();

08-23 1933

07-31 1万+
08-26 877
07-25 4335
12-23 197
07-20 1万+
02-22 1925
06-16 4090
06-17 3万+
09-01 5896
12-30 4127