Numpy数据分析模块-学习笔记

最新推荐文章于 2023-07-12 02:01:29 发布

小绿叶ya

最新推荐文章于 2023-07-12 02:01:29 发布

阅读量901

点赞数

本文链接：https://blog.csdn.net/qq_41147659/article/details/120018754

版权

NumPy 数组统计分析矩阵运算通用函数

关键词由CSDN通过智能技术生成

文章目录

数组属性
数组创建与转变
索引和切片
数据组合与拆分
- 数据组合
- 数据拆分
掌握 NumPy 矩阵与通用函数
- np.dot() 计算点积
- 练习：计算100个点的距离矩阵
使用Numpy进行统计分析
- 文件读写：文本文件（TXT，csv)和二进制文件
- - 数据统计分析
作业

常用库：Numpy Matplotlib Pandas Scikit-Learn

import numpy as np
np.array([1,2])  # convert a Python list into an ndarray (NumPy's N-dimensional array)

array([1, 2])

数组属性

a = np.array([[ 1,2,3],[4,5,6]])

array.ndim维数

a = np.array([[ 1,2,3],[4,5,6]])
a.ndim  #维数

array.shape 形状大小

a = np.array([[ 1,2,3],[4,5,6]])
a.shape  # 形状大小

(2, 3)

array.size 元素总数

a = np.array([[ 1,2,3],[4,5,6]])
a.size

array.dtype 数据类型

https://www.runoob.com/numpy/numpy-dtype.html

a = np.array([[ 1,2,3],[4,5,6]])
a.dtype  # 有32位就会有 8 16 32 64位等，为了限制资源浪费，为了提高计算效率

dtype('int32')

array.itemsize 字节大小

a = np.array([[ 1,2,3],[4,5,6]])
a.itemsize  # 字节大小，Int32的字节大小是4

数组创建与转变

array() 函数

array(object, dtype=None, copy=True, order=None, subok=False, ndmin=0)

object
dtype=None 数据类型
copy=True
order=None
subok=False
ndmin=0)

# 把object对象转为多维数组
a = np.array([[ 1,2,3],[4,5,6]])
print(a.dtype)
b = np.array(a,dtype=np.float32)  # float32 Python里面是没有的，要用np里面的
print(b.dtype)

int32
float32

a = [[2,3],[4,5]]
a[0][1]
# np.array([[1,2],[3,4,5]])  # 不支持列数不对应
b = np.array(a)
b[0,1]

np.array([1,'s'])[0]  # 会自动把数据类型统一起来

'1'

help() / ? 查看帮助文档

# 查看帮助文档
np.array?

# 查看帮助文档
help(np.array)

Help on built-in function array in module numpy.core.multiarray:

array(...)
    array(object, dtype=None, copy=True, order=None, subok=False, ndmin=0)
    
    Create an array.
    
    Parameters
    ----------
    object : array_like
        An array, any object exposing the array interface, an
        object whose __array__ method returns an array, or any
        (nested) sequence.
    dtype : data-type, optional
        The desired data-type for the array.  If not given, then
        the type will be determined as the minimum type required
        to hold the objects in the sequence.  This argument can only
        be used to 'upcast' the array.  For downcasting, use the
        .astype(t) method.
    copy : bool, optional
        If true (default), then the object is copied.  Otherwise, a copy
        will only be made if __array__ returns a copy, if obj is a
        nested sequence, or if a copy is needed to satisfy any of the other
        requirements (`dtype`, `order`, etc.).
    order : {'C', 'F', 'A'}, optional
        Specify the order of the array.  If order is 'C', then the array
        will be in C-contiguous order (last-index varies the fastest).
        If order is 'F', then the returned array will be in
        Fortran-contiguous order (first-index varies the fastest).
        If order is 'A' (default), then the returned array may be
        in any order (either C-, Fortran-contiguous, or even discontiguous),
        unless a copy is required, in which case it will be C-contiguous.
    subok : bool, optional
        If True, then sub-classes will be passed-through, otherwise
        the returned array will be forced to be a base-class array (default).
    ndmin : int, optional
        Specifies the minimum number of dimensions that the resulting
        array should have.  Ones will be pre-pended to the shape as
        needed to meet this requirement.
    
    Returns
    -------
    out : ndarray
        An array object satisfying the specified requirements.
    
    See Also
    --------
    empty, empty_like, zeros, zeros_like, ones, ones_like, fill
    
    Examples
    --------
    >>> np.array([1, 2, 3])
    array([1, 2, 3])
    
    Upcasting:
    
    >>> np.array([1, 2, 3.0])
    array([ 1.,  2.,  3.])
    
    More than one dimension:
    
    >>> np.array([[1, 2], [3, 4]])
    array([[1, 2],
           [3, 4]])
    
    Minimum dimensions 2:
    
    >>> np.array([1, 2, 3], ndmin=2)
    array([[1, 2, 3]])
    
    Type provided:
    
    >>> np.array([1, 2, 3], dtype=complex)
    array([ 1.+0.j,  2.+0.j,  3.+0.j])
    
    Data-type consisting of more than one element:
    
    >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
    >>> x['a']
    array([1, 3])
    
    Creating an array from sub-classes:
    
    >>> np.array(np.mat('1 2; 3 4'))
    array([[1, 2],
           [3, 4]])
    
    >>> np.array(np.mat('1 2; 3 4'), subok=True)
    matrix([[1, 2],
            [3, 4]])

array.T 转置

a = np.array([[ 1,2,3],[4,5,6]])
a.T  # 转置

array([[1, 4],
       [2, 5],
       [3, 6]])

arange() 返回ndarray 类似range()

print(list(range(10)))  #返回列表 
print(np.arange(10))  # 返回ndarray
print(np.arange(2,10))
print(np.arange(2,10,2))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0 1 2 3 4 5 6 7 8 9]
[2 3 4 5 6 7 8 9]
[2 4 6 8]

array.reshape() 重塑形状

a.reshape(shape, order='C')

order='C' 按行进行操作，order='F' 按列进行操作

np.arange(10).reshape(2,5)
a.reshape

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

np.arange(10).reshape(5,-1)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

import numpy as np
a = np.arange(10)
a.reshape([2,5])
a.reshape((2,5),order='F')

array([[0, 2, 4, 6, 8],
       [1, 3, 5, 7, 9]])

a = np.array([[2,3],[4,5]])
a.reshape([1,4])  # 返回依旧是二维数组 
a.reshape(4)  # 一维

array([2, 3, 4, 5])

array.ravel() 展为一维数组

a = np.array([[2,3],[4,5]])
print(a.ravel())
print(a.ravel('F'))

[2 3 4 5]
[2 4 3 5]

array.flatten() 展为一维数组

a = np.array([[1, 2, 3],
              [4, 5, 6]])
print(a.flatten(order='C'))  # 按行展平
print(a.flatten('F'))  # 按列展平

[1 2 3 4 5 6]
[1 4 2 5 3 6]

linspace() 创建等差数列

import numpy as np
np.arange(1.3,10,2.1)
# 创建等差数列
print(np.linspace(1.3,10,5)) 
print(np.linspace(1.3,10,5,endpoint=False))  # 比如画圆的时候，就不要最后一个点

[  1.3     3.475   5.65    7.825  10.   ]
[ 1.3   3.04  4.78  6.52  8.26]

logspace() 创建等比数列

# 创建等比数列
print(10**np.linspace(2,7,6))
print(np.logspace(2,7,6))
np.logspace(2,7,6,base=2)  # 2**2，2**3，2**4, 2**5, 2**6, 2**7

[  1.3     3.475   5.65    7.825  10.   ]
[ 1.3   3.04  4.78  6.52  8.26]
[  1.00000000e+02   1.00000000e+03   1.00000000e+04   1.00000000e+05
   1.00000000e+06   1.00000000e+07]
[  1.00000000e+02   1.00000000e+03   1.00000000e+04   1.00000000e+05
   1.00000000e+06   1.00000000e+07]





array([   4.,    8.,   16.,   32.,   64.,  128.])

zeros() 全零数组

# 全零数组
np.zeros(shape=(2,3))
np.zeros([2,3])

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

ones() 全一数组

# 全一数组
np.ones((2,3,2))

array([[3, 0, 0],
       [0, 4, 0],
       [0, 0, 5]])

eye() 单位数组

# 单位数组（类似单位矩阵）  返回的是二维数组
np.eye(3)  
np.eye(3,4)

diag() 对角数组

# 指定对角线元素的数组
np.diag([3,4,5])  # 似乎圆括号，方括号都可以的呢

np.random 模块

.seed() 随机种子

# 生成随机数
np.random.seed(123)  # 随机种子,下面生成的数据，都是“特定的”随机数
# 生成0-1之间的随机数
np.random.rand(2,5)

array([[ 0.69646919,  0.28613933,  0.22685145,  0.55131477,  0.71946897],
       [ 0.42310646,  0.9807642 ,  0.68482974,  0.4809319 ,  0.39211752]])

.rand() 生成0-1间随机数

rand(d0, d1, …, dn)

# 生成随机数
np.random.seed(123)  # 随机种子,下面生成的数据，都是“特定的”随机数
# 生成0-1之间的随机数
np.random.rand(2,5)

array([[ 0.69646919,  0.28613933,  0.22685145,  0.55131477,  0.71946897],
       [ 0.42310646,  0.9807642 ,  0.68482974,  0.4809319 ,  0.39211752]])

.random() 生成0-1间随机数

random_sample(size=None)

# 生成随机数
np.random.seed(123)  # 随机种子,下面生成的数据，都是“特定的”随机数
# 生成0-1之间的随机数
np.random.random(size=(2,5))  # 后开，取不到1

array([[ 0.69646919,  0.28613933,  0.22685145,  0.55131477,  0.71946897],
       [ 0.42310646,  0.9807642 ,  0.68482974,  0.4809319 ,  0.39211752]])

.randint() 生成随机整数

randint(low, high=None, size=None, dtype='l')

low:最小值闭
high:最大值开

np.random.seed(123)
# 生成20-40范围内的5个随机整数
np.random.randint(20,40,5)  # 左闭右开

array([33, 22, 22, 26, 37])

.randn() 生成服从标准正态分布的随机数

randn(d0, d1, …, dn)

# Draw samples from the standard normal distribution into a 2x5 array
np.random.randn(2,5)

array([[-1.10098526, -1.4103012 , -0.74765132, -0.98486761, -0.74856868],
       [ 0.24036728, -1.85563747, -1.7794548 , -2.75022426, -0.23415755]])

索引和切片

跟列表类似，但它还支持布尔索引，可以选取 b[b>5] 这样的值

b = np.arange(12).reshape(3,4)
print(b)
b[0,1]
b[0:2,:]  # 0到1 的数据
b[:2,2:]
b[:-1,2:-1]  # -1那一行没有被取到, 先行后列
b[[False,True,False],]  # 这行我运行的有点问题
b[:,[False,True,False,True]]  # 我的版本暂时用不了，需要更加高级的版本才可以
b[:,::2]

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


D:\yianzhuang\Anaconda3\lib\site-packages\ipykernel\__main__.py:7: FutureWarning: in the future, boolean array-likes will be handled as a boolean array index
D:\yianzhuang\Anaconda3\lib\site-packages\ipykernel\__main__.py:8: FutureWarning: in the future, boolean array-likes will be handled as a boolean array index





array([[ 0,  2],
       [ 4,  6],
       [ 8, 10]])

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

b[b<5]  #

array([0, 1, 2, 3, 4])

数据组合与拆分

数据组合

a1 = np.zeros((3,4))
a2 = np.ones((3,3))
a3 = np.ones((4,4))

np.concatenate((a1,a3),axis=0)  # (y)纵向拼接
np.concatenate((a1,a2),axis=1)  # (x)横向拼接
# np.stack((a1,a3),axis=1)
np.vstack((a1,a3))  # (y)纵向堆叠
np.hstack((a1,a2))  # (x)横向堆叠

array([[ 0.,  0.,  0.,  0.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  1.,  1.,  1.]])

数据拆分

b = np.zeros((4,8))
np.split(b,2,axis=0)  # 从y中间切
np.split(b,2,axis=1)
np.vsplit(b,2)
np.hsplit(b,2)

[array([[ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]]), array([[ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]])]

order='C' 按行进行操作
order='F' 按列进行操作
axis=0 纵向操作(列)
axis=1 横向操作(行)
v: vertical 纵向操作(列)
h:horizontal 横向操作(行)

掌握 NumPy 矩阵与通用函数

➢ 矩阵与数相乘：matr1*3

➢ 矩阵相加减：matr1±matr2

➢ 矩阵相乘：matr1*matr2

➢ 矩阵对应元素相乘：np.multiply(matr1,matr2)

➢ 矩阵特有属性：

T 返回自身的转置
H 返回自身的共轭转置
I 返回自身的逆矩阵
A 返回自身数据的2维数组的一个视图

import numpy as np
[i**2 for i in [2, 4, 6]]
np.array([2, 4, 6]) ** 2

array([ 4, 16, 36])

np.array([2, 4, 6]) * np.array([1, 3, 5])  # `*` on ndarrays multiplies element by element; it is NOT the linear-algebra matrix product
# np.ones((3, 4)) * np.array([[1, 2, 3, 4]])
# np.ones((3, 4)) * np.array([[1], [2], [3]])

array([ 2, 12, 30])

np.dot() 计算点积

np.ones((3, 3)) * np.ones((3, 3))  # element-wise product (every entry stays 1.0), not a matrix product
np.ones((3, 3)).dot(np.ones((3, 3)))  # matrix product: every entry is 3.0, which is the result shown below

array([[ 3.,  3.,  3.],
       [ 3.,  3.,  3.],
       [ 3.,  3.,  3.]])

# 利用Numpy生成100个点
x_data = np.float32(np.random.rand(100,2))  # 100*2的随机数
# y=0.1x1+0.2x2+0.3
y_data = np.dot(x_data,[[0.100], [0.200]]) + 0.300

y_data

array([[0.34377144],
       [0.4822699 ],
       [0.47270257],
       [0.5429002 ],
       [0.4446515 ],
       [0.41895153],
       [0.46160278],
       [0.35242171],
       [0.39926914],
       [0.37863993],
       [0.36839944],
       [0.45223075],
       [0.4844957 ],
       [0.38942822],
       [0.38661988],
       [0.47165079],
       [0.4736063 ],
       [0.54205135],
       [0.3943022 ],
       [0.43250963],
       [0.58474337],
       [0.43178422],
       [0.36389577],
       [0.52146471],
       [0.38848259],
       [0.515346  ],
       [0.3966455 ],
       [0.51638931],
       [0.45166914],
       [0.34500174],
       [0.43153216],
       [0.51620744],
       [0.45619509],
       [0.35270077],
       [0.53806841],
       [0.37142498],
       [0.43810284],
       [0.32008261],
       [0.5004158 ],
       [0.53061476],
       [0.30304388],
       [0.53346578],
       [0.52081376],
       [0.48111758],
       [0.34872289],
       [0.3534609 ],
       [0.52881261],
       [0.54632378],
       [0.49003446],
       [0.53594987],
       [0.44018516],
       [0.51843384],
       [0.42568139],
       [0.40949556],
       [0.47334818],
       [0.46801003],
       [0.59626866],
       [0.41834676],
       [0.52260355],
       [0.55379124],
       [0.57461348],
       [0.44406661],
       [0.41910173],
       [0.48424855],
       [0.38779417],
       [0.5063579 ],
       [0.37603597],
       [0.40196129],
       [0.3444668 ],
       [0.34311828],
       [0.56288853],
       [0.31467596],
       [0.37000017],
       [0.41471657],
       [0.43326844],
       [0.35139896],
       [0.3676246 ],
       [0.42597461],
       [0.53759727],
       [0.50381383],
       [0.40748895],
       [0.44420369],
       [0.3407497 ],
       [0.40756499],
       [0.5170897 ],
       [0.36818179],
       [0.53165449],
       [0.47257836],
       [0.41966039],
       [0.39179082],
       [0.56509062],
       [0.35622665],
       [0.45097599],
       [0.38673676],
       [0.39065598],
       [0.38149583],
       [0.46576837],
       [0.47541111],
       [0.30483321],
       [0.48833709]])

# 特殊的二维数组：矩阵matrix
a = np.matrix(np.ones((3, 3)))
a * a

matrix([[ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.]])

import numpy as np
b = np.mat('2 3 4; 5 6 7; 8 9 10')

b
# a

matrix([[ 2,  3,  4],
        [ 5,  6,  7],
        [ 8,  9, 10]])

np.bmat('a b')

matrix([[  1.,   1.,   1.,   2.,   3.,   4.],
        [  1.,   1.,   1.,   5.,   6.,   7.],
        [  1.,   1.,   1.,   8.,   9.,  10.]])

b*3
np.multiply(b, b)  # 矩阵对应位置的数值相乘
# # [1, 2]*3

matrix([[  4,   9,  16],
        [ 25,  36,  49],
        [ 64,  81, 100]])

b.A   # 二维数组类型
# a.I   # 逆矩阵（报错：a和b均没有逆矩阵）
b.H  # 共轭转置
b.T  # 转置

matrix([[ 2,  5,  8],
        [ 3,  6,  9],
        [ 4,  7, 10]])

通用函数

# 四则运算
np.array([1, 2, 3]) + np.array([2, 3, 4])

array([3, 5, 7])

np.any(np.array([1,2,3])>1)  # 存在一个
np.all(np.array([1,2,3])>1)  # 所有都要满足

False

# 比较运算: == != < > <= >=
np.array([1, 2, 3]) < 2
# 与或非
(np.array([1, 2, 3])>0) & (np.array([1, 2, 3])<2)
(np.array([1, 2, 3])>0) | (np.array([1, 2, 3])<2)
ind = (np.array([1, 2, 3])>1)
np.array([1, 2, 3])[~ind]
np.any(np.array([1, 2, 3])> 1)
np.all(np.array([1, 2, 3])> 1)

False

练习：计算100个点的距离矩阵

a = [1, 1]
b = [2, 2]
# 在低维空间内衡量距离的方法常用的是欧式距离
# 曼哈顿距离
# 高维空间：夹角余弦距离
np.sqrt((2-1)**2+(2-1)**2)

1.4142135623730951

# Exercise: pairwise Euclidean distance matrix for 100 random 2-D points
# 1. Generate the sample points (fixed seed so the result is reproducible)
np.random.seed(123)
data = np.random.rand(100,2)

# 2. Broadcast to get every pairwise coordinate difference at once:
#    diff[i, j] == data[i] - data[j], shape (n_points, n_points, n_dims)
diff = data[:, np.newaxis, :] - data[np.newaxis, :, :]

# 3. Square, sum over the coordinate axis and take the square root.  The
#    result is symmetric with a zero diagonal -- the same matrix the explicit
#    double loop over i < j used to fill in one element pair at a time.
dist = np.sqrt(np.sum(diff**2, axis=-1))
dist

array([[ 0.        ,  0.53931329,  0.13888478, ...,  0.75327364,
         0.80722249,  0.27748836],
       [ 0.53931329,  0.        ,  0.50902789, ...,  0.21488021,
         0.27444315,  0.49504387],
       [ 0.13888478,  0.50902789,  0.        , ...,  0.72220996,
         0.75882525,  0.38936222],
       ..., 
       [ 0.75327364,  0.21488021,  0.72220996, ...,  0.        ,
         0.10891833,  0.67905899],
       [ 0.80722249,  0.27444315,  0.75882525, ...,  0.10891833,
         0.        ,  0.76314582],
       [ 0.27748836,  0.49504387,  0.38936222, ...,  0.67905899,
         0.76314582,  0.        ]])

(data[0] - data[1])**2

array([ 0.22054081,  0.07031801])

使用Numpy进行统计分析

文件读写：文本文件（TXT，csv)和二进制文件

# Binary file holding a single array
import numpy as np
data1 = np.random.rand(3,5)
import os
# makedirs(exist_ok=True) creates the folder only when it is missing, so no
# separate exists() check is needed
os.makedirs('./tmp/', exist_ok=True)
# np.save stores exactly one array per .npy file.  Save data1 (the array
# created above) and keep the file name free of trailing spaces, so that the
# following np.load('./tmp/test1.npy') can find the file.
np.save('./tmp/test1.npy',data1)

# 读取npy二进制文件
np.load('./tmp/test1.npy')

data2 = np.array(['a','b','c','d','e'])
np.savez('./tmp/test2.npz',score=data,subject=data2,)  # 能保存多个数组

data_read = np.load('./tmp/test2.npz')
data_read.files

['subject', 'score']

data_read['subject']  # arr_0,arr_1 都是默认来的

array(['a', 'b', 'c', 'd', 'e'], 
      dtype='<U1')

# 文本文件
np.savetxt('./tmp/test3.txt',data,delimiter=',',fmt='%.2f')  # delimiter分隔符号；
                                                            #fmt数据的保存形式小数点后两位的数据

np.savetxt('./tmp/test3.csv',data,delimiter=',',fmt='%.2f')  # delimiter分隔,小数点后两位的数据

np.loadtxt('./tmp/test3.csv',delimiter=',')  # 数据不能有缺失

array([[ 0.7 ,  0.29],
       [ 0.23,  0.55],
       [ 0.72,  0.42],
       [ 0.98,  0.68],
       [ 0.48,  0.39],
       [ 0.34,  0.73],
       [ 0.44,  0.06],
       [ 0.4 ,  0.74],
       [ 0.18,  0.18],
       [ 0.53,  0.53],
       [ 0.63,  0.85],
       [ 0.72,  0.61],
       [ 0.72,  0.32],
       [ 0.36,  0.23],
       [ 0.29,  0.63],
       [ 0.09,  0.43],
       [ 0.43,  0.49],
       [ 0.43,  0.31],
       [ 0.43,  0.89],
       [ 0.94,  0.5 ],
       [ 0.62,  0.12],
       [ 0.32,  0.41],
       [ 0.87,  0.25],
       [ 0.48,  0.99],
       [ 0.52,  0.61],
       [ 0.12,  0.83],
       [ 0.6 ,  0.55],
       [ 0.34,  0.3 ],
       [ 0.42,  0.68],
       [ 0.88,  0.51],
       [ 0.67,  0.59],
       [ 0.62,  0.67],
       [ 0.84,  0.08],
       [ 0.76,  0.24],
       [ 0.19,  0.57],
       [ 0.1 ,  0.89],
       [ 0.63,  0.72],
       [ 0.02,  0.59],
       [ 0.56,  0.16],
       [ 0.15,  0.7 ],
       [ 0.32,  0.69],
       [ 0.55,  0.39],
       [ 0.93,  0.84],
       [ 0.36,  0.04],
       [ 0.3 ,  0.4 ],
       [ 0.7 ,  1.  ],
       [ 0.36,  0.76],
       [ 0.59,  0.69],
       [ 0.15,  0.4 ],
       [ 0.24,  0.34],
       [ 0.51,  0.67],
       [ 0.11,  0.13],
       [ 0.32,  0.66],
       [ 0.85,  0.55],
       [ 0.85,  0.38],
       [ 0.32,  0.35],
       [ 0.17,  0.83],
       [ 0.34,  0.55],
       [ 0.58,  0.52],
       [ 0.  ,  0.99],
       [ 0.91,  0.21],
       [ 0.29,  0.52],
       [ 0.9 ,  0.98],
       [ 0.26,  0.56],
       [ 0.81,  0.39],
       [ 0.73,  0.16],
       [ 0.6 ,  0.87],
       [ 0.98,  0.08],
       [ 0.43,  0.2 ],
       [ 0.45,  0.55],
       [ 0.09,  0.3 ],
       [ 0.93,  0.57],
       [ 0.46,  0.75],
       [ 0.74,  0.05],
       [ 0.71,  0.84],
       [ 0.17,  0.78],
       [ 0.29,  0.31],
       [ 0.67,  0.11],
       [ 0.66,  0.89],
       [ 0.7 ,  0.44],
       [ 0.44,  0.77],
       [ 0.57,  0.08],
       [ 0.58,  0.81],
       [ 0.34,  0.93],
       [ 0.75,  0.57],
       [ 0.75,  0.08],
       [ 0.86,  0.82],
       [ 0.91,  0.13],
       [ 0.08,  0.14],
       [ 0.4 ,  0.42],
       [ 0.56,  0.12],
       [ 0.2 ,  0.81],
       [ 0.47,  0.81],
       [ 0.01,  0.55],
       [ 0.93,  0.58],
       [ 0.21,  0.72],
       [ 0.38,  0.67],
       [ 0.03,  0.64],
       [ 0.03,  0.74],
       [ 0.47,  0.12]])

np.genfromtxt('./tmp/test3.csv',delimiter=',')  # 数据可以有缺失

array([[ 0.7 ,  0.29],
       [ 0.23,  0.55],
       [ 0.72,  0.42],
       [ 0.98,  0.68],
       [ 0.48,  0.39],
       [ 0.34,  0.73],
       [ 0.44,  0.06],
       [ 0.4 ,  0.74],
       [ 0.18,  0.18],
       [ 0.53,  0.53],
       [ 0.63,  0.85],
       [ 0.72,  0.61],
       [ 0.72,  0.32],
       [ 0.36,  0.23],
       [ 0.29,  0.63],
       [ 0.09,  0.43],
       [ 0.43,  0.49],
       [ 0.43,  0.31],
       [ 0.43,  0.89],
       [ 0.94,  0.5 ],
       [ 0.62,  0.12],
       [ 0.32,  0.41],
       [ 0.87,  0.25],
       [ 0.48,  0.99],
       [ 0.52,  0.61],
       [ 0.12,  0.83],
       [ 0.6 ,  0.55],
       [ 0.34,  0.3 ],
       [ 0.42,  0.68],
       [ 0.88,  0.51],
       [ 0.67,  0.59],
       [ 0.62,  0.67],
       [ 0.84,  0.08],
       [ 0.76,  0.24],
       [ 0.19,  0.57],
       [ 0.1 ,  0.89],
       [ 0.63,  0.72],
       [ 0.02,  0.59],
       [ 0.56,  0.16],
       [ 0.15,  0.7 ],
       [ 0.32,  0.69],
       [ 0.55,  0.39],
       [ 0.93,  0.84],
       [ 0.36,  0.04],
       [ 0.3 ,  0.4 ],
       [ 0.7 ,  1.  ],
       [ 0.36,  0.76],
       [ 0.59,  0.69],
       [ 0.15,  0.4 ],
       [ 0.24,  0.34],
       [ 0.51,  0.67],
       [ 0.11,  0.13],
       [ 0.32,  0.66],
       [ 0.85,  0.55],
       [ 0.85,  0.38],
       [ 0.32,  0.35],
       [ 0.17,  0.83],
       [ 0.34,  0.55],
       [ 0.58,  0.52],
       [ 0.  ,  0.99],
       [ 0.91,  0.21],
       [ 0.29,  0.52],
       [ 0.9 ,  0.98],
       [ 0.26,  0.56],
       [ 0.81,  0.39],
       [ 0.73,  0.16],
       [ 0.6 ,  0.87],
       [ 0.98,  0.08],
       [ 0.43,  0.2 ],
       [ 0.45,  0.55],
       [ 0.09,  0.3 ],
       [ 0.93,  0.57],
       [ 0.46,  0.75],
       [ 0.74,  0.05],
       [ 0.71,  0.84],
       [ 0.17,  0.78],
       [ 0.29,  0.31],
       [ 0.67,  0.11],
       [ 0.66,  0.89],
       [ 0.7 ,  0.44],
       [ 0.44,  0.77],
       [ 0.57,  0.08],
       [ 0.58,  0.81],
       [ 0.34,  0.93],
       [ 0.75,  0.57],
       [ 0.75,  0.08],
       [ 0.86,  0.82],
       [ 0.91,  0.13],
       [ 0.08,  0.14],
       [ 0.4 ,  0.42],
       [ 0.56,  0.12],
       [ 0.2 ,  0.81],
       [ 0.47,  0.81],
       [ 0.01,  0.55],
       [ 0.93,  0.58],
       [ 0.21,  0.72],
       [ 0.38,  0.67],
       [ 0.03,  0.64],
       [ 0.03,  0.74],
       [ 0.47,  0.12]])

a = np.array([[9,None,90],[9,None,90]])
np.savetxt('./tmp/test4.txt',a,fmt='%s')

np.loadtxt('./tmp/test4.txt',delimiter=',')

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-190-39a7815e078b> in <module>()
----> 1 np.loadtxt('./tmp/test4.txt',delimiter=',')


D:\yianzhuang\Anaconda3\lib\site-packages\numpy\lib\npyio.py in loadtxt(fname, dtype, comments, delimiter, converters, skiprows, usecols, unpack, ndmin)
    928 
    929             # Convert each value according to its column and store
--> 930             items = [conv(val) for (conv, val) in zip(converters, vals)]
    931             # Then pack it according to the dtype's nesting
    932             items = pack_items(items, packing)


D:\yianzhuang\Anaconda3\lib\site-packages\numpy\lib\npyio.py in <listcomp>(.0)
    928 
    929             # Convert each value according to its column and store
--> 930             items = [conv(val) for (conv, val) in zip(converters, vals)]
    931             # Then pack it according to the dtype's nesting
    932             items = pack_items(items, packing)


D:\yianzhuang\Anaconda3\lib\site-packages\numpy\lib\npyio.py in floatconv(x)
    657         if b'0x' in x:
    658             return float.fromhex(asstr(x))
--> 659         return float(x)
    660 
    661     typ = dtype.type


ValueError: could not convert string to float: b'9 None 90'

# NOTE(review): '%s' is the fmt string used with np.savetxt above, not a column
# separator.  With this delimiter the lines of test4.txt are not split, so each
# whole line is parsed as a single value and comes back as nan (see the output
# below).  Presumably the default whitespace delimiter was intended -- confirm.
np.genfromtxt('./tmp/test4.txt',delimiter='%s')

array([ nan,  nan])

np.loadtxt 提供的参数相对较少，所以很多情况无法处理；np.genfromtxt 的参数相对较多，能处理缺失值等问题

数据统计分析

# 排序
a = np.array([[1,3,2],
          [3,2,1],
          [2,0,3]])
# a.sort()
# a.sort(axis=0)
a.sort(axis=1)
a

array([[1, 2, 3],
       [1, 2, 3],
       [0, 2, 3]])

name = np.array(list('abcd'))
score = np.array([90,89,87,99])
# score.sort()
# name[score == score[-1]]
name[np.argsort(score)]

array(['c', 'b', 'a', 'd'], 
      dtype='<U1')

score.sort?

np.max(score)  # 最大值
np.min(score)  # 最小值
np.argmax(score)  # 最大值所在的位置
np.argmin(score)  # 最小值所在的位置

# 重复、去重
a = np.array(list('asdasd'))
print(a)
np.unique(a)

['a' 's' 'd' 'a' 's' 'd']





array(['a', 'd', 's'], 
      dtype='<U1')

print(np.tile([1,2],3))
print(np.array([1,2]*3))

[1 2 1 2 1 2]
[1 2 1 2 1 2]

np.repeat(['a','b'],3)  # each element is repeated 3 times before moving on to the next one
np.repeat(['a','b'],[2,5])  # the first element is repeated 2 times, the second one 5 times

array(['a', 'a', 'b', 'b', 'b', 'b', 'b'], 
      dtype='<U1')

a = np.ones((3,4))
print(np.sum(a))
print(np.sum(a,axis=0))
print(sum(a))  # 按照列求和
print(np.sum(a,axis=1))

12.0
[ 3.  3.  3.  3.]
[ 3.  3.  3.  3.]
[ 4.  4.  4.]

print(np.mean(a))
print(np.mean(a,axis=0))
print(np.mean(a,axis=1))

1.0
[ 1.  1.  1.  1.]
[ 1.  1.  1.]

# 我做的
# 读取iris数据集中的花萼长度数据（已保存为csv格式），
# 并对其进行排序、
# 去重，
# 并求出和、
# 累积和、
# 均值、 
# 标准差、
# 方差、
# 最小值、
# 最大值

# Load the sepal-length column
data = np.genfromtxt('iris_sepal_length.csv',delimiter=',')

# Sort in place
data.sort()

# Drop duplicates
data2 = np.unique(data)

# Sum
s1 = np.sum(data2)

# Cumulative sum
# NOTE(review): np.sum returns the plain total, so s2 always equals s1; a
# running total would need np.cumsum.
s2 = np.sum(data2)

# Mean
# NOTE(review): this and the statistics below are computed on the
# de-duplicated values in data2, not on the full data set -- confirm that this
# is what the exercise asks for.
mean = np.mean(data2)

# Standard deviation
std = np.std(data2)

# Variance
var = np.var(data2)

# Minimum
mi = np.min(data2)

# Maximum
ma = np.max(data2)

# The label and the value must be separate print() arguments; without the
# comma the first call is a syntax error.
print('unique:',data2)
print('s1:',s1)
print('s2:',s2)
print('mean:',mean)
print('std:',std)
print('var:',var)
print('mi:',mi)
print('ma:',ma)

[ 4.3  4.4  4.5  4.6  4.7  4.8  4.9  5.   5.1  5.2  5.3  5.4  5.5  5.6  5.7
  5.8  5.9  6.   6.1  6.2  6.3  6.4  6.5  6.6  6.7  6.8  6.9  7.   7.1  7.2
  7.3  7.4  7.6  7.7  7.9]
s1: 210.4
s2: 210.4
mean: 6.01142857143
std: 1.02894437683
var: 1.05872653061
mi: 4.3
ma: 7.9

# 老师做的
# 读取iris数据集中的花萼长度数据（已保存为csv格式），
# 并对其进行排序、
# 去重，
# 并求出和、
# 累积和、
# 均值、 
# 标准差、
# 方差、
# 最小值、
# 最大值

# 读取iris数据集中的花萼长度数据（已保存为csv格式），
data = np.loadtxt('iris_sepal_length.csv',delimiter=',')
# 并对其进行排序、
data.sort()
# 去重，
np.unique(data)
# 并求出和、
np.sum(data)
# 累积和、
np.cumsum(data)
# 均值、 
np.mean(data)
# 标准差、
np.std(data)
# 方差、
np.var(data)
# 最小值、
np.min(data)
# 最大值
np.max(data)

7.9000000000000004

# 确认n值
n = 10000

# 求宽度
width = 2*np.pi/n
# import math
# [math.sin(i) for i in np.array([0,1,1])]
# np.sin(np.array([0,1,2]))

# 求高度
t = np.linspace(0, 2*np.pi, n, endpoint=False)
height = abs(np.sin(t))

# 求面积
sq = width*height

# 求和
sum(sq)

3.999999868405276

作业

# 例子:巧用蒙特卡洛算法求pi值
# 让计算机每次随机生成两个0到1之间的数,看这两个实数是否在单位圆内

# 生成一系列随机点,统计单位圆内的点数与总点数,(圆面积和正方形面积之比为Pi:4,
# Pi为圆周率),当随机点取得越多(但即使取10的9次方个随机点时,其结果也仅
# 在前4位与圆周率吻合)时,其结果越接近于圆周率

# 落在圆的点个数:总个数=1/4*Pi*r**2:1 = 1/4*Pi:1
import numpy as np
n=100000 # total number of sample points
# Draw n random points in the unit square (fixed seed for reproducibility)
np.random.seed(123)
p = np.random.rand(n,2)
# Distance of every point from the origin, computed for the whole array at
# once instead of one Python-level loop iteration per point
d = np.sqrt(np.sum(p**2, axis=1))
# Keep the distances of the points that lie inside the unit circle
in_cir = d[d<=1]
# Fraction of points inside the circle : total number of points
print('落在圆的点个数:总个数 =',len(in_cir)/n)
# Reference value the fraction should approach: pi/4
print('原计算机1/4*Pi:1 = ',np.pi/4)

落在圆的点个数:总个数 = 0.7839
原计算机1/4*Pi:1 =  0.7853981633974483

小绿叶ya

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
打赏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫