numpy总结40个

# coding=utf-8

'''
numpy总结
'''

import sys

import numpy as np

# 1. Import numpy as np and print its version
print(np.__version__)

# 2. Create a one-dimensional array of the numbers 0 through 9
arr = np.arange(10)
print(arr)

# 3. Create a 3x3 numpy array whose elements are all True
arr = np.full((3,3), True, dtype=bool)
print(arr)
# Same result: ones() cast to bool gives True everywhere
arr = np.ones((3,3), dtype=bool)
print(arr)
# All False: zeros() cast to bool
arr = np.zeros((3,3), dtype=bool)
print(arr)

# 4. Extract all odd numbers from arr
arr = np.array([0,1,2,3,4,5,6,7,8,9])
# Boolean-mask indexing keeps only the elements where the mask is True
arr = arr[arr % 2 == 1]
print(arr)

# 5. Replace all odd numbers in arr with -1 (modifies arr in place)
arr = np.arange(10)
arr[arr % 2 == 1] = -1
print(arr)

# 6. Replace all odd numbers with -1 without changing arr
arr = np.arange(10)
# np.where(cond, x, y) builds a new array, so arr itself is left untouched
second_arr = np.where(arr % 2 == 1, -1, arr)
print(second_arr)

# 7. Convert a 1-D array into a 2-D array with 2 rows
arr = np.arange(10)
# -1 lets numpy infer the number of columns from the array size
arr = arr.reshape(2,-1)
print(arr)

# 8. Stack arrays a and b vertically
a = np.arange(10).reshape(2, -1)
b = np.repeat(1, 10).reshape(2, -1)
# axis=0 stacks along rows (vertical); axis=1 stacks along columns (horizontal)
vertical_stack = np.concatenate([a, b], axis=0)
print(vertical_stack)
# Equivalent: np.vstack
vertical_stack = np.vstack([a,b])
print(vertical_stack)
# Equivalent: np.r_ concatenates along the first axis
vertical_stack = np.r_[a,b]
print(vertical_stack)

# 9. Stack arrays a and b horizontally
a = np.arange(10).reshape(2, -1)
b = np.repeat(1, 10).reshape(2, -1)
Horizontal_stacking = np.concatenate([a,b], axis=1)
print(Horizontal_stacking)
# Equivalent: np.hstack
Horizontal_stacking = np.hstack([a,b])
print(Horizontal_stacking)
# Equivalent: np.c_ concatenates along the second axis
Horizontal_stacking = np.c_[a,b]
print(Horizontal_stacking)

# 10. Generate a custom sequence in numpy without hard-coding it
a = np.array([1, 2, 3])
# Repeat each element of a three times: 1 1 1 2 2 2 3 3 3
farr = np.repeat(a, 3)
print(farr)
# Repeat the whole array a three times: 1 2 3 1 2 3 1 2 3
sarr = np.tile(a, 3)
print(sarr)
# Join the two 1-D pieces; the three calls below produce the same result
final_arr = np.concatenate([farr, sarr], axis=0)
print(final_arr)
final_arr = np.hstack([farr, sarr])
print(final_arr)
final_arr = np.r_[farr, sarr]
print(final_arr)

# 11. Get the items common to arrays a and b
a = np.array([1,2,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

# Library route: intersect1d returns the sorted, unique common values
c = np.intersect1d(a, b)
print(c)
print(c.dtype)

# Hand-written route: compare every pair and collect the matches in a set
c = {i for i in a for j in b if i == j}
print(c)

# 12. Remove from array a every item that is present in array b
a = np.array([1,2,3,4,5])
b = np.array([5,6,7,8,9])
c = np.setdiff1d(a,b)
print(c)

# 13. Get the positions where the elements of a and b match
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
# With a single argument np.where returns a tuple of index arrays
c = np.where(a == b)
print(c)

# 14. Get all items between 5 and 10 (inclusive)
a = np.array([2,6,1,9,10,3,27])
# Option 1: indices from np.where with the element-wise & operator
index = np.where((a >= 5) & (a <= 10))
print(a[index])
# Option 2: same condition spelled with np.logical_and
index = np.where(np.logical_and(a >= 5, a <= 10))
print(a[index])
# Option 3: index directly with the boolean mask
b = a[(a >=5 ) & (a <= 10)]
print(b)

# 15、转换适用于两个标量的函数maxx,以处理两个数组
def maxx(x, y):
    """Return the larger of two scalars.

    ``x`` is returned only when ``x > y`` holds; in every other case
    (including equality) ``y`` is returned.
    """
    return x if x > y else y
# 15 (continued). Apply the scalar function maxx element-wise to two arrays
a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])
# The string below is the author's reference note on numpy.vectorize:
# it wraps a Python function so it can be called on arrays; `otypes`
# sets the output data type.
'''
numpy.vectorize的使用,将函数向量化
numpy.vectorize(pyfunc, otypes=None, doc=None, excluded=None, cache=False, signature=None)
pyfunc :python函数或方法
otypes : 输出数据类型
'''
# otypes=[float] makes the result a float array even though the inputs are ints
pair_max = np.vectorize(maxx, otypes=[float])
c = pair_max(a, b)
print(c)

# 16. Swap columns 1 and 2 of arr (the first two columns, i.e. indices 0 and 1)
arr = np.arange(9).reshape(3,3)
# Fancy indexing with the column order [1, 0, 2]
sarr = arr[:, [1,0,2]]
print(sarr)

# 17. Swap rows 1 and 2 of arr (the first two rows, i.e. indices 0 and 1)
arr = np.arange(9).reshape(3,3)
sarr = arr[[1,0,2]]
print(sarr)
# Same selection with the column slice written out explicitly
sarr = arr[[1,0,2], :]
print(sarr)

# 18. Reverse the rows of the 2-D array arr
arr = np.arange(9).reshape(3,3)
arr = arr[::-1]
print(arr)

# 19. Reverse the columns of the 2-D array arr
arr = np.arange(9).reshape(3,3)
arr = arr[:,::-1]
print(arr)

# 20. Create a 5x3 two-dimensional array of random decimal numbers between 5 and 10
# The string below is the author's reference note on numpy.random.randint:
# it returns random integers in the half-open interval [low, high); when
# `high` is omitted the interval is [0, low); `size` sets the output shape
# (a single value is returned when size is None).
'''
numpy.random.randint(low, high=None, size=None, dtype='l')
函数的作用是,返回一个随机整型数,范围从低(包括)到高(不包括),即[low, high)
如果没有写参数high的值,则返回[0,low)的值
low: int,生成的数值最低要大于等于low,(hign = None时,生成的数值要在[0, low)区间内)
high: int (可选).如果使用这个值,则生成的数值在[low, high)区间
size: int or tuple of ints(可选).输出随机数的尺寸,比如size = (m * n* k)则输出同规模即m * n* k个随机数。默认是None的,仅仅返回满足要求的单一随机数。
dtype: dtype(可选)
'''
# A 5x3 array of random integers from 5 to 9
print(np.random.randint(low=5, high=10, size=(5,3)))
# A 5x3 array of random floats in [0, 1)
print(np.random.random((5,3)))
# Random decimals between 5 and 10: random integer part plus random fraction
rand_arr = np.random.randint(low=5, high=10, size=(5,3)) + np.random.random((5,3))
print(rand_arr)

# Alternative: draw directly from a uniform distribution over [5, 10)
rand_arr = np.random.uniform(5, 10, size=(5,3))
print(rand_arr)

# 21. Print or display only 3 decimal places of the numpy array rand_arr
rand_arr = np.random.random([5, 3])
np.set_printoptions(precision=3)
print(rand_arr[:4])

# 22. Control scientific (e-style, e.g. 1e10) notation when printing rand_arr
np.random.seed(100)
rand_arr = np.random.random([3,3]) / 1e3
print(rand_arr)

# suppress=False (the default) lets numpy use scientific notation for small values
np.set_printoptions(suppress=False)
np.random.seed(100)
rand_arr = np.random.random([3,3])/1e3
print(rand_arr)
# suppress=True prints the same values in fixed-point notation.
# set_printoptions returns None, so its result must not be assigned to
# rand_arr; doing so would discard the array and print "None".
np.set_printoptions(suppress=True, precision=6)
print(rand_arr)

# 23. Limit the number of items printed for numpy array a to at most 6 elements
a = np.arange(15)
# Arrays with more than `threshold` elements are summarised with "..."
np.set_printoptions(threshold=6)
print(a)

# 24. Print the full numpy array a without truncation
# threshold must be a non-NaN number (np.nan raises ValueError on current
# NumPy); sys.maxsize is the documented way to disable summarisation.
np.set_printoptions(threshold=sys.maxsize)
print(a)

# 25. Import the iris dataset, keeping the text column intact
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
# dtype='object' avoids forcing a numeric conversion on the species column
iris = np.genfromtxt(url, delimiter=',', dtype='object')
# Column names, for reference only (not used by the code below)
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
print(iris[:3])

# 26. Extract the text column (species) from the one-dimensional iris dataset
# dtype=None lets genfromtxt infer a type per column, giving a 1-D array of records
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)
# Field 4 of each record is the species
species = np.array([row[4] for row in iris_1d])
print(species[:4])

# 27. Convert the 1-D iris dataset to a 2-D array iris_2d by omitting the species text field
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)
# Option 1: keep only the first four fields of every record
# NOTE(review): the original comment called these values "string type"; the
# resulting dtype depends on what genfromtxt inferred -- confirm before relying on it
iris_2d = np.array([row.tolist()[:4] for row in iris_1d])
print(iris_2d)
# Option 2: request the dtype explicitly and read only the four numeric columns
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
print(iris_2d)

# 28. Find the mean, median and standard deviation of the iris sepal length (1st column)
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
# usecols=[0] reads only the first column, so iris_2d is 1-D here despite its name
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])
print(iris_2d)
mu, med, sd = np.mean(iris_2d), np.median(iris_2d), np.std(iris_2d)
print(mu, med, sd)

# 29. Normalise an array so that its values lie exactly between 0 and 1
# Build a min-max scaled version of the iris sepal length: the minimum maps
# to 0 and the maximum maps to 1.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])
Smax, Smin = sepallength.max(), sepallength.min()
S = (sepallength - Smin)/(Smax - Smin)
# Equivalent: np.ptp ("peak to peak") is max - min. The function form is used
# because the ndarray.ptp() method was removed in NumPy 2.0.
S = (sepallength - Smin)/np.ptp(sepallength)
print(S)

# 30. Compute the softmax scores of sepallength
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
# Option 1: read the first column directly as floats
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])
# Option 2: read everything as objects, then convert the first field of each row
sepallength = np.genfromtxt(url, delimiter=',', dtype='object')
sepallength = np.array([float(row[0]) for row in sepallength])
def softmax(x):
    """Return the softmax of ``x``, normalised along axis 0.

    The maximum of ``x`` is subtracted before exponentiating so that the
    largest exponent is 0; the normalised result is unchanged by this shift.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    total = weights.sum(axis=0)
    return weights / total
# Print the softmax scores of the sepal lengths loaded above
print(softmax(sepallength))

# 31. Find the 5th and 95th percentiles of the iris sepal length
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])
print(np.percentile(sepallength, q=[5, 95]))

# 32. Insert np.nan values at 20 random positions in the iris_2d dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='object')
# Row and column indices of every truthy cell
# (presumably every cell of the dataset -- verify against the loaded data)
i, j = np.where(iris_2d)
# Fixed seed so the chosen positions are reproducible
np.random.seed(100)
# nan is the float "missing value" marker
# Row and column indices are drawn independently, 20 of each
iris_2d[np.random.choice((i), 20), np.random.choice((j), 20)] = np.nan
print(iris_2d[10:20])

# 33. Find the number and positions of missing values in sepallength (1st column) of iris_2d
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
# Plant NaNs at 20 random (row, column) positions so there is something to find
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan
# Count of NaNs in the first column
print("缺失值的数量为:", np.isnan(iris_2d[:, 0]).sum())
# Row indices of the NaNs in the first column
print("缺失值的位置为:", np.where(np.isnan(iris_2d[:, 0])))

# 34. Filter a numpy array on two or more conditions: keep the iris_2d rows with
# petallength (3rd column) > 1.5 and sepallength (1st column) < 5.0
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
# Element-wise AND of the two per-row boolean masks
condition = (iris_2d[:, 2] > 1.5) & (iris_2d[:, 0] < 5.0)
print(iris_2d[condition])

# 35. Select the rows of iris_2d that do not contain any NaN value
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
# Plant NaNs at 20 random (row, column) positions
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan
# NOTE: despite its name, this mask is True for rows that contain NO NaN
# (the per-row np.any result is inverted with ~).
any_nan_in_row = np.array([~np.any(np.isnan(row)) for row in iris_2d])
print(iris_2d[any_nan_in_row][:5])
# Alternative: count the NaNs per row and keep the rows whose count is 0.
# The result is printed; as a bare expression it would be silently discarded
# when this file runs as a script.
print(iris_2d[np.sum(np.isnan(iris_2d), axis=1) == 0][:5])

# 36. Find the correlation between SepalLength (1st column) and PetalLength (3rd column) in iris_2d
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
# corrcoef returns the Pearson product-moment correlation matrix of the two columns
corr_arr = np.corrcoef(iris_2d[:, 0], iris_2d[:, 2])
print(corr_arr)
# The off-diagonal entry [0, 1] is the correlation coefficient between the two columns
print(corr_arr[0,1])
# Alternative: scipy's pearsonr, which also returns a p-value.
# Imported from the public scipy.stats namespace; scipy.stats.stats is a
# deprecated private module.
from scipy.stats import pearsonr
corr, p_value = pearsonr(iris_2d[:, 0], iris_2d[:, 2])
print(corr)

# 37. Find out whether iris_2d has any missing values
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
# True if at least one element is NaN
is_nan = np.isnan(iris_2d).any()
print(is_nan)

# 38. Replace every occurrence of NaN in a numpy array with 0
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
# Plant NaNs at 20 random (row, column) positions
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan
# Boolean-mask assignment overwrites the NaN cells in place
iris_2d[np.isnan(iris_2d)] = 0

# 39. Find the unique values of the iris species and how often each occurs
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
# Column names, for reference only (not used by the code below)
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
# Element 4 of each row is the species
species = np.array([row.tolist()[4] for row in iris])
# np.unique returns the sorted distinct values; return_counts=True adds their counts
print(np.unique(species, return_counts=True))

# 40. Bin the petal length (3rd column) of the iris data to form a text array
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
# Column names, for reference only (not used by the code below)
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
# digitize returns, for each value, the index of the bin it falls in:
# 1 for [0, 3), 2 for [3, 5), 3 for [5, 10), 4 for values >= 10
petal_length_bin = np.digitize(iris[:, 2].astype("float"), [0, 3, 5, 10])
# Map each bin index to a text label
# NOTE(review): bin 4 maps to the literal string 'np.nan', not the float np.nan --
# confirm this is intended
label_map = {1: 'small', 2:'medium', 3:'large', 4:'np.nan'}
petal_length_cat = [label_map[x] for x in petal_length_bin]
print(petal_length_cat[:4])









 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值