产生随机多维数组(小数):a = np.random.random((3,3))
产生随机多维数组(整数):a = np.random.randint(0,10,size=[3,3])
求某列的最大值:max_value = a[:,1].max()
或者max_value = np.max(a[:,1])
求某列最大值的索引:max_index = a[:,1].argmax()
或者max_index = np.argmax(a[:,1])
判断两个数组是否完全相同:np.array_equal(a, b)(形状相同且所有元素相等时返回 True),参考:https://blog.csdn.net/tintinetmilou/article/details/78555486
numpy数组A按某列逆序排序:
# Sort the rows of A by the values in column index 3, largest first.
ind_sort = A[:, 3].argsort()[::-1]  # argsort gives ascending row indices; [::-1] reverses them
A_sort = A[ind_sort]  # index with the reordered row indices to get the sorted rows
# Read the CSV file: every field is loaded as a string of up to 75 characters
# (dtype="U75"), the first line is skipped, and fields are split on commas.
import numpy as np
world_alcohol = np.genfromtxt("world_alcohol.csv", dtype="U75", skip_header=1, delimiter=",")
# The returned world_alcohol is a NumPy ndarray (indexed as 2-D below), not a list of lists.
country_is_algeria = world_alcohol[:, 2] == "Algeria"  # compare column index 2 with "Algeria" element-wise; yields a boolean array
country_algeria = world_alcohol[country_is_algeria, :]  # keep only the rows where country_is_algeria is True
is_algeria_and_1986 = (world_alcohol[:, 0]=="1986") & (world_alcohol[:, 2]=="Algeria")
rows_with_algeria_and_1986 = world_alcohol[is_algeria_and_1986, :]  # rows where both conditions hold, as a 2-D array
# Replace the string "1986" in the first column with "2014"; two equivalent ways:
# Method 1: world_alcohol[:,0] is a view, so assigning through the mask updates world_alcohol itself.
world_alcohol[:,0][world_alcohol[:,0] == '1986'] = '2014'
# Method 2: build the mask first, then assign with a (row mask, column) index.
# NOTE: run right after method 1, no "1986" is left, so this assignment changes nothing.
first_column_1986 = world_alcohol[:, 0] == "1986"
world_alcohol[first_column_1986, 0] = "2014"
# Set missing values (empty strings) in column index 4 to the string "0".
is_value_empty = world_alcohol[:, 4] == ""
world_alcohol[is_value_empty, 4] = "0"
# Convert column index 4 from U75 strings to floats.
alcohol_consumption = world_alcohol[:, 4]
alcohol_consumption = alcohol_consumption.astype(float)
total_alcohol = alcohol_consumption.sum()  # sum of the whole column
# Build a dict that maps each country to its summed alcohol consumption
# over the rows whose first column equals "1989".
totals = {}
is_year = world_alcohol[:, 0] == "1989"
year = world_alcohol[is_year, :]
# `countries` was never defined in the original notes. Take each distinct
# value of column index 2 once, so every country is processed a single time.
# Countries without any "1989" row end up with a total of 0.0.
countries = np.unique(world_alcohol[:, 2])
for country in countries:
    is_country = year[:, 2] == country
    country_consumption = year[is_country, :]
    alcohol_column = country_consumption[:, 4]
    # country_consumption comes from boolean-mask indexing and is therefore a
    # copy; filling the blanks here does not modify world_alcohol.
    is_empty = alcohol_column == ''
    alcohol_column[is_empty] = "0"
    alcohol_column = alcohol_column.astype(float)
    totals[country] = alcohol_column.sum()
# Walk the dict to find the key/value pair with the largest value.
# The search starts from 0 with a strict comparison, so highest_key stays
# None when totals is empty or when no value is greater than zero.
highest_value = 0
highest_key = None
for country, consumption in totals.items():
    if highest_value < consumption:
        highest_value = consumption
        highest_key = country
nan and na values are two types of missing data:
nan: stands for “not a number”; it is a special floating-point value used to represent missing values.
na: stands for “not available”; it is used when the value doesn’t exist.
nan和inf值判断
# Element-wise tests: for an array input each call returns a boolean array.
np.isnan(dataset)  # True where the value is NaN
np.isinf(dataset)  # True where the value is positive or negative infinity
np.isfinite(dataset)  # True where the value is neither NaN nor infinite
# Locate the matching entries: argwhere returns the indices of the True elements.
np.argwhere(np.isnan(dataset))
np.argwhere(np.isinf(dataset))
# Whole-array checks that reduce to a single boolean.
np.any(np.isnan(dataset))  # does dataset contain at least one NaN?
np.any(np.isinf(dataset))  # does dataset contain at least one infinity?
np.all(np.isfinite(dataset))  # is every value finite?