Numpy学习笔记
导入包
import numpy as np
array、arange、dtype
# Build an array from a plain Python list.
a1 = np.array([1, 2, 3])
# Rebind a1 to the integers 0..9; np.arange is the array counterpart of range().
a1 = np.arange(10)
# Bare expression: an interactive session echoes the type, a script discards it.
type(a1)
# dtype= fixes the element type at construction, so 1, 2, 3 are stored as floats.
t4 = np.arange(1, 4, dtype=float)
round、random:随机生成小数,并保留两位小数
# Decimals in numpy: draw ten random floats from [0, 1) with the stdlib
# generator, then round every element to two decimal places.
from random import random
a = np.array([random() for _ in range(10)])
t8 = a.round(decimals=2)
numpy读取本地数据和索引
数据为kaggle网站数据
# Paths are relative to the working directory; only the US file is read here.
us_file_path = "data/US_vedio_data_numbers.csv"
gb_file_path = "data/GB_vedio_data_numbers.csv"
# Parse the comma-separated text file, converting every field to an integer.
t1 = np.loadtxt(fname=us_file_path, dtype="int", delimiter=",")
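np.loadtxt 的常用参数(原图缺失,以官方文档为准):fname 文件路径;dtype 数据类型,默认 float;delimiter 分隔符,默认按空白字符分隔;skiprows 跳过开头的行数;usecols 只读取指定的列;unpack 为 True 时将结果转置。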
取行
# A single integer index selects one row (row 2).
third_row = t1[2]
print(third_row)
# A slice selects a run of rows: row 2 through the last one.
rows_from_third = t1[2:]
print(rows_from_third)
取不连续的多行
# Non-adjacent rows are selected by indexing with a list of row numbers.
picked_rows = [2, 8, 10]
print(t1[picked_rows])
取列
# A single column: every row, column 0.
first_col = t1[:, 0]
print(first_col)
# A contiguous run of columns: column 2 through the last one.
print(t1[:, 2:])
# Non-adjacent columns: columns 0 and 2, picked with an index list.
picked_cols = [0, 2]
print(t1[:, picked_cols])
# Select along rows and columns at the same time.
# An integer on both axes picks the single element at row 2, column 3.
a = t1[2, 3]
# A slice on both axes picks the block of rows 2-4 and columns 1-3.
b = t1[2:5, 1:4]
取不相邻的点(0,0)(2,1)
# Two index lists are paired element by element, so this picks the
# individual points (0, 0) and (2, 1) rather than a rectangular block.
point_rows = [0, 2]
point_cols = [0, 1]
c = t1[point_rows, point_cols]
# Show the block, the type of the single element, then the picked points.
for shown in (b, type(a), c):
    print(shown)
shape 属性得到维度;reshape() 改变形状(返回新数组,不修改原数组)
# A one-dimensional array holding 0..11.
a1 = np.arange(12)
# shape is an attribute, not a method; as a bare expression it is only
# echoed by an interactive session.
a1.shape
# reshape returns a new 3x4 array and leaves a1 untouched; the result is
# not stored here.
np.reshape(a1, (3, 4))
# The new shape may be given as a tuple: 2 blocks of 3 rows by 4 columns.
a2 = np.arange(24).reshape((2, 3, 4))
展开函数 flatten()
# flatten() returns a one-dimensional copy; "C" (row-major) is its default order.
a3 = a2.flatten(order="C")
布尔索引
# Comparing an array with a scalar gives a boolean array of the same shape.
below_ten = t8 < 10
below_ten
# Indexing with that mask keeps only the elements where it is True.
t8[below_ten]
三元运算符
# Element-wise ternary: 0 where the value is below 8, 10 everywhere else.
is_below_eight = t8 < 8
np.where(is_below_eight, 0, 10)
clip()裁剪:比2小的都为2,比5大的都为5
# Clamp values into [2, 5]: anything below 2 becomes 2, anything above 5
# becomes 5. The result is a new array; t9 itself is not modified.
t9.clip(min=2, max=5)
竖直拼接
# np.vstack takes ONE sequence of arrays, so x and y are wrapped in a tuple;
# passing them as two positional arguments raises TypeError.
# The arrays are stacked vertically (row-wise).
np.vstack((x, y))
水平拼接
# np.hstack takes ONE sequence of arrays, so x and y are wrapped in a tuple;
# passing them as two positional arguments raises TypeError.
# The arrays are stacked horizontally (column-wise).
np.hstack((x, y))
行交换、列交换
# Swap rows 1 and 2. Indexing with a list returns a copy, so the right-hand
# side is fully read before the assignment overwrites anything.
rows_reversed = t[[2, 1], :]
t[[1, 2], :] = rows_reversed
# Swap columns 0 and 2 the same way.
cols_reversed = t[:, [2, 0]]
t[:, [0, 2]] = cols_reversed
nan
# Build a 3x4 float array; NaN can only be stored in a floating-point dtype.
a = np.arange(12).reshape(3, 4).astype("float")
# Blank out the last two entries of row 1 so there is something to repair.
a[1, 2:] = np.nan

# Replace every NaN with the mean of the non-NaN values in its column.
for i in range(a.shape[1]):
    # Basic slicing returns a view, so writing to temp_col updates a in place.
    temp_col = a[:, i]
    nan_mask = np.isnan(temp_col)
    # count_nonzero yields an integer count. np.nonzero returns a tuple of
    # index arrays, which never equals 0, so it cannot be used as the test.
    nan_num = np.count_nonzero(nan_mask)
    # Skip clean columns, and columns with no valid value left to average.
    if 0 < nan_num < temp_col.size:
        temp_col[nan_mask] = temp_col[~nan_mask].mean()
    print(temp_col)
一个小例子
import matplotlib.pyplot as plt
import numpy as np
# Paths are relative to the working directory; only the US file is plotted.
us_file_path = "data/US_vedio_data_numbers.csv"
gb_file_path = "data/GB_vedio_data_numbers.csv"

# Load the comma-separated file as integers.
t_us = np.loadtxt(fname=us_file_path, dtype="int", delimiter=",")
# Keep only the rows whose column-1 value is at most 500000.
keep_rows = t_us[:, 1] <= 500000
t_us = t_us[keep_rows]

# x axis: column 1 (presumably like counts, going by the variable name).
t_us_like = t_us[:, 1]
# y axis: the comment data, taken from the last column.
t_us_comments = t_us[:, -1]

# A 20x8 inch canvas at 80 dpi, one point per remaining row.
plt.figure(dpi=80, figsize=(20, 8))
plt.scatter(t_us_like, t_us_comments)
plt.show()