1. 引言
2. 安装
3. numpy基本属性
import numpy as np
# Build a 3x3 integer matrix and report its basic attributes.
array = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)
print(array)
# Each label is printed followed by the attribute it describes.
for label, value in (
    ("矩阵的维度", array.ndim),   # number of axes
    ("矩阵的形状", array.shape),  # length along each axis
    ("矩阵的大小", array.size),   # total element count
    ("矩阵的类型", array.dtype),  # element data type
):
    print(label, value)
4. numpy创建array
import numpy as np
# Common array constructors; every explicit dtype here is signed 8-bit.
a = np.array(
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    dtype=np.int8,
)
b = np.zeros(shape=(3, 3), dtype=np.int8)  # all zeros
c = np.ones(shape=(3, 3), dtype=np.int8)  # all ones
d = np.empty(shape=(3, 3), dtype=np.int8)  # allocated but not initialised
e = np.arange(12).reshape(3, 4)  # 0..11 laid out as 3 rows x 4 columns
f = np.linspace(0, 10, num=20)  # 20 evenly spaced points from 0 to 10
g = np.random.random(size=(2, 4))  # 2x4 matrix of random floats
h = g[np.newaxis, ...]  # prepend a new leading axis: (2, 4) -> (1, 2, 4)
print(g)
5. numpy基础运算
import numpy as np
# Element-wise operations on two 1-D arrays of equal length.
a = np.array([10, 20, 30, 40])
b = np.arange(4)
c = a - b  # element-wise subtraction
d = a + b  # element-wise addition (previously repeated the subtraction)
e = a * b  # element-wise product, not a matrix product
f = a ** 2  # every element squared
g = 10 * np.sin(a)  # a function applied to every element
h = a == 30  # element-wise comparison, yields a boolean array
# Whole-matrix operations on two 2x2 arrays.
i = np.arange(4).reshape((2, 2))
j = np.arange(4).reshape((2, 2))
k = np.dot(i, j)  # matrix product; equivalently i.dot(j)
l = np.sum(i, axis=0)  # axis=0 sums each column, axis=1 sums each row
m = np.min(i)
n = np.max(i)
print(l)
import numpy as np
# Aggregation and reshaping helpers applied to a 3x4 matrix holding 2..13.
A = np.arange(2, 14).reshape(3, 4)
a = np.argmin(A)  # index of the smallest element in the flattened array
b = np.mean(A, axis=0)  # mean of each column
c = np.median(A)  # median over all elements
d = np.cumsum(A)  # running total over the flattened array
e = np.diff(A, n=1, axis=-1)  # difference between neighbours within each row
f = np.sort(A, axis=-1)  # sort each row independently
g = A.T  # transpose
h = np.clip(A, a_min=5, a_max=9)  # values below 5 become 5, above 9 become 9
print(h)
6. numpy索引
import numpy as np
# Indexing and iteration over a 3x4 matrix holding 3..14.
A = np.arange(3, 15).reshape((3, 4))
print(A[2])  # one whole row
print(A[1][1])  # a single element; same as A[1, 1]
print(A[1, :])  # ":" selects everything along an axis (a start:stop range also works)
for row in A:  # iterating an array yields its rows
    print(row)
for column in A.T:  # rows of the transpose are the columns of A
    print(column)
for item in A.flat:  # .flat visits every element one by one
    print(item)
7. numpy array合并
import numpy as np
# Two length-3 vectors used to demonstrate the joining helpers.
A = np.array([1] * 3)
B = np.array([2] * 3)
a = np.vstack([A, B])  # stack as rows: result has shape (2, 3)
b = np.hstack([A, B])  # join end to end: result has shape (6,)
c = A[np.newaxis, :]  # add a leading axis: (3,) -> (1, 3)
d = np.concatenate([c, c, c], axis=1)  # join several arrays along the chosen axis
print(d)
8. numpy array分割
import numpy as np
# Splitting a 3x4 matrix holding 0..11 into sub-arrays.
A = np.arange(12).reshape(3, 4)
a = np.split(A, indices_or_sections=3, axis=0)  # equal parts only: three single rows
b = np.array_split(A, indices_or_sections=3, axis=1)  # unequal parts allowed: 4 columns in 3 pieces
c = np.vsplit(A, indices_or_sections=3)  # split along rows
d = np.hsplit(A, indices_or_sections=2)  # split along columns
print(d)
9. numpy array 浅复制和深复制
import numpy as np
# Plain assignment versus an explicit copy.
a = np.arange(4)
b = a  # no copy is made: b is a second name for the same array object
c = a.copy()  # independent array with its own data
a[0] = 2  # the change is visible through b but not through c
for other in (b, c):
    # Prints True for the alias and False for the copy.
    print(other is a)
10. pandas基本介绍
import pandas as pd
import numpy as np
# A Series with one missing value, and DataFrames built three different ways.
s = pd.Series([1, 2, 3, 6, np.nan, 44, 1])
datas = pd.date_range(start='20200830', periods=6)  # six consecutive days
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=datas,
    columns=['a', 'b', 'c', 'd'],
)
df1 = pd.DataFrame(np.random.randn(6, 4))  # default integer row/column labels
df2 = pd.DataFrame({'A': datas, 'B': 4})  # built from a dict; the scalar is repeated per row
print(df2.dtypes)  # dtype of each column
print(df.index)  # row labels
print(df.columns)  # column labels
print(df.values)  # underlying values as an array
print(df2.describe())  # summary statistics per column
print(df2.sort_index(axis=1, ascending=False))  # columns ordered by label, descending
11. pandas选择数据
import pandas as pd
import numpy as np
# Sample data: a Series and a date-indexed 6x4 DataFrame of random values.
s = pd.Series([1, 2, 3, 6, np.nan, 44, 1])
datas = pd.date_range(start='20200830', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=datas,
    columns=['A', 'B', 'C', 'D'],
)
# Further selection forms, left disabled as in the original notes:
# print(df['A'], df.A)           # one column, by key or by attribute
# print(df[0:3])                 # a slice of rows
# print(df.loc['2020-08-30'])    # by label: one row
# print(df.loc[:, ['A']])        # by label: all rows, listed columns
# Selection by integer position: row 3, column 1.
print(df.iloc[3, 1])
12. pandas设置值
import pandas as pd
import numpy as np
# A Series with an explicit descending index, and a date-indexed DataFrame to modify.
s = pd.Series(
    [1, 2, 3, 6, np.nan, 44, 1],
    index=[6, 5, 4, 3, 2, 1, 0],
)
datas = pd.date_range(start='20200830', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=datas,
    columns=['A', 'B', 'C', 'D'],
)
df.iloc[2, 2] = 111  # assign by integer position
df.loc['2020-08-30', 'A'] = 222  # assign by row/column label
negative_a = df.A < 0
df[negative_a] = 0  # every row whose A is negative is set to 0 across all columns
df['F'] = np.nan  # add a new column filled with NaN
print(s)
13. pandas处理丢失数据
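可以读取的格式:csv、excel、json、html、pickle、hdf 等,分别对应 pd.read_csv、pd.read_excel、pd.read_json 等函数;保存时使用对应的 to_csv、to_excel 等方法。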
import pandas as pd
import numpy as np
# Load the sample table. skipinitialspace drops the blank that follows each
# comma in the header row, so column names carry no leading space.
data = pd.read_csv('dating.csv', skipinitialspace=True)
print(data)
# Write .xlsx rather than .xls: pandas 2.0 removed the xlwt writer, so a
# '.xls' target has no available engine there.
data.to_excel('dating_new.xlsx')
注:dating.csv文件
milage, Liters, Consumtime, target
49920,8.326976,0.953952,3
14488,7.153469,1.673904,2
26052,1.441871,0.895124,1
75136,1.3147394,0.428964,1
38344,1.669788,0.134296,1
14. pandas合并concatenating
import pandas as pd
import numpy as np
# Three 3x4 frames filled with 0, 1 and 2, sharing the same columns.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
# Row-wise concatenation; ignore_index renumbers the rows from 0.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
# print(res)
# join='inner' keeps only the columns shared by every frame (intersection);
# join='outer' keeps the union of all columns.
df3 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df4 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
res = pd.concat([df3, df4], join='inner', ignore_index=True)
print(res)
# Appending rows: DataFrame.append was removed in pandas 2.0, so the same
# result is produced with pd.concat.
res = pd.concat([df1, df2, df2], ignore_index=True)
print(res)
15. pandas合并merge
import pandas as pd
import numpy as np
# Merge on a single key column.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
# print(left)
# print(right)
res = pd.merge(left, right, on='key')
# print(res)
# Merge on two key columns.
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K0', 'K2', 'K3'],
    'key2': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)
# how may be 'inner', 'outer', 'left' or 'right'. 'outer' keeps every
# (key1, key2) pair found on either side; cells without a match become NaN.
res = pd.merge(left, right, on=['key1', 'key2'], how='outer')
print(res)
16. pandas plot图表
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Series: cumulative sum of 1000 random draws, indexed 0..999.
data = pd.Series(np.random.randn(1000), index=np.arange(1000)).cumsum()
# DataFrame: four such cumulative series as columns A-D. This rebinds `data`,
# so only the DataFrame is used for the plots below.
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list("ABCD"),
).cumsum()
# print(data.head())
# data.plot()
# plt.plot()
# The bare string below is a note listing other plot kinds; it has no effect.
'''
# plot方法
'bar', 'hist', 'box', 'kde', 'area', 'scatter', 'hexbin', 'pie'
'''
# Draw two scatter series on the same axes: the second call reuses the axes
# object returned by the first.
data_plot = data.plot.scatter(x='A', y='B', color='Orange', label='Class1')
data.plot.scatter(x='C', y='D', color='Blue', label='Class2', ax=data_plot)
plt.show()