# numpy_learn
# -*- coding:UTF-8 -*-
# @time: 2022-01-19 14:38
# @file: numpy_learn.py
# @software: PyCharm
# author CJH
import numpy as np
# Build arrays from nested lists and inspect their basic attributes.
array = np.array([[1, 2, 3], [4, 5, 6]])
print(array)
print('number of dim:', np.ndim(array))  # 2 -> a two-dimensional array
print('shape:', np.shape(array))  # (rows, columns)
print('array_size:', np.size(array))  # total number of elements

# An explicit dtype makes the integer literals be stored as 64-bit floats.
a = np.array([[2, 23, 4], [3, 33, 34]], dtype=np.float64)
print(a.dtype)

# 4x5 matrix filled with ones.
b = np.ones(shape=(4, 5), dtype=np.float64)
print(b)

# The integers 0..11 arranged as 3 rows by 4 columns.
a1 = np.reshape(np.arange(12), (3, 4))
print(a1)

# 20 evenly spaced values from 1 to 10 (both ends included), as a 4x5 matrix.
a2 = np.reshape(np.linspace(1, 10, num=20), (4, 5))
print(a2)

# Arithmetic between equally shaped arrays is applied element by element.
b_a2 = np.subtract(b, a2)
print(b_a2)
b_a22 = np.square(b_a2)
print(b_a22)

# Comparing an array with a scalar yields a boolean array of the same shape.
c = np.sin(b_a22) * 10
print(c < 5)
# Element-wise product versus true matrix product.
a_mut = np.array([[1, 1], [2, 2]], dtype=np.float64)
b_mut = np.arange(4).reshape(2, 2)
cmul = np.multiply(a_mut, b_mut)  # multiplies matching entries
cdot = a_mut.dot(b_mut)  # matrix product, same as np.dot(a_mut, b_mut)
print(cmul)
print(cdot)

# 2x4 matrix of random values drawn from [0, 10).
a_random = 10 * np.random.random((2, 4))
print(a_random)
# axis=0 reduces down each column (one result per column);
# axis=1 reduces along each row (one result per row).
print(a_random.sum(axis=0))
print(a_random.max(axis=0))
print(a_random.min(axis=1))
# Reductions and helpers applied to `array`, which is expected to hold
#   [[1, 2, 3],
#    [4, 5, 6]]
# Functions prefixed with "arg" return positions; the plain ones return values.
print(array.argmax(axis=1))  # index of the largest entry in each row
print(array.max(axis=1))  # largest entry in each row
print(array.mean(axis=1))  # per-row mean, method form
print(np.mean(array, axis=1))  # per-row mean, function form
print(np.median(array, axis=1))  # per-row median
print(array.cumsum(axis=0))  # running sum down each column
print(np.diff(array, axis=1))  # difference between neighbouring entries in a row
print(np.transpose(array))  # transpose, same as array.T
# None of these calls assign back to `array`, so it is left unmodified.
print(np.dot(array, array.T))
print(array.clip(2, 7))  # values below 2 become 2, values above 7 become 7
# Indexing and iterating over a 2-D array.
array1 = np.arange(3, 15).reshape(3, 4)
print(array1)
# A single [row, column] index gives the same element as array1[1][2]
# without building the intermediate row first.
print(array1[1, 2])
print(array1[1, :])  # ":" selects everything along that axis -> the whole second row
# Iterating over a 2-D array yields its rows; iterate over array1.T to walk
# the columns instead.
for row in array1:
    print(row)
# Joining arrays. A and B are written with double brackets, so each one is
# already a 2-D array of shape (1, 3).
A = np.array([[1, 1, 1]])
B = np.array([[2, 2, 2]])
print(np.hstack([A, B]))  # side by side -> shape (1, 6)
print(np.vstack([A, B]))  # one on top of the other -> shape (2, 3)
# Inserting a leading axis gives shape (1, 1, 3); transposing reverses the
# axes to (3, 1, 1). The extra axis is only needed to turn a 1-D vector into
# a column; for an array that is already 2-D, a plain transpose is enough.
print(np.expand_dims(A, axis=0).T)
n_rows, n_cols = B.shape
print(n_rows)
print(n_cols)
print(np.transpose(A))  # column vector of shape (3, 1)
# concatenate lets the caller choose the axis along which to join.
C = np.concatenate([A, B, B, A], axis=0)
print(C)
# Splitting a 2-D array and taking an independent copy of a column.
A_1 = np.arange(12).reshape(3, 4)
print(A_1)
print(np.hsplit(A_1, 2))  # two equal halves along the column axis
# Copy the first column so that later writes to A_1 do not show up in C_1.
# (Indexing the result with [np.newaxis, :] would turn it into a 2-D array.)
C_1 = A_1[:, 0].copy()
print(C_1)
A_1[0, 0] = 1
print(C_1)  # unchanged: C_1 does not share memory with A_1
# pandas_learn
# -*- coding:UTF-8 -*-
# @time: 2022-01-21 11:17
# @file: pd_learn.py
# @software: PyCharm
# author CJH
import pandas as pd
import numpy as np
# Series and DataFrame basics: construction, selection, assignment and
# missing-value checks.
s = pd.Series([1, 3, 6, np.nan, 44, 10])
print(s)
dates = pd.date_range('20160101', periods=6)
print(dates)
df = pd.DataFrame(np.arange(24).reshape(6, 4), index=dates, columns=['a', 'b', 'c', 'd'])
print(df)
df1 = pd.DataFrame(np.arange(12).reshape(3, 4))
print(df1)
# A DataFrame wraps a 2-D array and attaches row and column labels to it.
print(df.describe())
print(df.sort_index(axis=0, ascending=False))
print(df['a'], df.a)  # two ways of selecting one column
# Row selection: by position with iloc, by label range with loc.
print(df.iloc[0:3], df.loc['20160101':'20160104'])
print(df.loc[:, ['a', 'b']])
df.iloc[2, 2] = 1111  # assign by integer position
print(df)
# Use one .loc call for the conditional assignment. The chained form
# df.a[df.a > 4] = 0 writes into an intermediate object and is not
# guaranteed to modify df (it never does under Copy-on-Write).
df.loc[df['a'] > 4, 'a'] = 0
print(df)
# NOTE: df contains no NaN at this point, so every row is kept here.
# how='any' drops rows containing at least one NaN; how='all' drops only
# rows that are entirely NaN.
print(df.dropna(axis=0, how='any'))
# Column 'a' holds integers; cast it to float explicitly before storing NaN
# instead of relying on an implicit dtype upcast during assignment.
df['a'] = df['a'].astype(float)
df.iloc[0, 0] = np.nan
print(df)
# df.fillna(value=100) would replace the NaN entries with 100.
print(df.isnull())  # element-wise missing-value mask
print(df.isnull().values.any())  # True if any value at all is missing
## Important: importing and exporting data with pandas
# Reads 'student.csv' from the current working directory.
data_1 = pd.read_csv('student.csv')
print(data_1)
first_cell = data_1.iat[0, 0]  # value in the first row, first column
print(first_cell)
# Save the frame as a pickle file.
data_1.to_pickle('student.pickle')
## Combining DataFrames
# Concatenation
data1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
data2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['f', 'b', 'c', 'd'])
data3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
# axis=0 stacks the frames vertically; ignore_index=True renumbers the rows.
# Columns missing from a frame are filled with NaN (outer join is the default).
res = pd.concat([data1, data2, data3], axis=0, ignore_index=True)
print(res)
# join='inner' keeps only the columns present in every frame;
# join='outer' keeps all of them.
res2 = pd.concat([data1, data2], join='inner', ignore_index=True)
print(res2)
res1 = pd.concat([data1, data2], axis=1)  # side by side
print(res1)
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
res3 = pd.concat([data1, data3], ignore_index=True)
print(res3)
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
# Turn the Series into a one-row frame so that it is added as a new row.
res4 = pd.concat([res3, s1.to_frame().T], ignore_index=True)
print(res4)
# merge: combine two frames by matching the values of a key column.
left = pd.DataFrame(
    {
        'Key': ['K0', 'K1', 'K2', 'K3'],
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
    }
)
right = pd.DataFrame(
    {
        'Key': ['K0', 'K1', 'K2', 'K4'],
        'A': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    }
)
## Merge left and right on the shared 'Key' column.
# how='inner' (also the default) keeps only keys found in both frames.
# `on` may also be a list of several key columns, e.g. on=['key1', 'key2'].
res5 = left.merge(right, on='Key', how='inner')
print(res5)
# how='outer' keeps every key from both sides; `suffixes` renames the
# non-key column 'A' that exists in both frames.
res6 = left.merge(right, how='outer', on='Key', suffixes=('_left', '_right'))
print(res6)
## pandas 画图
import matplotlib.pyplot as plt
# Plotting data.
# Series variant (kept for reference):
#   data = pd.Series(np.random.randn(1000), index=np.arange(1000))
#   data = data.cumsum()
# DataFrame variant: four columns of cumulative sums over random samples.
samples = np.random.randn(1000, 4)
data = pd.DataFrame(samples, index=np.arange(1000), columns=list("ABCD")).cumsum()
print(data.head(3))
# Line plot of every column. Other plot kinds include
# 'bar', 'hist', 'box', 'kde' and scatter.
data.plot()
plt.show()
# Scatter plots: passing the Axes returned by the first call as `ax` draws
# the later series onto the same figure.
ax = None
for y_column, colour, legend_label in (
    ('B', 'DarkBlue', 'Class 1'),
    ('C', 'DarkGreen', 'Class 2'),
    ('D', 'Black', 'Class 3'),
):
    ax = data.plot.scatter(x='A', y=y_column, color=colour, label=legend_label, ax=ax)
plt.show()