numpy模块
用于数据分析,对numpy数组(既有行又有列的矩阵)进行科学运算
import numpy as np
# Create one-dimensional numpy arrays from Python lists.
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
# `*` multiplies element by element; it is not a dot product.
print(arr1 * arr2)
# [ 4 10 18]
# A two-dimensional array is built from a list of equal-length rows.
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(arr)
numpy数组的属性
import numpy as np
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
# T: the transposed array (rows and columns swapped).
print(arr, '\n', arr.T)
# size: total number of elements.
print(arr.size)  # 6
# shape: length of every dimension, as a tuple.
print(arr.shape)  # (2, 3)
# astype: convert the elements to another dtype.
arr = arr.astype(np.float64)
print(arr)
切片numpy数组
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
# Index as arr[rows, columns]; a bare ':' selects everything along that axis.
print(arr[:, :])  # the whole array
print(arr[0, 0])  # 1
赋值
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
# Assign through a (row, column) index; indices must be integers, not floats.
arr[0, 0] = 0
print(arr)  # [[0 2 3] [4 5 6]]
逻辑取值
arr = np.array([[1, 2, 3], [4, 5, 6]])
# `arr > 4` yields a boolean array of the same shape; indexing with it keeps
# only the elements where the condition holds, flattened to one dimension.
print(arr[arr > 4])  # [5 6]
数组的合并
arr1 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
arr2 = np.array([
    [7, 8, 9],
    [10, 11, 12],
])
# hstack takes a sequence of arrays (here a tuple) and joins them side by
# side, so each row gets longer: the result has shape (2, 6).
print(np.hstack((arr1, arr2)))
# vstack places the arrays on top of each other: the result has shape (4, 3).
print(np.vstack((arr1, arr2)))
# concatenate joins along `axis`; the default axis=0 appends rows (like
# vstack), while axis=1 appends columns (like hstack).
print(np.concatenate((arr1, arr2), axis=1))
通过函数创建numpy数组
# Arrays pre-filled with a constant value, built from the requested shape.
print(np.ones(shape=(2, 3)))
print(np.zeros(shape=(2, 3)))
# Ten evenly spaced values running from 1 to 100.
print(np.linspace(1, 100, num=10))
arr1 = np.zeros(shape=(1, 12))
print(arr1.reshape(3, 4))  # same 12 elements arranged as 3 rows x 4 columns
numpy数组数学和统计方法
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [9, 8, 9],
])
# Add up every element of the array.
print(arr.sum())  # 47
numpy.random生成随机数
# Seed the global generator so the "random" numbers are reproducible.
np.random.seed(1)
# Draw a 3x4 array of floats from the half-open interval [0, 1).
samples = np.random.random((3, 4))
print(samples)
matplotlib模块
画图
from matplotlib import pyplot as plt # 约定俗成
from matplotlib.font_manager import FontProperties # 修改字体
# Font able to render the Chinese labels. A raw string keeps the Windows
# backslashes literal; in a normal string `\W`, `\F` and `\s` are invalid
# escape sequences and trigger a warning on recent Python versions.
font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
plt.style.use('ggplot')  # plot theme
clas = ['3班', '4班', '5班', '6班']
students = [50, 55, 45, 60]
# Bars are drawn at integer x positions; the class names are attached to
# those positions afterwards via xticks.
clas_index = range(len(clas))
# x positions: [0, 1, 2, 3]   bar heights: [50, 55, 45, 60]
plt.bar(clas_index, students, color='darkblue')
plt.xlabel('学生', fontproperties=font)
plt.ylabel('学生人数', fontproperties=font)
plt.title('班级-学生人数', fontproperties=font, fontsize=20, fontweight=25)
plt.xticks(clas_index, clas, fontproperties=font)
plt.show()
pandas模块
import pandas as pd
np.random.seed(10)  # fixed seed so the generated values are reproducible
# Six consecutive month-end dates starting from January 2019. The offset
# object is used instead of the 'M' string alias, which newer pandas releases
# deprecate; the resulting dates are the same.
index = pd.date_range('2019-01-01', periods=6, freq=pd.offsets.MonthEnd())
print(index)
columns = ['c1', 'c2', 'c3', 'c4']
print(columns)
# 6x4 samples from the standard normal distribution: one row per date.
val = np.random.randn(6, 4)
print(val)
df = pd.DataFrame(index=index, columns=columns, data=val)
print(df)
# Write the frame out to an Excel workbook.
df.to_excel('date_c.xlsx')
# Read it back, using the first column of the sheet as the row index.
df = pd.read_excel('date_c.xlsx', index_col=[0])
print(df)
# Label-based selection goes through .loc: a single label yields one row,
# and a label slice includes both endpoints.
# NOTE(review): plain df['2019-01-31'] presumably looks for a column of that
# name rather than a row, which is why .loc is used here - confirm.
print(df.loc['2019-01-31'])
print(df.loc['2019-01-31':'2019-05-31'])
# Position-based selection uses integer (row, column) positions.
print(df)
print(df.iat[0, 0])
df.iloc[0] = 0  # overwrite the entire first row
print(df)