python数据分析库查询表

请叫我大贤知

于 2024-03-28 16:56:22 发布

阅读量1.2k

点赞数 29

分类专栏：笔记文章标签： python numpy 机器学习数据分析 pandas

本文链接：https://blog.csdn.net/qq_49001613/article/details/137117484

版权

笔记专栏收录该内容

7 篇文章 0 订阅

订阅专栏

数据分析（1)

1、numPy

1.1 生成随机数组

1.1.1 rand

生成0-1之间的 n 个数

# 导入模块
import numpy as np

# Draw uniform samples from [0, 1): first a 1-D array of 5 values,
# then a 2-D array with 5 rows and 8 columns.
for dims in ((5,), (5, 8)):
    print(np.random.rand(*dims))

1.1.2 randn

随机生成正态分布

# 导入模块
import numpy as np

# The normal distribution describes how probabilities are spread out and is
# a core concept in statistics; randn samples its standard form.

# 1-D array holding 10 normally distributed samples.
flat_samples = np.random.randn(10)
print(flat_samples)

# 2-D array (2 rows, 5 columns) of normally distributed samples.
grid_samples = np.random.randn(2, 5)
print(grid_samples)

1.1.3 randint

随机生成整数

# 导入模块
import numpy as np

# randint draws integers from a half-open interval: the lower bound is
# included and the upper bound is excluded.

# A single integer from [0, 5).
print(np.random.randint(5))

# A single integer from [2, 9).
print(np.random.randint(low=2, high=9))

# A 1-D array of 5 integers from [2, 9).
print(np.random.randint(low=2, high=9, size=5))

# A 2-D array (2 rows, 5 columns) of integers from [2, 9).
print(np.random.randint(low=2, high=9, size=(2, 5)))

1.1.4 normal

随机生成正态分布的数组

# 导入模块
import numpy as np
# Ten samples from a normal distribution with mean 10 and standard deviation 10.
print(np.random.normal(loc=10, scale=10, size=10))

1.1.5 shape 修改数组行列

修改行和列

import numpy as np  # 导入模块

# Start from a 2x3 array.
rows = [[1, 2, 3], [4, 5, 6]]
f1 = np.array(rows)

# Reinterpret the same six elements in place as 3 rows by 2 columns.
f1.shape = (3, 2)
print(f1)

1.2 生成数组方式

1.2.1 array

简单数组

import numpy as np  # 导入模块

# Build a 1-D array from a Python list.
values = [0, 0.5, 10]
a1 = np.array(values)

# Print the element at index 2 (the third value).
print(a1[2])

用法

import numpy as np

list1 = [1, 2, 3]

# dtype: element type of the resulting array
# ndmin: minimum number of dimensions of the resulting array

arr1 = np.array(list1, dtype=float, ndmin=2)

# np.float_ was removed in NumPy 2.0; np.float64 is the same type and works
# on both old and new NumPy versions.
arr2 = np.array(list1, dtype=np.float64, ndmin=2)

print(arr1)
print(arr2)

print("类型为:", arr1.dtype)
print("类型为:", arr2.dtype)

1.2.2 empty

创建未初始化的数组（元素值不确定）

import numpy as np

# np.empty allocates the array without initializing it, so the printed values
# are whatever happened to be in memory (not meaningful data).
# shape: dimensions of the array
# dtype: element type of the array

# 2-D array with 5 rows and 6 columns.
n1 = np.empty((5, 6), dtype=np.int8)

# 1-D array with 5 elements.
n2 = np.empty(5, dtype=np.int8)

for uninitialized in (n1, n2):
    print(uninitialized)

1.2.3 zeros

方法使用以零填充

import numpy as np

# np.zeros builds an array filled with zeros.
# shape: dimensions of the array
# dtype: element type of the array
# order: 'C' stores the data row-major, 'F' stores it column-major
zero_kwargs = {"dtype": int, "order": "C"}

# 2-D array with 5 rows and 6 columns.
z1 = np.zeros(shape=(5, 6), **zero_kwargs)

# 1-D array with 5 elements.
z2 = np.zeros(shape=5, **zero_kwargs)

for zero_array in (z1, z2):
    print(zero_array)

1.2.4 ones

方法使用以一填充的

import numpy as np

# np.ones builds an array filled with ones.
# shape: dimensions of the array
# dtype: element type of the array
# order: 'C' stores the data row-major, 'F' stores it column-major
#        (not passed here, so it is left at its default)

# 2-D array with 5 rows and 6 columns.
one1 = np.ones((5, 6), dtype=np.int_)

# 1-D array with 5 elements.
one2 = np.ones((5,), dtype=np.int_)

for ones_array in (one1, one2):
    print(ones_array)

1.2.5 full

方法使用可以自行指定以几填充

import numpy as np

# np.full builds an array in which every element is the given value.
# shape: dimensions of the array
# fill_value: the value written into every element
# dtype: element type of the array
# order: 'C' stores the data row-major, 'F' stores it column-major

# 1-D array of six 9s.
n1 = np.full(6, 9, dtype=int, order='C')
# 2-D array (4 rows, 5 columns) of 6s.
n2 = np.full((4, 5), 6, dtype=int, order='C')

# Print each array followed by its element type.
for filled in (n1, n2):
    print(filled)
    print("数组类型:", filled.dtype)

1.3 数值范围数组

1.3.1 arange

实现方法数值范围数组

import numpy as np

# Values from 0 up to (but not including) 10 in steps of 0.1.
# Arguments, in order: start value, stop value (excluded), step size;
# dtype selects the element type.
n1 = np.arange(0, 10, 0.1, dtype=float)

print(n1)

1.3.2 linspace

生成等差数列

import numpy as np

# Evenly spaced sequence: 5 integers starting at 0, with the stop value 10
# excluded, i.e. 0, 2, 4, 6, 8.
# Arguments: start, stop, num (how many values), endpoint (whether stop is
# included), dtype (element type).
arr1 = np.linspace(0, 10, num=5, endpoint=False, dtype=int)

print(arr1)

1.3.3 logspace

生成等比数列

import numpy as np
# Geometric sequence with ratio 3 holding 18 values: 3**0, 3**1, ..., 3**17.
# Arguments: start and stop are the first and last exponents, num is how many
# values to produce, endpoint says whether the stop exponent is included,
# base is the number being raised, dtype is the element type.
arr1 = np.logspace(0, 17, num=18, endpoint=True, base=3, dtype=np.uint64)

print(arr1)

1.4 根据数组创建数组

1.4.1 frombuffer

字节串（bytes）转数组

import numpy as np

# np.frombuffer interprets an existing bytes buffer as a 1-D array.
# dtype: element type ('S1' = one byte per element)
# offset: byte position at which reading starts
# count: number of elements to read

s = b'hello hongyaa'
single_byte = 'S1'

# Every byte of the buffer.
n1 = np.frombuffer(s, dtype=single_byte)
# Everything from byte 6 onwards.
n2 = np.frombuffer(s, dtype=single_byte, offset=6)
# Four elements starting at byte 6.
n3 = np.frombuffer(s, dtype=single_byte, offset=6, count=4)

for view in (n1, n2, n3):
    print(view)

1.4.2 fromiter

通过迭代创建数组

import numpy as np
# Generator yielding the doubled values 0, 2, 4, 6, 8.
iterable = (2 * k for k in range(5))

# Consume the generator into a 1-D integer array.
n = np.fromiter(iterable, dtype=int)
print(n)

1.4.3 zeros_like

通过数组创建以0填充的数组

import numpy as np
# Only the shape of the template (2 rows, 3 columns) is used; the result is
# filled with zeros.
template = [[0, 1, 2], [1, 5, 6]]
n = np.zeros_like(template, dtype=int)
print(n)

1.4.4 ones_like

通过数组创建以1填充的数组

import numpy as np
# Only the shape of the template (2 rows, 3 columns) is used; the result is
# filled with ones.
template = [[0, 5, 6], [0, 5, 6]]
n = np.ones_like(template, dtype=int)
print(n)

1.4.5 full_like

根据已有数组的形状创建一个以指定值填充的数组

import numpy as np
# Only the shape of the template (2 rows, 3 columns) is used; every element
# of the result is set to fill_value.
template = [[0, 5, 9], [5, 7, 9]]
n = np.full_like(template, 9, dtype=int)
print(n)

2、matplotlib.pyplot

绘制坐标

import numpy as np  # 导入数据库
import matplotlib.pyplot as plt  # 导入坐标库

# Sample x from 1 up to (not including) 10 in steps of 0.01.
x = np.arange(start=1, stop=10, step=0.01)

# y equals x, i.e. a straight line with slope 1.
y = x

# Draw the line, then display the figure.
plt.plot(x, y)
plt.show()

数据分析（2）

1、Pandas

1.1 安装环境

pip install Pandas

1.2 读取excel

import pandas as pd  # 导入Pandas

# Keep column headers aligned when they contain East Asian wide characters.
pd.set_option("display.unicode.east_asian_width", True)

# Load the workbook into a DataFrame.
excel_path = 'data.xlsx'
df = pd.read_excel(excel_path)

# Show the first rows of the loaded data.
preview = df.head()
print(preview)

会遇到的一些问题

import pandas as pd  # 导入Pandas

# Display settings that fix common output problems:
#   - east_asian_width: keeps column headers aligned with wide characters
#   - max_rows / max_columns: raise the limits so output is not truncated
display_settings = (
    ('display.unicode.east_asian_width', True),
    ('display.max_rows', 1000),
    ('display.max_columns', 1000),
)
for option_name, option_value in display_settings:
    pd.set_option(option_name, option_value)

1.3 Series对象

1.3.1 添加一列数据

import pandas as pd # 导入模块

# Build a one-column Series from a list of scores.
scores = [88, 60, 75]
s1 = pd.Series(scores)

# Print the Series.
print(s1)

1.3.2 手动设置Series索引

import pandas as pd # 导入模块

scores = [88, 60, 78]

# Series whose index labels are the integers 1, 2, 3.
s1 = pd.Series(scores, index=[1, 2, 3])

# Series whose index labels are names.
s2 = pd.Series(scores, index=["小明", "大名", "Tom"])

# Print both Series.
for labelled in (s1, s2):
    print(labelled)

1.3.3 Series 的索引

通过位置索引获取值

import pandas as pd # 导入模块

# Series with the default integer index 0, 1, 2.
s1 = pd.Series([88, 60, 78])

# With the default index, label 0 coincides with the first position.
first_score = s1[0]
print(first_score)

注：Series 对象在使用默认整数索引时不能通过 [-1] 取最后一个元素（-1 会被当作标签查找而报错），可改用 s1.iloc[-1]。

通过标签索引

import pandas as pd # 导入模块

# Series whose index labels are names.
names = ["小明", "大名", "Tom"]
s2 = pd.Series([88, 60, 78], index=names)

# Look up individual values by their label.
for name in ('小明', '大名'):
    print(s2[name])

通过标签切片索引

import pandas as pd # 导入模块

# Series whose index labels are names.
names = ["小明", "大名", "Tom"]
s2 = pd.Series([88, 60, 78], index=names)

# Slicing by label includes both endpoints (closed interval).
label_slice = s2['小明':'大名']
print(label_slice)

通过位置切片获取数据

import pandas as pd # 导入模块

# Series with the default integer index 0, 1, 2.
s1 = pd.Series([88, 60, 78])

# Slicing by position is half-open: start included, stop excluded.
position_slice = s1[1:3]
print(position_slice)

1.3.4 获取Series的索引和值

import pandas as pd  # 导入模块

# Series with the default integer index.
s1 = pd.Series([88, 60, 70])

# Print the index object first, then the underlying values array.
for part in (s1.index, s1.values):
    print(part)

1.4 DataFrame 对象

创建

import pandas as pd  # 导入模块

# Keep column headers aligned when they contain East Asian wide characters.
pd.set_option('display.unicode.east_asian_width', True)

# Column labels (the fields of the table).
columns = ["语文", "数学", "英语"]

# Row labels.
index = [0, 1, 3]

# One inner list per row, one value per column.
data = [
    [100, 105, 99],
    [105, 88, 115],
    [100, 50, 61],
]

# Assemble the DataFrame and print it.
df = pd.DataFrame(data, index=index, columns=columns)
print(df)

# Walk over the DataFrame column by column; each column is a Series.
for _label, column_values in df.items():
    print(column_values)

通过字典创建

import pandas as pd  # 导入模块

# Keep column headers aligned when they contain East Asian wide characters.
pd.set_option('display.unicode.east_asian_width', True)

# Build the DataFrame from a dict: each key is a column label and each list
# holds that column's values. The scalar "高一七班" is repeated for every row.
# The index is a list rather than a set: a set has no defined order, and
# recent pandas versions reject it with "index cannot be a set".
df = pd.DataFrame({
    "语文": [110, 105, 99],
    "数学": [100, 50, 10],
    "英语": [50, 10, 30],
    "班级": "高一七班"
}, index=[0, 1, 2])
print(df)

1.5 常规导入

1.5.1 excel导入

导入指定Sheet页

import pandas as pd

# Keep column headers aligned when they contain East Asian wide characters.
pd.set_option('display.unicode.east_asian_width', True)

# Load only the worksheet named '莫寒' from the workbook.
workbook_path = '1月.xlsx'
df = pd.read_excel(workbook_path, sheet_name='莫寒')

# Show the first rows of that sheet.
preview = df.head()
print(preview)

指定列索引导入Excel

import pandas as pd

# Keep column headers aligned when they contain East Asian wide characters.
pd.set_option('display.unicode.east_asian_width', True)

# header is a 0-based row number: header=1 takes the column labels from the
# second row of the sheet.
workbook_path = '1月.xlsx'
df = pd.read_excel(workbook_path, header=1)

# Show the first rows of the loaded data.
preview = df.head()
print(preview)