Pandas需要先读取表格类型的数据,然后进行分析
示例:
import pandas as pd
import pymysql
# Load the ratings CSV file into a DataFrame.
df = pd.read_csv(
    "./ml-latest-small/ratings.csv",
    sep=",",  # field delimiter; "," is already the default for .csv files
    encoding="utf-8",
)

# Preview the first 10 and the last 10 rows.
df1 = df.head(10)
print(df1)
df5 = df.tail(10)
print(df5)

# Grab the frame's shape, column labels and row index, then print each on
# its own line, in that order.
df_shape, df_col, df_index = df.shape, df.columns, df.index
print(df_shape, df_col, df_index, sep="\n")

# dtypes holds the data type of every column, packaged as a Series
# (the type() printed next to it shows that).
df_dtype = df.dtypes
print(df_dtype, type(df_dtype))
# Load a headerless, tab-separated text file.
# read_table is read_csv with the tab character as its default separator.
df2 = pd.read_table(
    "./ml-latest-small/demo.txt",
    names=["pdata", "pv", "uv"],  # column labels to assign
    header=None,  # the file has no header row
)
print(df2)
# Load an Excel workbook (.xlsx) into a DataFrame.
df3 = pd.read_excel(
    io="./ml-latest-small/demo2.xlsx",
    sheet_name=0,  # first sheet, which is also the default
)
print(df3)
# Load a MySQL table into a DataFrame through a PyMySQL connection.
# NOTE(review): the credentials below are hard-coded in source. Move them to
# environment variables or a config file kept out of version control, and
# rotate this password if the file has ever been shared.
conn = pymysql.connect(
    host="qianshaoqing.mysql.rds.aliyuncs.com",
    user="omron",
    password="omron@2021",
    database="officesystem",
)
try:
    df4 = pd.read_sql(
        "select * from random_amount",  # query to execute
        con=conn,  # open database connection to run it on
    )
finally:
    # Release the connection even when the query raises, so it is not leaked.
    conn.close()
print(df4)