001查看前10行数据
from pandas import read_csv
# 001. Load the CSV with pandas and look at the first 10 rows.
filename = 'Pima_Indians.csv'
# Column labels handed to read_csv through names=.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mess', 'pedi', 'age', 'class',
]
data = read_csv(filename, names=names)
# (rows, columns) of the loaded DataFrame.
print(data.shape)
# Keep the first 10 rows in their own variable, then display them.
peek = data.head(n=10)
print(peek)
002.shape 属性查看数据维度:多少行多少列
from pandas import read_csv
# 002. Use the shape attribute to see the data dimensions
# (number of rows, number of columns).
filename = 'Pima_Indians.csv'
# Column labels handed to read_csv through names=.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mess', 'pedi', 'age', 'class',
]
data = read_csv(filename, names=names)
dimensions = data.shape
print(dimensions)
003.dtypes 属性查看数据类型:int,float
from pandas import read_csv
# 003. Use the dtypes attribute to see the data type of each column
# (int, float, ...).
filename = 'Pima_Indians.csv'
# Column labels handed to read_csv through names=.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mess', 'pedi', 'age', 'class',
]
data = read_csv(filename, names=names)
column_types = data.dtypes
print(column_types)
004.describe() 方法查看统计内容的描述: max,min,etc.
from pandas import read_csv
# 004. Use describe() to print summary statistics (max, min, etc.).
filename = 'Pima_Indians.csv'
# Column labels handed to read_csv through names=.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mess', 'pedi', 'age', 'class',
]
data = read_csv(filename, names=names)
summary = data.describe()
print(summary)
005.查看数据分类分布状况 groupby('class').size()
from pandas import read_csv
# 005. Show the class distribution: group the rows by the 'class'
# column and print the size of each group.
filename = 'Pima_Indians.csv'
# Column labels handed to read_csv through names=.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mess', 'pedi', 'age', 'class',
]
data = read_csv(filename, names=names)
class_counts = data.groupby('class').size()
print(class_counts)
006.corr()方法——计算属性相互影响的矩阵
from pandas import read_csv
from pandas import set_option
# 006. Use corr() to print the pairwise correlation matrix of the columns.
filename = 'Pima_Indians.csv'
# Column labels handed to read_csv through names=.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mess', 'pedi', 'age', 'class']
data = read_csv(filename, names=names)
# Widen the console output so the matrix is less likely to wrap.
set_option("display.width", 100)
# Show two decimal places. The full key "display.precision" is required:
# the abbreviated "precision" can match more than one option in recent
# pandas releases (e.g. "styler.format.precision") and then raises OptionError.
set_option("display.precision", 2)
print(data.corr(method='pearson'))
007.计算数据的正态分布偏离程度
from pandas import read_csv
# 007. Print the skew of each column, i.e. how far its distribution
# departs from a symmetric (normal-like) shape.
filename = 'Pima_Indians.csv'
# Column labels handed to read_csv through names=.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mess', 'pedi', 'age', 'class',
]
data = read_csv(filename, names=names)
skewness = data.skew()
print(skewness)