#返回数据框维度的元组
df.shape
(569, 32)
#返回列的数据类型
df.dtypes
id int64
diagnosis object
radius_mean float64
texture_mean float64
perimeter_mean float64
area_mean float64
smoothness_mean float64
compactness_mean float64
concavity_mean float64
concave_points_mean float64
symmetry_mean float64
fractal_dimension_mean float64
radius_SE float64
texture_SE float64
perimeter_SE float64
area_SE float64
smoothness_SE float64
compactness_SE float64
concavity_SE float64
concave_points_SE float64
symmetry_SE float64
fractal_dimension_SE float64
radius_max float64
texture_max float64
perimeter_max float64
area_max float64
smoothness_max float64
compactness_max float64
concavity_max float64
concave_points_max float64
symmetry_max float64
fractal_dimension_max float64
dtype: object
#虽然 diagnosis 列的数据类型显示为 object(对象),但进一步的调查显示,它的值是字符串
type(df['diagnosis'][0])
str
#显示数据框的简明摘要,包括每列非空值的数量
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 32 columns):
id 569 non-null int64
diagnosis 569 non-null object
radius_mean 569 non-null float64
texture_mean 548 non-null float64
perimeter_mean 569 non-null float64
area_mean 569 non-null float64
smoothness_mean 521 non-null float64
compactness_mean 569 non-null float64
concavity_mean 569 non-null float64
concave_points_mean 569 non-null float64
symmetry_mean 504 non-null float64
fractal_dimension_mean 569 non-null float64
radius_SE 569 non-null float64
texture_SE 548 non-null float64
perimeter_SE 569 non-null float64
area_SE 569 non-null float64
smoothness_SE 521 non-null float64
compactness_SE 569 non-null float64
concavity_SE 569 non-null float64
concave_points_SE 569 non-null float64
symmetry_SE 504 non-null float64
fractal_dimension_SE 569 non-null float64
radius_max 569 non-null float64
texture_max 548 non-null float64
perimeter_max 569 non-null float64
area_max 569 non-null float64
smoothness_max 521 non-null float64
compactness_max 569 non-null float64
concavity_max 569 non-null float64
concave_points_max 569 non-null float64
symmetry_max 504 non-null float64
fractal_dimension_max 569 non-null float64
dtypes: float64(30), int64(1), object(1)
memory usage: 142.3+ KB
想要获取累加,最大值,最小值,均值等数据时:
#返回每列数据的有效描述性统计
df.describe()
#返回数据框中的前几行,默认返回前五行
df.head()
#但是也可以指定你希望返回的行数
df.head(20)
# `.tail()` 返回最后几行,但是也可以指定你希望返回的行数
df.tail(2)