按列名选择列:
# Demo frame: two records, each with an id, an age and a diagnosis label.
df = pd.DataFrame(
    {
        'id': [0, 1],
        'age': [20, 26],
        'disease': ["T2DM", "HUA"],
    }
)
df
# Output:
#    id  age disease
# 0   0   20    T2DM
# 1   1   26     HUA

# --- Select a single column by name ---
# Plain bracket indexing and label-based .loc are interchangeable here;
# a single label yields a Series in both cases.
df['age']
df.loc[:, 'age']
# Both expressions print the same result:
# 0    20
# 1    26
# Name: age, dtype: int64

# --- Select several columns by name ---
# Passing a list of labels (instead of one label) yields a DataFrame.
df[['age', 'disease']]
df.loc[:, ['age', 'disease']]
# Both expressions print the same result:
#    age disease
# 0   20    T2DM
# 1   26     HUA
选择列名含特定字符串的列:
# Demo frame: yearly age/weight measurements stored in wide format, so the
# column names carry the year as a prefix and the measure as a suffix.
df = pd.DataFrame(
    {
        'id': [0, 1],
        '2020_age': [20, 26],
        '2021_age': [21, 27],
        '2020_weight': [50, 60],
        '2021_weight': [55, 65],
    }
)
df
# Output:
#    id  2020_age  2021_age  2020_weight  2021_weight
# 0   0        20        21           50           55
# 1   1        26        27           60           65

# --- Select the columns whose name contains a given substring ---
# `like=` does a literal substring match on the column labels. `regex=` would
# give the same result for these plain strings, but it interprets characters
# such as '.', '+' or '(' as regular-expression syntax, which is not what a
# "contains this text" lookup wants.
df.filter(like='2020')
# Output:
#    2020_age  2020_weight
# 0        20           50
# 1        26           60
df.filter(like='weight')
# Output:
#    2020_weight  2021_weight
# 0           50           55
# 1           60           65
选择特定数据类型的列:
# Demo frame with one column per common dtype: integer (id, age),
# string (disease), float (UA) and boolean (smoke).
df = pd.DataFrame(
    {
        'id': [0, 1],
        'age': [20, 26],
        'disease': ["T2DM", "HUA"],
        'UA': [360.1, 420.8],
        'smoke': [False, True],
    }
)
df
# Output:
#    id  age disease     UA  smoke
# 0   0   20    T2DM  360.1  False
# 1   1   26     HUA  420.8   True

# --- Select the int columns ---
df.select_dtypes(include=['int'])
# Output:
#    id  age
# 0   0   20
# 1   1   26

# --- Select the float columns ---
df.select_dtypes(include=['float'])
# Output:
#       UA
# 0  360.1
# 1  420.8

# --- Select every numeric column (int and float) ---
# 'number' is the selector the pandas documentation gives for "all numeric
# types". Listing 'int' and 'float' explicitly only matches the default-width
# dtypes, so a column stored as e.g. int8 would be silently left out.
# The boolean column is not selected by 'number'.
df.select_dtypes(include=['number'])
# Output:
#    id  age     UA
# 0   0   20  360.1
# 1   1   26  420.8

# --- Select the string columns ---
# NOTE(review): this relies on strings being stored with the object dtype;
# on pandas versions that infer a dedicated string dtype the selector may
# need to change -- confirm against the pandas version in use.
df.select_dtypes(include=['object'])
# Output:
#   disease
# 0    T2DM
# 1     HUA

# --- Select the bool columns ---
df.select_dtypes(include=['bool'])
# Output:
#    smoke
# 0  False
# 1   True