1、新建dataframe
import pandas as pd
# Build a 3x3 DataFrame from nested lists; no column names are supplied here.
a = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(a)

# Rebuild the same data, this time naming the columns explicitly.
a = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    columns=["feature_1", "feature_2", "label"],
)
print(a)
#-----展示-----
0 1 2
0 1 2 3
1 4 5 6
2 7 8 9
feature_1 feature_2 label
0 1 2 3
1 4 5 6
2 7 8 9
2、读取csv文件
import pandas as pd
# Read the CSV file; fields are separated by ','.
# The context manager closes the file handle once pandas has parsed it
# (a bare open() call would leave the handle open).
with open('hour.csv') as f:
    df = pd.read_csv(f, sep=",")
print(df)
3、查看dataframe字段信息
# Print a summary of the frame: index range, per-column non-null counts,
# dtypes and memory usage (see the sample output below).
a.info()
#------展示-----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
feature_1 3 non-null int64
feature_2 3 non-null int64
label 3 non-null int64
dtypes: int64(3)
memory usage: 152.0 bytes
4、查看dataframe统计信息
# describe() only returns the statistics table; print it so the summary is
# shown when this runs as a script rather than as the last line of a notebook cell.
print(a.describe())
#------展示-----
feature_1 feature_2 label
count 3.0 3.0 3.0
mean 4.0 5.0 6.0
std 3.0 3.0 3.0
min 1.0 2.0 3.0
25% 2.5 3.5 4.5
50% 4.0 5.0 6.0
75% 5.5 6.5 7.5
max 7.0 8.0 9.0
5、删除dataframe列
# Option 1: drop() returns a new DataFrame and leaves `a` untouched, so the
# result has to be printed (or assigned) to be of any use.
print(a.drop(columns=['feature_1']))
# Option 2: del removes the column from `a` in place. It runs after option 1
# because dropping a column that was already deleted raises KeyError.
del a['feature_1']
#------展示-----
feature_2 label
0 2 3
1 5 6
2 8 9
6、修改dataframe列名
# Rename every column at once by assigning a new list of labels to .columns.
a.columns = ['a','b','c']
print(a)
#------展示-----
a b c
0 1 2 3
1 4 5 6
2 7 8 9
7、获取dataframe列名
# Iterating over .columns yields each column label in order.
for i in a.columns:
    print(i)
#------展示-----
feature_1
feature_2
label
8、获取dataframe的Series
# Show the whole frame first for reference.
print(a)
# Select a single row by position; the result is a Series.
print(a.iloc[0])
# Select a single column, first by position and then by label.
print(a.iloc[:, 1])
print(a.loc[:, 'a'])
#------展示-----
a b c
0 1 2 3
1 4 5 6
2 7 8 9
a 1
b 2
c 3
Name: 0, dtype: int64
0 2
1 5
2 8
Name: b, dtype: int64
0 1
1 4
2 7
Name: a, dtype: int64
9、合并dataframe
# Horizontal merge: the second frame's columns are placed to the right.
print(pd.concat(objs=[a, a], axis="columns"))
# Vertical merge: the second frame's rows are placed underneath.
print(pd.concat(objs=[a, a], axis="index"))
#------展示-----
a b c a b c
0 1 2 3 1 2 3
1 4 5 6 4 5 6
2 7 8 9 7 8 9
a b c
0 1 2 3
1 4 5 6
2 7 8 9
0 1 2 3
1 4 5 6
2 7 8 9
10、数据去重
# Frame whose first and last rows are identical.
df = pd.DataFrame(data=[[1, 2, 3], [2, 3, 4], [1, 2, 3]])
# Remove repeated rows in place, keeping the first occurrence of each.
df.drop_duplicates(keep="first", inplace=True)
print(df)
#------展示-----
0 1 2
0 1 2 3
1 2 3 4
11、替换DF中的字符串
# Frame with one string cell ('%') among integers.
df2 = pd.DataFrame(data=[[1, 2, 3], [2, 3, '%'], [1, 2, 3]])
print(df2)
# Replace, in place, the cells matching the pattern '%' with the integer 4.
df2.replace(to_replace='%', value=4, inplace=True, regex=True)
print(df2)
#------展示-----
0 1 2
0 1 2 3
1 2 3 %
2 1 2 3
0 1 2
0 1 2 3
1 2 3 4
2 1 2 3