pandas复习

import numpy as np
import pandas as pd
from pandas import Series ,DataFrame
s1=pd.Series([1,2,3,4],index=['a','b','c','d'])
s1
a    1
b    2
c    3
d    4
dtype: int64
s1.values
array([1, 2, 3, 4], dtype=int64)
s1.index
Index(['a', 'b', 'c', 'd'], dtype='object')
s2=pd.Series(np.arange(10))
s2
0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int32
s3=pd.Series({'a':1,'b':3,'c':2,'d':4})
s3
a    1
b    3
c    2
d    4
dtype: int64
s3['a']
1
s3[s3>2]#value 值大于2
b    3
d    4
dtype: int64
s1.to_dict()# Series 转字典
{'a': 1, 'b': 2, 'c': 3, 'd': 4}
pd.isnull(s3)
a    False
b    False
c    False
d    False
dtype: bool
pd.notnull(s3)
a    True
b    True
c    True
d    True
dtype: bool
s1=s1.reindex(index=['a',"b","c","d","e","f"],fill_value=0)#添加index,需要把以前的值写上 fill_value 表示新增加的值是多少
s1['g']=1# 这也是增加新值
s1
a    1
b    2
c    3
d    4
e    0
f    0
g    1
dtype: int64
s1=s1.reindex(index=['a',"b","c","d","e","f","k"],method="ffill")#ffill表示和最近的一个值一样,这里和"g"一样,但是g被删了
s1
a    1
b    2
c    3
d    4
e    0
f    0
k    1
dtype: int64
s1.drop('f')#删除f
a    1
b    2
c    3
d    4
e    0
k    1
dtype: int64
s4=Series([1,2,np.nan,3,4],index=['A','B','C','D','E'])
s4
A    1.0
B    2.0
C    NaN
D    3.0
E    4.0
dtype: float64
s4.isnull()
A    False
B    False
C     True
D    False
E    False
dtype: bool
s4.notnull()
A     True
B     True
C    False
D     True
E     True
dtype: bool
s4.dropna()# 删除nan
A    1.0
B    2.0
D    3.0
E    4.0
dtype: float64
s5=Series(np.random.randn(6),index=[['1','1','1','2','2','2'],['a','b','c','a','b','c']])
s5
1  a    1.422298
   b    1.634116
   c   -0.038048
2  a    0.857658
   b    0.416641
   c   -0.025056
dtype: float64
print(s5['1'])
print(s5['1']['a'])
print(s5[:,'a'])
a    1.422298
b    1.634116
c   -0.038048
dtype: float64
1.4222976799728857
1    1.422298
2    0.857658
dtype: float64
df=s5.unstack()#2级的Series可以转换为DataFrame
df
abc
11.4222981.634116-0.038048
20.8576580.416641-0.025056
df=DataFrame([s5['1'],s5['2']])
df
abc
01.4222981.634116-0.038048
10.8576580.416641-0.025056
s6=df.unstack()
s6
a  0    1.422298
   1    0.857658
b  0    1.634116
   1    0.416641
c  0   -0.038048
   1   -0.025056
dtype: float64
s6=df.T.unstack()
s6
0  a    1.422298
   b    1.634116
   c   -0.038048
1  a    0.857658
   b    0.416641
   c   -0.025056
dtype: float64
s1
a    1
b    2
c    3
d    4
e    0
f    0
k    1
dtype: int64
s1=s1.replace(0,100)# 把0替换为100 [1,2,3]==>[3,2,1] 也可直接传入
s1
a      1
b      2
c      3
d      4
e    100
f    100
k      1
dtype: int64
s2=s1
s1+s2 #对应位置相加
a      2
b      4
c      6
d      8
e    200
f    200
k      2
dtype: int64
s2=s1.sort_values(ascending=False)#False 降序,True 升序
s2
f    100
e    100
d      4
c      3
b      2
k      1
a      1
dtype: int64
s2=s1.sort_index(ascending=False)# False 降序,True 升序
s2
k      1
f    100
e    100
d      4
c      3
b      2
a      1
dtype: int64
s11=Series([1,2,np.nan],index=['a','b','c'])
s12=Series([3,4,5],index=['a','b','c'])
s11
a    1.0
b    2.0
c    NaN
dtype: float64
s12
a    3
b    4
c    5
dtype: int64
pd.concat([s11,s12])
a    1.0
b    2.0
c    NaN
a    3.0
b    4.0
c    5.0
dtype: float64
pd.concat([s11,s12],axis=1)
01
a1.03
b2.04
cNaN5
s11.combine_first(s12) #s12 填充 s11 中的数值
a    1.0
b    2.0
c    5.0
dtype: float64

df=pd.read_csv("E:\微云\Python3数据分析与挖掘建模实战\书籍+随堂源码+说明\sample_code\data\HR.csv")
type(df)
pandas.core.frame.DataFrame
df.head()#前5行 里面可以传入数字表示第几行
satisfaction_levellast_evaluationnumber_projectaverage_monthly_hourstime_spend_companyWork_accidentleftpromotion_last_5yearsdepartmentsalary
00.380.5321573010saleslow
10.800.8652626010salesmedium
20.110.8872724010salesmedium
30.720.8752235010saleslow
40.370.5221593010saleslow
columns=df.columns
columns[0]#第一列名
'satisfaction_level'
df['satisfaction_level'].head() #获取某一列的值
0    0.38
1    0.80
2    0.11
3    0.72
4    0.37
Name: satisfaction_level, dtype: float64
df_new=DataFrame(df,columns=['satisfaction_level','last_evaluation'],index=[0,1,2,3,4,5])#挑出两列
df_new
satisfaction_levellast_evaluation
00.380.53
10.800.86
20.110.88
30.720.87
40.370.52
50.410.50
df_new["n1"]=range(0,6)#长度必须一致
df_new
satisfaction_levellast_evaluationn1
00.380.530
10.800.861
20.110.882
30.720.873
40.370.524
50.410.505
df_new["n2"]=pd.Series(np.arange(10,16))#添加列
df_new
satisfaction_levellast_evaluationn1n2
00.380.53010
10.800.86111
20.110.88212
30.720.87313
40.370.52414
50.410.50515
df_new["n1"]=pd.Series(["A","B"],index=[0,1])#修改值 0 1行
df_new
satisfaction_levellast_evaluationn1n2
00.380.53A10
10.800.86B11
20.110.88NaN12
30.720.87NaN13
40.370.52NaN14
50.410.50NaN15
data={"A":[1,2,3],"B":[4,5,6]}
df_data=DataFrame(data)#初始化时可以传入字典
df_data
AB
014
125
236
df_data.T 
012
A123
B456
for row in df_data.iterrows():
    #print(row)#(0,A 1 B 4) (1,A,2 B,5)
    for i in row:
        print(i)
0
A    1
B    4
Name: 0, dtype: int64
1
A    2
B    5
Name: 1, dtype: int64
2
A    3
B    6
Name: 2, dtype: int64
df1=DataFrame(np.random.rand(8).reshape([4,2]),index=['A','B','C','D'],columns=["c1","c2"])
df1
c1c2
A0.8753100.741149
B0.8726370.423594
C0.8786500.546773
D0.2159500.200086
df1=df1.reindex(index=['A','B','C','D','E'],columns=["c1","c2"])#添加index 和 columns
df1
c1c2
A0.5676410.813240
B0.6307690.560537
C0.4772350.675111
D0.2230980.593948
ENaNNaN
df1.reindex(index=['A','B'])
c1c2
A0.5676410.813240
B0.6307690.560537
df1.drop('A',axis=0)#删除行
c1c2
B0.6307690.560537
C0.4772350.675111
D0.2230980.593948
ENaNNaN
df1.drop('c1',axis=1)#删除列
c2
A0.813240
B0.560537
C0.675111
D0.593948
ENaN
m=1
n=np.nan
m+n# 任何值和nan相加都是nan
nan
df1.dropna(axis=0)#只要某一行中有nan就删除 1时是列
c1c2
A0.5676410.813240
B0.6307690.560537
C0.4772350.675111
D0.2230980.593948
df1.dropna(axis=0,how='any')#any :只要这一行有这个nan 就删除 all 时表示这一行都是nan时才会删除
c1c2
A0.5676410.813240
B0.6307690.560537
C0.4772350.675111
D0.2230980.593948
df1.dropna(thresh=2,axis=1)# thresh=2 表示某一列的非NaN值少于2个时该列才会被删除
c1c2
A0.5676410.813240
B0.6307690.560537
C0.4772350.675111
D0.2230980.593948
ENaNNaN
df1.fillna(value=2)# nan值填充为2 value={'c1':1,'c2':2}
c1c2
A0.5676410.813240
B0.6307690.560537
C0.4772350.675111
D0.2230980.593948
E2.0000002.000000
df2=DataFrame(np.arange(16).reshape(4,4),index=[['a','a','b','b'],[1,2,1,2]],columns=[['BJ','BJ','SH','GZ'],[8,9,6,7]])
df2
BJSHGZ
8967
a10123
24567
b1891011
212131415
df2["BJ"][8]["a"][2] 
4
df1["c4"]={"A":1,"B":2,"C":3,"D":4}
df1
c1c2c4
A0.8753100.741149A
B0.8726370.423594B
C0.8786500.546773C
D0.2159500.200086D
map_data={"A":1,"B":2,"D":3,"C":4}
df1['c3']=df1['c4'].map(map_data)
df1
c1c2c4c3
A0.8753100.741149A1
B0.8726370.423594B2
C0.8786500.546773C4
D0.2159500.200086D3
df4=df1
df4+df1
c1c2c4c3
A1.7506201.482299AA2
B1.7452740.847187BB4
C1.7572991.093546CC8
D0.4319010.400171DD6
df4.sum(axis=0)
c1    2.84255
c2     1.9116
c4       ABCD
c3         10
dtype: object
df4.sum(axis=1)
A    2.616459
B    3.296231
C    5.425423
D    3.416036
dtype: float64
df4.describe()
c1c2c3
count4.0000004.0000004.000000
mean0.7106370.4779002.500000
std0.3298000.2266961.290994
min0.2159500.2000861.000000
25%0.7084650.3677171.750000
50%0.8739730.4851832.500000
75%0.8761450.5953673.250000
max0.8786500.7411494.000000
df4
c1c2c4c3
A0.8753100.741149A1
B0.8726370.423594B2
C0.8786500.546773C4
D0.2159500.200086D3
df4["c1"].sort_values()
D    0.215950
B    0.872637
A    0.875310
C    0.878650
Name: c1, dtype: float64
df4.sort_values("c1")
c1c2c4c3
D0.2159500.200086D3
B0.8726370.423594B2
A0.8753100.741149A1
C0.8786500.546773C4
df4.sort_index()
c1c2c4c3
A0.8753100.741149A1
B0.8726370.423594B2
C0.8786500.546773C4
D0.2159500.200086D3
df4.index=['a','b','c','d']#重命名 df4.index=Series(['a','b','c','d'])
df4
c1c2c4c3
a0.8753100.741149A1
b0.8726370.423594B2
c0.8786500.546773C4
d0.2159500.200086D3
df4.index=df4.index.map(str.upper)
df4
c1c2c4c3
A0.8753100.741149A1
B0.8726370.423594B2
C0.8786500.546773C4
D0.2159500.200086D3
df4.rename(index=str.lower,columns=str.upper)
C1C2C4C3
a0.8753100.741149A1
b0.8726370.423594B2
c0.8786500.546773C4
d0.2159500.200086D3
df4.rename(index={'A':"a"})
c1c2c4c3
a0.8753100.741149A1
B0.8726370.423594B2
C0.8786500.546773C4
D0.2159500.200086D3
[print(x) for x in [1,2]]
1
2





[None, None]
def test_map(x):
    return x+"?"
df8=df4.rename(index=test_map)
df5=DataFrame({"A":[1,2],"B":[3,4],"D":[5,6]},index=['a','b'])
df6=DataFrame({"A":[1,4],"B":[3,4],"D":[5,6]},index=['a','b'])
df5
ABD
a135
b246
df6
ABD
a135
b446
pd.merge(df5,df6)# 列一样的打印出来
ABD
0135
pd.merge(df5,df6,on="A")
AB_xD_xB_yD_y
013535
pd.merge(df5,df6,on="A",how="left")
AB_xD_xB_yD_y
01353.05.0
1246NaNNaN
pd.merge(df5,df6,on="A",how="right")
AB_xD_xB_yD_y
013.05.035
14NaNNaN46
arr1=np.arange(9).reshape(3,3)
arr1
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])
arr2=np.arange(9).reshape(3,3)
arr2
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])
np.concatenate([arr1,arr2],axis=1)
array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])
np.concatenate([arr1,arr2],axis=0)
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])
df1
c1c2c4c3
A0.8753100.741149A1
B0.8726370.423594B2
C0.8786500.546773C4
D0.2159500.200086D3
df8
c1c2c4c3
A?0.8753100.741149A1
B?0.8726370.423594B2
C?0.8786500.546773C4
D?0.2159500.200086D3
pd.concat([df1,df8])
c1c2c4c3
A0.8753100.741149A1
B0.8726370.423594B2
C0.8786500.546773C4
D0.2159500.200086D3
A?0.8753100.741149A1
B?0.8726370.423594B2
C?0.8786500.546773C4
D?0.2159500.200086D3
df20=pd.concat([df1,df8],axis=0)
df20["c1"]["D?"]=np.nan
df20
D:\Anaconda\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
c1c2c4c3
A0.8753100.741149A1
B0.8726370.423594B2
C0.8786500.546773C4
D0.2159500.200086D3
A?0.8753100.741149A1
B?0.8726370.423594B2
C?0.8786500.546773C4
D?NaN0.200086D3
df20=df20.combine_first(df8)#df8 中数据填充df20 中的数据
df20
c1c2c4c3
A0.8753100.741149A1
A?0.8753100.741149A1
B0.8726370.423594B2
B?0.8726370.423594B2
C0.8786500.546773C4
C?0.8786500.546773C4
D0.2159500.200086D3
D?0.2159500.200086D3
df20['c4']=df20['c4'].apply(str.lower)
df20
c1c2c4c3
A0.8753100.741149a1
A?0.8753100.741149a1
B0.8726370.423594b2
B?0.8726370.423594b2
C0.8786500.546773c4
C?0.8786500.546773c4
D0.2159500.200086d3
D?0.2159500.200086d3
def foo(line):
    """Return the value with the suffix '_A' appended.

    The argument is a single element (a cell value of one column/row)
    of a DataFrame, passed in by Series.apply.
    """
    return line + '_A'
df20['c4'].apply(foo)#df20['c4'] 的值就是foo 函数的实参
A     a_A
A?    a_A
B     b_A
B?    b_A
C     c_A
C?    c_A
D     d_A
D?    d_A
Name: c4, dtype: object
del df20['c3'] #删除c3 这一列
df20
c1c2c4
A0.8753100.741149a
A?0.8753100.741149a
B0.8726370.423594b
B?0.8726370.423594b
C0.8786500.546773c
C?0.8786500.546773c
D0.2159500.200086d
D?0.2159500.200086d
df20.size
24
df20['c4'].unique()# 显示df20['c4']这一列去重后的值
array(['a', 'b', 'c', 'd'], dtype=object)
len(df20['c4'].unique())
4
df20['c4'].duplicated() #该值在之前出现过就显示True
A     False
A?     True
B     False
B?     True
C     False
C?     True
D     False
D?     True
Name: c4, dtype: bool
df21=df20.drop_duplicates(['c4'])# 把重复的数据删除 一删删一行
df21
c1c2c4
A0.8753100.741149a
B0.8726370.423594b
C0.8786500.546773c
D0.2159500.200086d
df22=df20.drop_duplicates(['c4'],keep='last')# 把重复的数据删除 一删删一行 如果有重复的保留最后一行
df22
c1c2c4
A?0.8753100.741149a
B?0.8726370.423594b
C?0.8786500.546773c
D?0.2159500.200086d
from datetime import datetime
t1=datetime(2020,8,15)
t1
datetime.datetime(2020, 8, 15, 0, 0)
df22=df22.rename(index={"A?":datetime(2020,8,1),"B?":datetime(2020,8,2),"C?":datetime(2020,9,1),"D?":datetime(2020,9,2)})
df22
c1c2c4c3
2020-08-010.8753100.741149a2020-08-01
2020-08-020.8726370.423594b2020-08-02
2020-09-010.8786500.546773c2020-09-01
2020-09-020.2159500.200086d2020-09-02
df22["2020-09"]# df22["202009"]错误
c1c2c4c3
2020-09-010.878650.546773c2020-09-01
2020-09-020.215950.200086d2020-09-02
df22["2020"]
c1c2c4c3
2020-08-010.8753100.741149a2020-08-01
2020-08-020.8726370.423594b2020-08-02
2020-09-010.8786500.546773c2020-09-01
2020-09-020.2159500.200086d2020-09-02
df22[["c1","c2"]]
c1c2
2020-08-010.8753100.741149
2020-08-020.8726370.423594
2020-09-010.8786500.546773
2020-09-020.2159500.200086
df22.ix["2020-09"]#获得指定行
D:\Anaconda\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.
c1c2c4c3
2020-09-010.878650.546773c2020-09-01
2020-09-020.215950.200086d2020-09-02
date_list_new=pd.date_range("2020-01-01","2021-12-31") #periods=100 表示把起点到终点的时间分为100份
date_list_new
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10',
               ...
               '2021-12-22', '2021-12-23', '2021-12-24', '2021-12-25',
               '2021-12-26', '2021-12-27', '2021-12-28', '2021-12-29',
               '2021-12-30', '2021-12-31'],
              dtype='datetime64[ns]', length=731, freq='D')
ss=Series(np.random.randn(len(date_list_new)),index=date_list_new)
ss
2020-01-01    0.105096
2020-01-02    1.184459
2020-01-03    0.131104
2020-01-04   -1.421972
2020-01-05   -1.078701
                ...   
2021-12-27   -1.696502
2021-12-28   -1.013481
2021-12-29    1.865331
2021-12-30   -0.036189
2021-12-31   -0.294095
Freq: D, Length: 731, dtype: float64
ss_month=ss.resample("M").mean()#每个月求平均值
ss_month
2020-01-31   -0.248221
2020-02-29   -0.095330
2020-03-31    0.166915
2020-04-30   -0.168161
2020-05-31    0.036389
2020-06-30   -0.253172
2020-07-31    0.160701
2020-08-31    0.399025
2020-09-30    0.154235
2020-10-31   -0.017248
2020-11-30    0.132322
2020-12-31   -0.135380
2021-01-31   -0.131289
2021-02-28    0.006788
2021-03-31   -0.342274
2021-04-30    0.008750
2021-05-31   -0.253172
2021-06-30   -0.093458
2021-07-31    0.347516
2021-08-31   -0.334457
2021-09-30   -0.138226
2021-10-31    0.148497
2021-11-30   -0.275131
2021-12-31    0.015029
Freq: M, dtype: float64
ss.resample("H").ffill()# 填充小时
2020-01-01 00:00:00    0.105096
2020-01-01 01:00:00    0.105096
2020-01-01 02:00:00    0.105096
2020-01-01 03:00:00    0.105096
2020-01-01 04:00:00    0.105096
                         ...   
2021-12-30 20:00:00   -0.036189
2021-12-30 21:00:00   -0.036189
2021-12-30 22:00:00   -0.036189
2021-12-30 23:00:00   -0.036189
2021-12-31 00:00:00   -0.294095
Freq: H, Length: 17521, dtype: float64
ss
2020-01-01    0.105096
2020-01-02    1.184459
2020-01-03    0.131104
2020-01-04   -1.421972
2020-01-05   -1.078701
                ...   
2021-12-27   -1.696502
2021-12-28   -1.013481
2021-12-29    1.865331
2021-12-30   -0.036189
2021-12-31   -0.294095
Freq: D, Length: 731, dtype: float64
score=np.random.randint(25,100,size=20)
score
array([29, 49, 71, 81, 77, 82, 98, 42, 95, 77, 61, 41, 86, 80, 64, 66, 89,
       86, 50, 56])
bins=[0,59,70,80,100]
pd.cut(score,bins)
[(0, 59], (0, 59], (70, 80], (80, 100], (70, 80], ..., (59, 70], (80, 100], (80, 100], (0, 59], (0, 59]]
Length: 20
Categories (4, interval[int64]): [(0, 59] < (59, 70] < (70, 80] < (80, 100]]
score_cat=pd.cut(score,bins)
pd.value_counts(score_cat)#统计每个区间出现的次数
(80, 100]    7
(0, 59]      6
(70, 80]     4
(59, 70]     3
dtype: int64
score_cat=pd.cut(score,bins,labels=['low','ok','good','great'])#分数段变为标签
score_cat
[low, low, good, great, good, ..., ok, great, great, low, low]
Length: 20
Categories (4, object): [low < ok < good < great]
dfscore=DataFrame({"score":score,"d":score_cat})
dfscore
scored
029low
149low
271good
381great
477good
582great
698great
742low
895great
977good
1061ok
1141low
1286great
1380good
1464ok
1566ok
1689great
1786great
1850low
1956low
g=dfscore.groupby(dfscore["d"])
g.groups
{'low': Int64Index([0, 1, 7, 11, 18, 19], dtype='int64'),
 'ok': Int64Index([10, 14, 15], dtype='int64'),
 'good': Int64Index([2, 4, 9, 13], dtype='int64'),
 'great': Int64Index([3, 5, 6, 8, 12, 16, 17], dtype='int64')}
g1=g.get_group("low") #也可以传入两个参数 list 集合
g1
scored
029low
149low
742low
1141low
1850low
1956low
g1.mean()
score    44.5
dtype: float64
g1.max()
score     56
d        low
dtype: object
list(g)
[('low',     score    d
  0      29  low
  1      49  low
  7      42  low
  11     41  low
  18     50  low
  19     56  low), ('ok',     score   d
  10     61  ok
  14     64  ok
  15     66  ok), ('good',     score     d
  2      71  good
  4      77  good
  9      77  good
  13     80  good), ('great',     score      d
  3      81  great
  5      82  great
  6      98  great
  8      95  great
  12     86  great
  16     89  great
  17     86  great)]
gd=dict(list(g))
gd
{'low':     score    d
 0      29  low
 1      49  low
 7      42  low
 11     41  low
 18     50  low
 19     56  low, 'ok':     score   d
 10     61  ok
 14     64  ok
 15     66  ok, 'good':     score     d
 2      71  good
 4      77  good
 9      77  good
 13     80  good, 'great':     score      d
 3      81  great
 5      82  great
 6      98  great
 8      95  great
 12     86  great
 16     89  great
 17     86  great}
gd["low"]
scored
029low
149low
742low
1141low
1850low
1956low
def foo(data):
    """Return the minimum of *data* (e.g. a pandas Series or DataFrame group)."""
    smallest = data.min()
    return smallest
gd["low"].agg(foo)#agg 可以传入自己参数
score     29
d        low
dtype: object
df=pd.read_csv("E:\微云\Python3数据分析与挖掘建模实战\书籍+随堂源码+说明\sample_code\data\HR.csv")
df.head()
satisfaction_levellast_evaluationnumber_projectaverage_monthly_hourstime_spend_companyWork_accidentleftpromotion_last_5yearsdepartmentsalary
00.380.5321573010saleslow
10.800.8652626010salesmedium
20.110.8872724010salesmedium
30.720.8752235010saleslow
40.370.5221593010saleslow
pd.pivot_table(df,index=['satisfaction_level','department'])#生成透视表
Work_accidentaverage_monthly_hourslast_evaluationleftnumber_projectpromotion_last_5yearstime_spend_company
satisfaction_leveldepartment
0.09IT0.000000271.3333330.8744441.05.9444440.0000004.333333
RandD0.600000265.0000000.9540001.06.6000000.0000004.000000
accounting0.000000278.2500000.7862501.06.1250000.0000004.000000
hr0.000000296.0000000.8528571.06.2142860.0000004.214286
management0.000000292.6666670.8633331.06.6666670.0000004.000000
...........................
1.00marketing0.000000222.7500000.8975000.04.5000000.0000002.000000
product_mng0.428571209.7142860.7185710.03.7142860.0000003.000000
sales0.166667196.7333330.7746670.03.7666670.0000003.500000
support0.166667185.6111110.7933330.03.6111110.1111113.388889
technical0.230769200.3846150.7084620.04.0769230.0000003.000000

901 rows × 7 columns

df2.to_csv("E://aa.csv")

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值