import pandas as pd
from pandas import Series
# Absolute location of the telecom campaign CSV that the rest of the script analyses.
path = "E:\\北风\\数据科学脚本\\Python_book\\5Preprocessing\\teleco_camp_orig.csv"

# The file is opened explicitly and the handle is passed to pandas. The
# with-block closes the handle as soon as parsing finishes (or fails) instead
# of leaving it open for the lifetime of the process.
# NOTE(review): open() uses the platform default encoding - confirm it matches the file.
with open(path) as fo:
    info = pd.read_csv(fo)
print(info)
ID Suc_flag ARPU PromCnt12 PromCnt36 PromCntMsg12 \
0 12 1 50.0 6 10 2
1 53 0 NaN 5 9 1
2 67 1 25.0 6 11 2
3 71 1 80.0 7 10 2
4 142 1 15.0 6 11 2
5 159 1 60.0 8 13 2
6 186 0 NaN 7 13 2
7 210 0 NaN 5 10 2
8 220 1 40.0 7 10 2
9 250 1 75.0 6 11 2
10 257 0 NaN 3 7 1
11 263 1 50.0 8 13 2
12 268 1 165.0 7 12 2
13 282 1 100.0 4 7 1
14 325 0 NaN 6 10 2
15 341 1 125.0 7 12 2
16 364 1 50.0 6 11 2
17 368 1 50.0 6 11 2
18 383 0 NaN 6 11 2
19 387 1 25.0 3 8 1
20 397 1 80.0 4 8 1
21 403 1 175.0 6 9 2
22 427 1 50.0 4 9 1
23 441 1 50.0 7 12 2
24 458 0 NaN 4 9 1
25 511 1 60.0 6 10 2
26 527 0 NaN 6 12 2
27 539 0 NaN 5 9 2
28 542 1 50.0 3 7 1
29 544 0 NaN 7 12 2
... ... ... ... ... ... ...
9656 190840 1 125.0 3 7 1
9657 190842 0 NaN 3 8 1
9658 190859 1 50.0 5 10 2
9659 190863 0 NaN 3 7 1
9660 190891 0 NaN 4 9 1
9661 190902 0 NaN 3 7 1
9662 190920 1 100.0 4 8 1
9663 190976 1 100.0 4 7 1
9664 191007 0 NaN 6 10 2
9665 191056 1 235.0 5 10 1
9666 191064 1 15.0 2 4 1
9667 191134 1 25.0 4 9 1
9668 191138 1 75.0 2 4 1
9669 191155 0 NaN 3 5 1
9670 191233 0 NaN 3 8 1
9671 191248 0 NaN 4 6 1
9672 191296 1 45.0 4 9 1
9673 191297 1 25.0 2 5 1
9674 191316 0 NaN 4 9 1
9675 191465 0 NaN 3 7 1
9676 191475 0 NaN 3 8 1
9677 191482 0 NaN 4 9 1
9678 191524 0 NaN 4 9 1
9679 191528 1 100.0 3 7 1
9680 191531 1 65.0 3 8 1
9681 191547 1 75.0 4 9 2
9682 191649 0 NaN 3 8 1
9683 191663 1 50.0 2 7 1
9684 191672 0 NaN 4 8 1
9685 191779 1 750.0 3 5 1
PromCntMsg36 Class Age Gender HomeOwner AvgARPU AvgHomeValue \
0 3 4 57.0 M H 49.894904 33400
1 4 3 55.0 M H 48.574742 37600
2 4 1 57.0 F H 49.272646 100400
3 4 1 52.0 F H 47.334953 39900
4 4 1 NaN F U 47.827404 47500
5 4 2 58.0 M U 48.673449 53000
6 4 2 NaN F U 48.560389 91000
7 3 3 54.0 F H 49.644237 66300
8 3 1 44.0 M H 48.454812 55000
9 4 3 60.0 F U 48.724262 46900
10 1 3 48.0 F H 52.561329 49000
11 4 3 57.0 U U 50.282952 215600
12 4 2 40.0 F U 59.931475 176500
13 2 2 37.0 M H 51.024610 111700
14 4 2 40.0 F H 64.073400 543500
15 4 3 53.0 F U 49.071800 51700
16 4 4 50.0 F H 52.912850 310000
17 4 1 56.0 M H 50.273664 225700
18 2 2 NaN M U 62.063946 243900
19 3 1 NaN F U 49.613249 78900
20 1 4 57.0 F H 55.480759 185700
21 3 3 50.0 U U 64.311114 76500
22 3 2 NaN M U 48.799586 70900
23 4 1 49.0 F U 47.961826 55000
24 4 3 58.0 F U 50.774517 168800
25 3 3 58.0 F U 49.595220 102400
26 4 1 52.0 F H 48.854248 68400
27 2 3 56.0 M H 67.266602 200300
28 1 1 NaN M U 52.166583 53500
29 4 2 50.0 F U 48.614598 26600
... ... ... ... ... ... ... ...
9656 1 2 45.0 M H 61.245030 48500
9657 3 3 45.0 F U 50.652000 53100
9658 4 2 58.0 F U 50.019617 228800
9659 3 3 58.0 M U 51.371073 193000
9660 3 2 53.0 M U 49.514441 46200
9661 1 4 54.0 M U 55.786752 146300
9662 3 2 NaN M H 59.048710 91600
9663 3 4 NaN U U 49.217583 55800
9664 3 1 57.0 F U 48.018731 89600
9665 1 3 29.0 M U 72.475934 566700
9666 1 1 57.0 F H 49.318261 29700
9667 3 1 NaN F U 49.504599 43100
9668 1 4 48.0 F H 51.313396 57100
9669 1 2 NaN M U 60.536940 45500
9670 1 2 NaN M U 61.969402 165200
9671 2 2 43.0 F H 57.117598 79000
9672 3 2 46.0 F U 50.119001 102400
9673 2 2 NaN F U 49.511860 49500
9674 3 4 55.0 F U 55.201555 381300
9675 3 4 57.0 M U 49.286275 0
9676 3 2 59.0 U U 57.317004 28400
9677 3 3 44.0 M U 50.932958 88000
9678 1 2 48.0 U U 52.648915 50000
9679 1 2 49.0 F U 52.769421 81400
9680 3 2 50.0 U U 49.815856 107300
9681 3 2 58.0 F U 58.403995 31800
9682 3 1 55.0 F U 49.166689 38200
9683 3 2 54.0 M U 49.434637 53800
9684 3 1 59.0 F U 48.634900 36600
9685 2 2 54.0 M U 62.081260 143200
AvgIncome
0 39460
1 33545
2 42091
3 39313
4 0
5 49487
6 0
7 49047
8 43927
9 47256
10 40043
11 0
12 61523
13 89077
14 165543
15 0
16 44543
17 71260
18 0
19 53504
20 93770
21 60721
22 0
23 19203
24 0
25 42324
26 62414
27 105000
28 0
29 39250
... ...
9656 47256
9657 61313
9658 0
9659 59239
9660 0
9661 48209
9662 0
9663 0
9664 0
9665 52283
9666 25991
9667 42565
9668 44164
9669 0
9670 79842
9671 62804
9672 28268
9673 36364
9674 95837
9675 0
9676 0
9677 55290
9678 0
9679 54137
9680 0
9681 42358
9682 42373
9683 0
9684 44023
9685 79635
[9686 rows x 14 columns]
# Take the "Age" column of the frame as a Series and preview its first five rows.
series_age = info["Age"]
print(series_age.iloc[:5])
0 57.0
1 55.0
2 57.0
3 52.0
4 NaN
Name: Age, dtype: float64
# Take the "ID" column of the frame as a Series and preview its first five rows.
series_value = info["ID"]
print(series_value.head(5))
0 12
1 53
2 67
3 71
4 142
Name: ID, dtype: int64
# Re-key the ages by the ID column: strip the Age column down to its raw
# values (discarding the default positional index) and attach the ID Series
# as the new index.
series_age = info["Age"]
age_1 = series_age.values
custom = Series(data=age_1, index=series_value)
# Positional slice: the first five entries regardless of their ID labels.
print(custom.iloc[:5])
ID
12 57.0
53 55.0
67 57.0
71 52.0
142 NaN
dtype: float64
# Label-based lookup with a list of labels. `custom` is indexed by ID, while
# the output recorded below is indexed by AvgHomeValue, so the lookup is done
# on a Series of ages keyed by the AvgHomeValue column. Those labels are not
# unique: every row whose AvgHomeValue is 33400 or 37600 is returned.
custom_by_home_value = Series(info["Age"].values, index=info["AvgHomeValue"])
custom_by_home_value.loc[[33400, 37600]]
AvgHomeValue
33400 57.0
33400 57.0
33400 NaN
33400 54.0
33400 56.0
33400 NaN
33400 55.0
37600 55.0
37600 48.0
37600 46.0
37600 42.0
37600 NaN
37600 55.0
37600 57.0
37600 NaN
37600 55.0
37600 56.0
37600 51.0
37600 46.0
37600 34.0
37600 55.0
dtype: float64
# Order the Series by its index labels (ascending).
o_index = custom.index.tolist()
sorted_index = sorted(o_index)  # kept: module-level name that later code may read
# sort_index() is used instead of custom.reindex(sorted_index): for unique
# labels the result is the same, and it does not raise when labels repeat.
custom_byidex = custom.sort_index()
print(custom_byidex)
ID
12 57.0
53 55.0
67 57.0
71 52.0
142 NaN
159 58.0
186 NaN
210 54.0
220 44.0
250 60.0
257 48.0
263 57.0
268 40.0
282 37.0
325 40.0
341 53.0
364 50.0
368 56.0
383 NaN
387 NaN
397 57.0
403 50.0
427 NaN
441 49.0
458 58.0
511 58.0
527 52.0
539 56.0
542 NaN
544 50.0
...
190840 45.0
190842 45.0
190859 58.0
190863 58.0
190891 53.0
190902 54.0
190920 NaN
190976 NaN
191007 57.0
191056 29.0
191064 57.0
191134 NaN
191138 48.0
191155 NaN
191233 NaN
191248 43.0
191296 46.0
191297 NaN
191316 55.0
191465 57.0
191475 59.0
191482 44.0
191524 48.0
191528 49.0
191531 50.0
191547 58.0
191649 55.0
191663 54.0
191672 59.0
191779 54.0
Length: 9686, dtype: float64
# Boolean-mask selection: keep only the entries whose age is strictly below 18.
children = custom.loc[custom.lt(18)]
print(children)
ID
89765 16.0
148098 16.0
179339 16.0
dtype: float64
# Element-wise comparison producing a boolean Series on the same index.
# Despite its name, the mask is True for ages strictly above 18; missing ages
# (NaN) compare as False, as the entry for ID 142 in the output shows.
child = custom.gt(18)
child.iloc[:5]
ID
12 True
53 True
67 True
71 True
142 False
dtype: bool
import numpy as np
# NumPy ufuncs accept a Series directly: the result shown below is again a
# Series carrying the same ID index, and NaN entries stay NaN.
np.add(custom,custom)
ID
12 114.0
53 110.0
67 114.0
71 104.0
142 NaN
159 116.0
186 NaN
210 108.0
220 88.0
250 120.0
257 96.0
263 114.0
268 80.0
282 74.0
325 80.0
341 106.0
364 100.0
368 112.0
383 NaN
387 NaN
397 114.0
403 100.0
427 NaN
441 98.0
458 116.0
511 116.0
527 104.0
539 112.0
542 NaN
544 100.0
...
190840 90.0
190842 90.0
190859 116.0
190863 116.0
190891 106.0
190902 108.0
190920 NaN
190976 NaN
191007 114.0
191056 58.0
191064 114.0
191134 NaN
191138 96.0
191155 NaN
191233 NaN
191248 86.0
191296 92.0
191297 NaN
191316 110.0
191465 114.0
191475 118.0
191482 88.0
191524 96.0
191528 98.0
191531 100.0
191547 116.0
191649 110.0
191663 108.0
191672 118.0
191779 108.0
Length: 9686, dtype: float64
# Apply the sine ufunc element-wise to the Series, then show the first five
# results by position; the ID index is preserved and NaN stays NaN.
np.sin(custom).iloc[:5]
ID
12 0.436165
53 -0.999755
67 0.436165
71 0.986628
142 NaN
dtype: float64
# Reduce the Series with NumPy's max. The recorded result is 60.0 even though
# `custom` contains NaN entries, i.e. the missing values did not propagate.
np.max(custom)
60.0
# The same reduction through the Series method; it reports the same value (60.0).
custom.max()
60.0