import sys, time
class ShowProcess():
    """Display a single-line text progress bar on standard output.

    Call ``show_process`` after each unit of work; the bar is redrawn in
    place (the line ends with a carriage return) and, once the step counter
    reaches ``max_steps``, ``close`` prints the completion message and resets
    the counter.
    """

    i = 0  # current step
    max_steps = 0  # total number of steps to process
    max_arrow = 50  # width of the bar, in characters
    infoDone = 'done'  # message printed when the bar completes

    def __init__(self, max_steps, infoDone = 'Done'):
        """Create a bar for ``max_steps`` steps.

        Args:
            max_steps: Total number of steps the bar represents.
            infoDone: Text printed by ``close`` when the bar completes.
        """
        self.max_steps = max_steps
        self.i = 0
        self.infoDone = infoDone

    def show_process(self, i=None):
        """Advance (or set) the step counter and redraw the bar.

        Args:
            i: Explicit step number. When ``None`` the counter is
                incremented by one instead.

        The drawn bar is clamped to the 0-100% range so an out-of-range
        step never produces a bar wider than ``max_arrow``. A non-positive
        ``max_steps`` is drawn as complete instead of dividing by zero.
        """
        if i is not None:
            self.i = i
        else:
            self.i += 1

        if self.max_steps > 0:
            # Clamp only the value used for drawing; self.i keeps what the
            # caller set so the completion check below is unchanged.
            shown = min(max(self.i, 0), self.max_steps)
            num_arrow = int(shown * self.max_arrow / self.max_steps)
            percent = shown * 100.0 / self.max_steps
        else:
            # Nothing to process: show a full bar rather than divide by zero.
            num_arrow = self.max_arrow
            percent = 100.0
        num_line = self.max_arrow - num_arrow

        # The trailing '\r' returns the cursor to column 0 so the next call
        # overwrites this line.
        process_bar = '[' + '#' * num_arrow + '-' * num_line + ']'\
                      + '%.2f' % percent + '%' + '\r'
        sys.stdout.write(process_bar)
        sys.stdout.flush()
        if self.i >= self.max_steps:
            self.close()

    def close(self):
        """Finish the bar: move to a new line, print ``infoDone``, reset the counter."""
        print('')
        print(self.infoDone)
        self.i = 0
if __name__ == '__main__':
    # Demo: drive a four-step bar to completion, pausing briefly after each step.
    total_steps = 4
    demo_bar = ShowProcess(total_steps, 'OK')
    for _ in range(total_steps):
        demo_bar.show_process()
        time.sleep(0.1)
[##################################################]100.00%
OK
import pandas as pd
# Read the tab-separated Chipotle order file; the file's first line supplies the column names.
orders=pd.read_table('./data/chipotle.tsv')
# Preview the first five rows.
orders.head()
| order_id | quantity | item_name | choice_description | item_price |
---|
0 | 1 | 1 | Chips and Fresh Tomato Salsa | NaN | $2.39 |
1 | 1 | 1 | Izze | [Clementine] | $3.39 |
2 | 1 | 1 | Nantucket Nectar | [Apple] | $3.39 |
3 | 1 | 1 | Chips and Tomatillo-Green Chili Salsa | NaN | $2.39 |
4 | 2 | 2 | Chicken Bowl | [Tomatillo-Red Chili Salsa (Hot), [Black Beans... | $16.98 |
# Re-read the same file with header=None: the columns get the integer labels 0-4
# and the file's header line is kept as the first data row.
users=pd.read_table('./data/chipotle.tsv',header=None)
users.head()
| 0 | 1 | 2 | 3 | 4 |
---|
0 | order_id | quantity | item_name | choice_description | item_price |
1 | 1 | 1 | Chips and Fresh Tomato Salsa | NaN | $2.39 |
2 | 1 | 1 | Izze | [Clementine] | $3.39 |
3 | 1 | 1 | Nantucket Nectar | [Apple] | $3.39 |
4 | 1 | 1 | Chips and Tomatillo-Green Chili Salsa | NaN | $2.39 |
users.shape
(4623, 5)
type(users)
pandas.core.frame.DataFrame
users.describe(include=['object'])
| 0 | 1 | 2 | 3 | 4 |
---|
count | 4623 | 4623 | 4623 | 3377 | 4623 |
unique | 1835 | 10 | 51 | 1044 | 79 |
top | 926 | 1 | Chicken Bowl | [Diet Coke] | $8.75 |
freq | 23 | 4355 | 726 | 134 | 730 |
# The recorded output is identical to describe(include=['object']):
# all five columns of `users` are summarised as object (text) data.
users.describe()
| 0 | 1 | 2 | 3 | 4 |
---|
count | 4623 | 4623 | 4623 | 3377 | 4623 |
unique | 1835 | 10 | 51 | 1044 | 79 |
top | 926 | 1 | Chicken Bowl | [Diet Coke] | $8.75 |
freq | 23 | 4355 | 726 | 134 | 730 |
users.columns
Int64Index([0, 1, 2, 3, 4], dtype='int64')
# The labels created by header=None are the integers 0-4, so the mapping keys must be
# ints; string keys such as '0' match no label and rename() silently changes nothing.
users.rename(columns={0: 'order_id', 1: 'quantity', 2: 'item_name',
                      3: 'choice_description', 4: 'item_price'}, inplace=True)
users.columns
Index(['order_id', 'quantity', 'item_name', 'choice_description',
'item_price'],
dtype='object')
users_cols=['order_id','quantity','item_name','choice_description','item_price']
users.columns=users_cols
users.head()
| order_id | quantity | item_name | choice_description | item_price |
---|
0 | order_id | quantity | item_name | choice_description | item_price |
1 | 1 | 1 | Chips and Fresh Tomato Salsa | NaN | $2.39 |
2 | 1 | 1 | Izze | [Clementine] | $3.39 |
3 | 1 | 1 | Nantucket Nectar | [Apple] | $3.39 |
4 | 1 | 1 | Chips and Tomatillo-Green Chili Salsa | NaN | $2.39 |
# header=0 marks the file's first line as the header row and names= replaces its labels
# with users_cols, so the header line is not kept as a data row.
use=pd.read_table('./data/chipotle.tsv',names=users_cols,header=0)
use.head()
| order_id | quantity | item_name | choice_description | item_price |
---|
0 | 1 | 1 | Chips and Fresh Tomato Salsa | NaN | $2.39 |
1 | 1 | 1 | Izze | [Clementine] | $3.39 |
2 | 1 | 1 | Nantucket Nectar | [Apple] | $3.39 |
3 | 1 | 1 | Chips and Tomatillo-Green Chili Salsa | NaN | $2.39 |
4 | 2 | 2 | Chicken Bowl | [Tomatillo-Red Chili Salsa (Hot), [Black Beans... | $16.98 |
use.columns
Index(['order_id', 'quantity', 'item_name', 'choice_description',
'item_price'],
dtype='object')
# Replace spaces in the column labels with underscores. The labels here contain
# no spaces, so the columns shown below are unchanged.
use.columns=use.columns.str.replace(' ','_')
use.columns
Index(['order_id', 'quantity', 'item_name', 'choice_description',
'item_price'],
dtype='object')
# NOTE(review): item_price has dtype object (strings such as '$2.39'), so this sort is
# lexicographic, not numeric: the output ends at '$9.39' although '$16.98' occurs in the
# data. Convert the column to a number first if a numeric ordering is wanted.
use.item_price.sort_values()
261 $1.09
1805 $1.09
1030 $1.09
3020 $1.09
3021 $1.09
...
4547 $9.39
4391 $9.39
2600 $9.39
4241 $9.39
4390 $9.39
Name: item_price, Length: 4622, dtype: object
# Sort whole rows by item_price, largest first.
# NOTE(review): item_price holds strings, so the order is lexicographic; '$9.39' is
# listed first even though the data contains '$16.98'.
use.sort_values('item_price',ascending=False)
| order_id | quantity | item_name | choice_description | item_price |
---|
2624 | 1042 | 1 | Steak Salad Bowl | [Fresh Tomato Salsa, [Black Beans, Sour Cream,... | $9.39 |
4419 | 1762 | 1 | Steak Salad Bowl | [Roasted Chili Corn Salsa, [Fajita Vegetables,... | $9.39 |
4036 | 1615 | 1 | Steak Salad Bowl | [Fresh Tomato Salsa, [Fajita Vegetables, Chees... | $9.39 |
1825 | 738 | 1 | Barbacoa Salad Bowl | [Fresh Tomato Salsa, [Rice, Pinto Beans, Chees... | $9.39 |
3115 | 1243 | 1 | Carnitas Salad Bowl | [Tomatillo Green Chili Salsa, [Rice, Pinto Bea... | $9.39 |
... | ... | ... | ... | ... | ... |
3145 | 1254 | 1 | Canned Soda | [Diet Dr. Pepper] | $1.09 |
414 | 180 | 1 | Canned Soda | [Dr. Pepper] | $1.09 |
3162 | 1262 | 1 | Canned Soda | [Coca Cola] | $1.09 |
821 | 338 | 1 | Canned Soda | [Coca Cola] | $1.09 |
1457 | 591 | 1 | Canned Soda | [Sprite] | $1.09 |
4622 rows × 5 columns
use.head()
| order_id | quantity | item_name | choice_description | item_price |
---|
0 | 1 | 1 | Chips and Fresh Tomato Salsa | NaN | $2.39 |
1 | 1 | 1 | Izze | [Clementine] | $3.39 |
2 | 1 | 1 | Nantucket Nectar | [Apple] | $3.39 |
3 | 1 | 1 | Chips and Tomatillo-Green Chili Salsa | NaN | $2.39 |
4 | 2 | 2 | Chicken Bowl | [Tomatillo-Red Chili Salsa (Hot), [Black Beans... | $16.98 |
import pandas as pd
import numpy as np
# Build a float Series with explicit string labels and display it.
s2 = pd.Series(data=[4.0, 6.5, -0.5, 4.2], index=list('dbac'))
print(s2)
d 4.0
b 6.5
a -0.5
c 4.2
dtype: float64
s2[['a','b','c']]
a -0.5
b 6.5
c 4.2
dtype: float64
# A Series built from a dict uses the keys as the index and the values as the data.
dic1 = dict([('apple', 5), ('pen', 3), ('applepen', 10)])
s3 = pd.Series(data=dic1)
print(s3)
apple 5
pen 3
applepen 10
dtype: int64
# Column-oriented source data: each key becomes a DataFrame column.
data = dict(
    year=[2014, 2015, 2016, 2017],
    income=[10000, 30000, 50000, 60000],
    pay=[5000, 20000, 30000, 30000],
)
# Label the four rows 'a'..'d' instead of the default 0..3.
df1 = pd.DataFrame(data=data, index=list('abcd'))
df1
| year | income | pay |
---|
a | 2014 | 10000 | 5000 |
b | 2015 | 30000 | 20000 |
c | 2016 | 50000 | 30000 |
d | 2017 | 60000 | 30000 |
# 3x4 frame of 0..11 with default integer row and column labels.
df2 = pd.DataFrame(data=np.arange(12).reshape((3, 4)))
df2
# Same values, but with explicit (deliberately unsorted) row and column labels.
df3 = pd.DataFrame(
    data=np.arange(12).reshape((3, 4)),
    index=list('acb'),
    columns=[2, 33, 44, 5],
)
df3
df1.columns
Index(['year', 'income', 'pay'], dtype='object')
df1.values
array([[ 2014, 10000, 5000],
[ 2015, 30000, 20000],
[ 2016, 50000, 30000],
[ 2017, 60000, 30000]], dtype=int64)
df1.describe()
| year | income | pay |
---|
count | 4.000000 | 4.000000 | 4.000000 |
mean | 2015.500000 | 37500.000000 | 21250.000000 |
std | 1.290994 | 22173.557826 | 11814.539066 |
min | 2014.000000 | 10000.000000 | 5000.000000 |
25% | 2014.750000 | 25000.000000 | 16250.000000 |
50% | 2015.500000 | 40000.000000 | 25000.000000 |
75% | 2016.250000 | 52500.000000 | 30000.000000 |
max | 2017.000000 | 60000.000000 | 30000.000000 |
df1.T
| a | b | c | d |
---|
year | 2014 | 2015 | 2016 | 2017 |
income | 10000 | 30000 | 50000 | 60000 |
pay | 5000 | 20000 | 30000 | 30000 |
df3.sort_index(axis=1)
df3.sort_values(by=44)
# Six consecutive daily timestamps starting 2019-01-01, used as the row index.
dates = pd.date_range(start='20190101', periods=6)
df1 = pd.DataFrame(
    data=np.arange(24).reshape((6, 4)),
    index=dates,
    columns=list('abcd'),
)
df1
| a | b | c | d |
---|
2019-01-01 | 0 | 1 | 2 | 3 |
2019-01-02 | 4 | 5 | 6 | 7 |
2019-01-03 | 8 | 9 | 10 | 11 |
2019-01-04 | 12 | 13 | 14 | 15 |
2019-01-05 | 16 | 17 | 18 | 19 |
2019-01-06 | 20 | 21 | 22 | 23 |
df1.a
2019-01-01 0
2019-01-02 4
2019-01-03 8
2019-01-04 12
2019-01-05 16
2019-01-06 20
Freq: D, Name: a, dtype: int32
df1.loc['20190101',['a','c']]
a 0
c 2
Name: 2019-01-01 00:00:00, dtype: int32
df1.loc[:,['a','c']]
| a | c |
---|
2019-01-01 | 0 | 2 |
2019-01-02 | 4 | 6 |
2019-01-03 | 8 | 10 |
2019-01-04 | 12 | 14 |
2019-01-05 | 16 | 18 |
2019-01-06 | 20 | 22 |
df1.iloc[2]
a 8
b 9
c 10
d 11
Name: 2019-01-03 00:00:00, dtype: int32
df1.iloc[[1,2,4],[1,3]]
| b | d |
---|
2019-01-02 | 5 | 7 |
2019-01-03 | 9 | 11 |
2019-01-05 | 17 | 19 |
df1.a > 6
2019-01-01 False
2019-01-02 False
2019-01-03 True
2019-01-04 True
2019-01-05 True
2019-01-06 True
Freq: D, Name: a, dtype: bool
df1[df1.a > 6]
| a | b | c | d |
---|
2019-01-03 | 8 | 9 | 10 | 11 |
2019-01-04 | 12 | 13 | 14 | 15 |
2019-01-05 | 16 | 17 | 18 | 19 |
2019-01-06 | 20 | 21 | 22 | 23 |
# Integer date-like labels 20190101..20190104.
dates = np.arange(20190101, 20190105)
df1 = pd.DataFrame(
    data=np.arange(12).reshape((4, 3)),
    index=dates,
    columns=list('abc'),
)
# Building a frame from df1 with extra column labels leaves the new columns filled with NaN.
df2 = pd.DataFrame(df1, index=dates, columns=list('abcde'))
df2
| a | b | c | d | e |
---|
20190101 | 0 | 1 | 2 | NaN | NaN |
20190102 | 3 | 4 | 5 | NaN | NaN |
20190103 | 6 | 7 | 8 | NaN | NaN |
20190104 | 9 | 10 | 11 | NaN | NaN |
# s1 is labelled with the first three dates, s2 with the last three.
s1 = pd.Series(data=[3, 4, 6], index=dates[:3])
s2 = pd.Series(data=[32, 5, 2], index=dates[1:])
# Column assignment aligns on the index, so the row missing from each Series becomes NaN.
df2['d'] = s1
df2['e'] = s2
df2
| a | b | c | d | e |
---|
20190101 | 0 | 1 | 2 | 3.0 | NaN |
20190102 | 3 | 4 | 5 | 4.0 | 32.0 |
20190103 | 6 | 7 | 8 | 6.0 | 5.0 |
20190104 | 9 | 10 | 11 | NaN | 2.0 |
# Drop every row (axis=0) that contains at least one NaN (how='any').
# The result is a new frame; df2 itself still has all four rows afterwards.
df2.dropna(axis=0,how='any')
| a | b | c | d | e |
---|
20190102 | 3 | 4 | 5 | 4.0 | 32.0 |
20190103 | 6 | 7 | 8 | 6.0 | 5.0 |
# Show df2 with every NaN replaced by 0. The result is a new frame; df2 keeps its NaNs.
df2.fillna(value=0)
| a | b | c | d | e |
---|
20190101 | 0 | 1 | 2 | 3.0 | 0.0 |
20190102 | 3 | 4 | 5 | 4.0 | 32.0 |
20190103 | 6 | 7 | 8 | 6.0 | 5.0 |
20190104 | 9 | 10 | 11 | 0.0 | 2.0 |
df2.isnull()
| a | b | c | d | e |
---|
20190101 | False | False | False | False | True |
20190102 | False | False | False | False | False |
20190103 | False | False | False | False | False |
20190104 | False | False | False | True | False |