关注微信号:小程在线
关注CSDN博客:程志伟的博客
Python 3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)]
Type "copyright", "credits" or "license" for more information.
IPython 7.6.1 -- An enhanced Interactive Python.
import numpy as np
import pandas as pd
#### 1.1创建一个Series类型的数据 ####
price = pd.Series([150,488,192,689,410,266])
price
Out[1]:
0 150
1 488
2 192
3 689
4 410
5 266
dtype: int64
#查看数据的类型
type(price)
Out[2]: pandas.core.series.Series
price = pd.Series([150,488,192,689,410,266],name="price")
price
Out[3]:
0 150
1 488
2 192
3 689
4 410
5 266
Name: price, dtype: int64
temp = {'Mon': 33, 'Tue': 19, 'Wed': 15, 'Thu': 89, 'Fri': 11, 'Sat': -5, 'Sun': 9}
pd.Series(temp)
Out[4]:
Mon 33
Tue 19
Wed 15
Thu 89
Fri 11
Sat -5
Sun 9
dtype: int64
#简单的统计描述
price.mean()
Out[5]: 365.8333333333333
price.sum()
Out[6]: 2195
price.head(2)
Out[7]:
0 150
1 488
Name: price, dtype: int64
#取最后3个数
price.tail(3)
Out[8]:
3 689
4 410
5 266
Name: price, dtype: int64
#### 1.2添加索引 ####
price = pd.Series([15280,45888,15692,55689,28410,27566.3],
index=['wh','sh','hz','bj','gz','nj'])
price
Out[9]:
wh 15280.0
sh 45888.0
hz 15692.0
bj 55689.0
gz 28410.0
nj 27566.3
dtype: float64
#查看索引
price.index
Out[10]: Index(['wh', 'sh', 'hz', 'bj', 'gz', 'nj'], dtype='object')
#查看索引对应的数值
price['sh']
Out[12]: 45888.0
#给索引添加名字
price.index.name='city'
price
Out[13]:
city
wh 15280.0
sh 45888.0
hz 15692.0
bj 55689.0
gz 28410.0
nj 27566.3
dtype: float64
#用日期作为索引
dates=pd.date_range('2016-01-01','2016-6-01',freq='M')
dates
Out[14]:
DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
'2016-05-31'],
dtype='datetime64[ns]', freq='M')
tempature=pd.Series([13,15,20,27,29],index=dates)
tempature
Out[15]:
2016-01-31 13
2016-02-29 15
2016-03-31 20
2016-04-30 27
2016-05-31 29
Freq: M, dtype: int64
#索引是可以重复的
temp=pd.Series([13,15,20,27,29],index=[0,2,2,3,4])
temp
Out[16]:
0 13
2 15
2 20
3 27
4 29
dtype: int64
temp=pd.Series([13,15,20,27,29])
temp
Out[17]:
0 13
1 15
2 20
3 27
4 29
dtype: int64
temp[0]
Out[18]: 13
temp.loc[0:3] # label-based slice: the end label 3 is included in the result
Out[23]:
0 13
1 15
2 20
3 27
dtype: int64
temp.iloc[0:3] # position-based slice: the end position 3 is excluded from the result
Out[24]:
0 13
1 15
2 20
dtype: int64
temp=pd.Series([13,15,20,27,29],index=['M','T','W','T','F'])
temp
Out[25]:
M 13
T 15
W 20
T 27
F 29
dtype: int64
temp['T']
Out[26]:
T 15
T 27
dtype: int64
temp.M
Out[27]: 13
#### 1.3数据的修改与删除 ####
price
Out[28]:
city
wh 15280.0
sh 45888.0
hz 15692.0
bj 55689.0
gz 28410.0
nj 27566.3
dtype: float64
price['wh']=16000
price
Out[29]:
city
wh 16000.0
sh 45888.0
hz 15692.0
bj 55689.0
gz 28410.0
nj 27566.3
dtype: float64
price.iloc[0]=15280
price
Out[30]:
city
wh 15280.0
sh 45888.0
hz 15692.0
bj 55689.0
gz 28410.0
nj 27566.3
dtype: float64
# Series.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat also returns a new Series and leaves `price` itself unchanged.
pd.concat([price, pd.Series([9500], index=['cd'])])
Out[31]:
wh 15280.0
sh 45888.0
hz 15692.0
bj 55689.0
gz 28410.0
nj 27566.3
cd 9500.0
dtype: float64
price
Out[32]:
city
wh 15280.0
sh 45888.0
hz 15692.0
bj 55689.0
gz 28410.0
nj 27566.3
dtype: float64
#增加新的数据
price['cd']=9500
price
Out[34]:
city
wh 15280.0
sh 45888.0
hz 15692.0
bj 55689.0
gz 28410.0
nj 27566.3
cd 9500.0
dtype: float64
# Series.set_value was deprecated in pandas 0.21 and removed in 1.0.
# The label-based scalar accessor .at overwrites the value in place; the
# bare `price` afterwards displays the updated Series.
price.at['bj'] = 62000
price
Out[35]:
city
wh 15280.0
sh 45888.0
hz 15692.0
bj 62000.0
gz 28410.0
nj 27566.3
cd 9500.0
dtype: float64
#使用del删除数据
del price['nj']
price
Out[37]:
city
wh 15280.0
sh 45888.0
hz 15692.0
bj 62000.0
gz 28410.0
cd 9500.0
dtype: float64
#使用布尔值筛选数据
price[price>50000]
Out[38]:
city
bj 62000.0
dtype: float64
#### 1.4数据的描述 ####
price.describe()
Out[40]:
count 6.000000
mean 29461.666667
std 20585.747940
min 9500.000000
25% 15383.000000
50% 22051.000000
75% 41518.500000
max 62000.000000
dtype: float64
#取25%,50%的分位数
price.describe(percentiles=[0.25,0.5])
Out[41]:
count 6.000000
mean 29461.666667
std 20585.747940
min 9500.000000
25% 15383.000000
50% 22051.000000
max 62000.000000
dtype: float64
#对非数值型数据统计
temp=pd.Series(['hw','apple','vivo','mi','hw','oppo','samsung','vivo'],dtype='category')
temp
Out[42]:
0 hw
1 apple
2 vivo
3 mi
4 hw
5 oppo
6 samsung
7 vivo
dtype: category
Categories (6, object): [apple, hw, mi, oppo, samsung, vivo]
temp.value_counts()
Out[43]:
vivo 2
hw 2
samsung 1
oppo 1
mi 1
apple 1
dtype: int64
#### 1.5向量化操作与广播 ####
#将数据乘以2
price * 2
Out[44]:
city
wh 30560.0
sh 91776.0
hz 31384.0
bj 124000.0
gz 56820.0
cd 19000.0
dtype: float64
#将数据增加1000
price+1000
Out[45]:
city
wh 16280.0
sh 46888.0
hz 16692.0
bj 63000.0
gz 29410.0
cd 10500.0
dtype: float64
#将数据取log
np.log(price)
Out[46]:
city
wh 9.634300
sh 10.733959
hz 9.660906
bj 11.034890
gz 10.254496
cd 9.159047
dtype: float64
#list类型数据
li=[2000,3000,3000]
#数据乘以2,表示数据重复2遍
li*2
Out[48]: [2000, 3000, 3000, 2000, 3000, 3000]
s=pd.Series([10,20,30,40])
s
Out[49]:
0 10
1 20
2 30
3 40
dtype: int64
s2=pd.Series([10,20,30],index=[2,3,4])
s2
Out[50]:
2 10
3 20
4 30
dtype: int64
#相同索引的才进行计算
s+s2
Out[51]:
0 NaN
1 NaN
2 40.0
3 60.0
4 NaN
dtype: float64
#改变数据的类型
s.apply(float)
Out[52]:
0 10.0
1 20.0
2 30.0
3 40.0
dtype: float64
s.astype(int)
Out[53]:
0 10
1 20
2 30
3 40
dtype: int32
#### 1.6数据的迭代 ####
# Iterating over a Series yields its values (not its index labels).
for num in s:
    print(num)
10
20
30
40
#数值存放在values里面
40 in s
Out[55]: False
40 in s.values
Out[56]: True
# 0 here refers to the index label 0, not to a stored value
0 in s # a Series behaves like key-value storage, so `in` tests the index: this checks for index 0
Out[57]: True
#li是list类型的数据
li=[10,20,30,40]
#40是li的值
40 in li
Out[58]: True