import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt
# Selects which demo section of this script runs.
index = 1


def _banner(n):
    """Print the numbered separator line shown between demo outputs."""
    print('*********', n, '***************************')


if index == 1:
    # Build a 4x5 DataFrame holding 0..19, rows labelled a-d, columns A-E.
    data = pd.DataFrame(np.arange(20).reshape(4, 5), index=list('abcd'), columns=list('ABCDE'))
    print(data)
    print('取出前三行')
    # Positional row slice: the first three rows.
    d1 = data[:3]
    print(d1)
    # NOTE(review): the label printed below says "third row", but data[3:]
    # selects from position 3 onward (the fourth row, 'd'). Confirm which
    # one is intended before changing either the label or the slice.
    print('取出第三行')
    d2 = data[3:]
    print(d2)
    # Take the first row by position; the row labelled 'a' is the first row,
    # so a label-based lookup would give the same result.
    print(data.iloc[0])
    print('行', data.shape[0])
    print('列', data.shape[1])
    # Frequency of each value in the first row, then the distinct values.
    print(data.iloc[0].value_counts())
    print(data.iloc[0].value_counts().index)
    # Same single column selected by label and by position.
    print(data.loc[:, ['A']])
    print(data.iloc[:, [0]])
    _banner(4)
    # Section 4 (label-based sub-frame data.loc[['a','b'],['A','B']]) is disabled.
    _banner(5)
    print(data.iloc[[0, 1], [0, 1]])
    _banner(6)
    print(data.loc[:, :])
    _banner(7)
    print(data.iloc[:, :])
    _banner(8)
    # Boolean-mask row filtering through .loc.
    print(data.loc[data['A'] == 0])
    _banner(9)
    print(data.loc[(data['A'] == 0) & (data['B'] == 2)])
    _banner(10)
    # The same filters using plain DataFrame indexing ...
    print(data[data['A'] == 0])
    _banner(11)
    # ... and using isin().
    print(data[data['A'].isin([0])])
    _banner(12)
    print(data[(data['A'] == 0) & (data['B'] == 2)])
    _banner(13)
    print(data[(data['A'].isin([0])) & (data['B'].isin([2]))])
if index == 2:
    # Series / ndarray samples (not used further in this section).
    s = pd.Series([11., 12., 13.], name='S')
    data = np.arange(21, 24)
    df = pd.DataFrame({'A': [1., 2.], 'B': [3, 4]})
    # DataFrame.as_matrix() was removed in pandas 1.0; select the columns and
    # call to_numpy() instead.
    a = df[['A', 'B']].to_numpy()
    b = df[['A', 'B']].to_numpy()
    print(a.shape)
    print(b.shape)
    # np.mat was removed in NumPy 2.0; np.asmatrix is the same conversion and
    # keeps the printed result type (numpy.matrix) unchanged.
    c = np.asmatrix(a)
    d = np.asmatrix(b)
    e = np.matmul(c, d)
    print(type(e))
    print(e)
    # Series.item() raises unless the Series has exactly one element, and the
    # len()/slice/df.loc[i] usage below only makes sense for the row index,
    # so read the index here.
    A = df['A'].index
    print(type(A))
    print(len(A))
    # Walk the row labels backwards, starting no later than position 3.
    for i in A[3::-1]:
        print(df.loc[i, 'A'])
def _labelled_frame(first, count=4):
    """Return a frame with columns A-D whose cells are '<column><row label>'.

    Row labels are the integers first .. first + count - 1.
    """
    labels = list(range(first, first + count))
    cells = {col: [col + str(label) for label in labels] for col in 'ABCD'}
    return pd.DataFrame(cells, index=labels)


if index == 3:
    # concat: three frames with consecutive row labels, stacked vertically.
    df1 = _labelled_frame(0)
    df2 = _labelled_frame(4)
    df3 = _labelled_frame(8)
    frames = [df1, df2, df3]
    result = pd.concat(frames)
if index == 4:
    df1 = pd.DataFrame(
        {
            'A': ['A0', 'A1', 'A2', 'A3'],
            'B': ['B0', 'B1', 'B2', 'B3'],
            'C': ['C0', 'C1', 'C2', 'C3'],
            'D': ['', 'D1', 'D2', 'D3'],
        },
        index=[0, 1, 2, 3],
    )
    # dropna() only removes NaN/None. An empty string is an ordinary value,
    # so blanks are converted to NaN first; otherwise no row is dropped.
    df1 = df1.replace('', np.nan)
    df1.dropna(inplace=True)
    print(df1)
    # Convert the remaining rows to a NumPy array.
    s = df1.to_numpy()
    print(s)
if index == 6:
    # NOTE(review): the original branch never defined ``df``. The values
    # below are illustrative sample data carrying the column names that the
    # expressions reference -- replace with the real dataset.
    df = pd.DataFrame({
        '气温': [20.0, 25.0, 30.0],
        '湿度': [40.0, 55.0, 70.0],
        'PM2P5': [35.0, 80.0, 120.0],
    })
    # Multi-line eval: one assignment per line, each adding a column in place.
    # (The pasted IPython ".....:" continuation prompts were removed; they are
    # not valid expression syntax. The duplicated copy of this eval was
    # dropped because it recomputed the same three columns.)
    df.eval("""
        e = 气温 + 湿度
        f = 气温 - 湿度
        g = 气温 / 2.0""", inplace=True)
    df.eval('new1 = 气温 + 湿度 + PM2P5', inplace=True)
    # query() requires a boolean expression; the original call passed none.
    # NOTE(review): example filter only -- substitute the intended condition.
    print(df.query('气温 > 25'))
    print(df)
if index == 7:
    df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
    # Column B becomes a flag derived from A: 1 where A equals 2, else -1.
    df['B'] = df['A'].apply(lambda value: -1 if value != 2 else 1)
    # Shift every element of A up by 3 and write it back.
    S = df['A'].map(lambda value: value + 3)
    df['A'] = S
    print(df)
    # Write the frame twice: once without and once with the row index column.
    for _path, _keep_index in (
        (r'D:\wks\wks_ml\demo\test.csv', False),
        (r'D:\wks\wks_ml\demo\test1.csv', True),
    ):
        df.to_csv(_path, index=_keep_index)
if index == 8:
    df = pd.DataFrame({'A': [np.nan, 2], 'B': [3, 4]})
    # Replace the missing entries of column A with -1, then store the
    # filled column back under the same name.
    s = df['A'].fillna(-1)
    df = df.assign(A=s)
    print(df)
if index == 9:
    df = pd.DataFrame({'A': [np.nan, 2], 'B': [3, 4]})
    # Pull the row labelled 1 out as a one-column frame (index A/B, column 1).
    df = df.loc[1].to_frame()
    # Transpose it back into a single row and renumber that row from 0.
    df2 = df.T.reset_index(drop=True)
    print(df2)
if index == 10:
    # Probability-distribution histograms of Gaussian samples.
    # Mean of the distribution.
    mean = 0
    # Standard deviation: how concentrated or spread out the samples are.
    sigma = 1
    x = mean + sigma * np.random.randn(10000)
    fig, (ax0, ax1) = plt.subplots(nrows=2, figsize=(9, 6))
    # The second argument is the number of bins: more bins give narrower,
    # denser bars. ``normed`` was removed from Axes.hist; ``density=True`` is
    # the replacement and normalises the bar areas to sum to 1.
    ax0.hist(x, 40, density=True, histtype='bar', facecolor='yellowgreen', alpha=0.75)
    # pdf: how the 10000 samples are distributed over the value intervals.
    ax0.set_title('pdf')
    ax1.hist(x, 20, density=True, histtype='bar', facecolor='pink', alpha=0.75, cumulative=True, rwidth=0.8)
    # cdf: cumulative=True accumulates the bins, e.g. to read off the
    # probability of a sample being below a given value.
    ax1.set_title("cdf")
    fig.subplots_adjust(hspace=0.4)
    plt.show()
sys.exit(0)