Python pandas tutorial


Download “pokemon.csv” from:
https://gist.github.com/armgilles/194bcff35001e7eb53a2a8b441e8b2c6
RAW:
https://gist.githubusercontent.com/armgilles/194bcff35001e7eb53a2a8b441e8b2c6/raw/92200bc0a673d5ce2110aaad4544ed6c4010f687/pokemon.csv
镜像文件:
https://blog.csdn.net/sway_wu/article/details/103590248

import pandas and basic set up

import pandas as pd
import numpy as np
import re

# A width of None lets pandas detect the terminal width instead of using a fixed one.
pd.options.display.width = None
# Load the dataset from the current working directory into the frame used below.
df = pd.read_csv('pokemon.csv')

Read the headers (column names), the values, the number of rows, and summary statistics

# Column labels.
print(df.columns)
# Cell contents as a NumPy array.
print(df.values)
# Number of rows.
print(df.shape[0])
# Summary statistics as reported by describe().
print(df.describe())

Print the first or last rows of the file

# First five rows, shown two equivalent ways: head() defaults to 5 rows,
# and iloc takes a positional slice.
print(df.head())
print(df.iloc[0:5])

# Last six rows, shown three equivalent ways.
print(df.iloc[-6:])
print(df.tail(6))
print(df[-6:])

slicing records

# Select columns by label: a list gives a DataFrame, a single label a Series.
print(df.loc[:, ['Name', 'HP']])
print(df['Name'])

# The same two-column selection written with loc and with plain brackets.
print(df.loc[:, ['HP', 'Attack']])
print(df[['HP', 'Attack']])

# Rows 3..5 of two columns: by label (loc includes the end label) and
# by position (iloc excludes the end position).
print(df[['Total', 'Attack']].loc[3:5])
print(df.iloc[3:6].iloc[:, [4, 6]])

# First four rows, then a single cell addressed by (row, column) position.
print(df.iloc[:4])
print(df.iat[1, 4])

# Boolean-mask selection: rows whose 'Type 1' equals "Grass".
print(df[df['Type 1'].eq("Grass")])

sorting/Describing Data

# Sort by name, Z to A.
print(df.sort_values(by='Name', ascending=False))
# Sort by two keys: 'Type 1' ascending, then 'HP' descending within each type.
print(df.sort_values(by=['Type 1', 'HP'], ascending=[True, False]))

renaming columns and moving columns

# Demonstrate renaming, reordering and bulk-relabelling columns on a separate
# frame. Doing this in place on `df` would remove the labels ('HP', 'Sp. Atk',
# 'Type 2', ...) and shift the column positions that later sections rely on.

# rename() without inplace=True returns a new frame and leaves `df` untouched.
df_renamed = df.rename(columns={'Sp. Atk': 'sp_atk', 'Sp. Def': 'sp_def'})

# Selecting with a list of labels reorders the columns ('Total' moves after 'Speed').
df_renamed = df_renamed[['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense',
                         'sp_atk', 'sp_def', 'Speed', 'Total', 'Generation', 'Legendary']]

# Assigning to .columns replaces every label at once; the list length must
# match the number of columns.
df_renamed.columns = ['#', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l']

assignment and Making changes to the data

# Overwrite a single cell: 'HP' of the row labelled 9.
df.loc[9, 'HP'] = 110
print(df.iloc[9:10])

# Copy of the frame with every missing value replaced by the string 'None'
# (not the None object); `df` itself is not modified.
df1 = df.where(pd.notnull(df), 'None')

# Two ways to build a total: adding columns by label, and summing a positional
# slice. NOTE(review): the positional form assumes columns 5..10 are the six
# stat columns in the CSV's original order -- confirm against the file.
df['Total_x'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
df['Total_y'] = df.iloc[:, 5:11].sum(axis=1)

print(df.sort_values(['Total_x'], ascending=False))
# Keep only 'Total_y'; drop the duplicate and the original 'Total'.
df = df.drop(columns=['Total_x', 'Total'])

# Move the last column (the newly added 'Total_y') to position 4.
# Negative indices keep this correct regardless of how many columns exist.
cols = df.columns.tolist()
print(cols)
df = df[cols[:4] + [cols[-1]] + cols[4:-1]]
cols = df.columns.tolist()
print(cols)

save data_frame

# Write the frame as tab-separated text, without the index column.
df.to_csv('modified.txt', sep='\t', index=False)

Filtering missing values (None/NaN)

# Rows where 'Type 2' is present.
print(df[df['Type 2'].notnull()])

# Rows where 'Type 2' is missing: negate the boolean mask element-wise with ~
# instead of looping over it in Python.
temp_list1 = df['Type 2'].notnull()
temp_list2 = ~temp_list1
print(df[temp_list2])

Filtering Data

# Rows filtered by a numeric threshold and by membership in a set of values.
print(df[df['HP'] >= 200])
print(df[df['Type 1'].isin(['Flying', 'Fairy'])])


# Overwrite 'Speed' from an array: first for rows labelled 3..5, then for every row.
df.loc[3:5, 'Speed'] = np.full(3, 5)
df.loc[:, 'Speed'] = np.full(len(df), 5)

# Derived column: mean of the two special stats.
df['av_Sp'] = (df['Sp. Atk'] + df['Sp. Def']) / 2

# Combine conditions with & (each wrapped in parentheses), then renumber the
# surviving rows from 0 and discard the old index.
new_df = df.loc[
    (df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] >= 70)
].reset_index(drop=True)

# Substring match on the name, followed by its complement.
mega_mask = df['Name'].str.contains('Mega')
print(df[mega_mask])
print(df[~mega_mask])

Conditional Changes

# Case-insensitive regex filters: 'Type 1' containing "fire" or "grass",
# and names starting with "pi".
print(df.loc[df['Type 1'].str.contains(r'FIRE|Grass', flags=re.I, regex=True)])
print(df.loc[df['Name'].str.contains(r'^pi[a-z]*', flags=re.I, regex=True)])

# Conditional assignment: rewrite 'Type 1' only on rows matching the mask.
df.loc[df['Type 1'] == 'Fire', 'Type 1'] = 'Flamer'

# The placeholder strings below do not fit the columns' existing dtypes;
# cast to object first, since pandas deprecates setting values of an incompatible dtype.
df = df.astype({'Generation': object, 'Legendary': object})
# 'Total' was dropped earlier in the script; 'Total_y' is the total that remains.
df.loc[df['Total_y'] > 500, ['Generation', 'Legendary']] = ['Test 1', 'Test 2']

Aggregate Statistics(Group by)

# Rows per 'Type 1', smallest group first; count() counts non-missing cells
# per column, and '#' is used as the sort key.
print(df.groupby(['Type 1']).count().sort_values('#', ascending=True))

# Per-type averages, weakest average attack first. numeric_only=True skips the
# string columns, which otherwise make mean() raise TypeError on pandas >= 2.0.
print(df.groupby(['Type 1']).mean(numeric_only=True).sort_values('Attack', ascending=True))

# Counts grouped by one key, then by two keys; the second assignment
# replaces the first.
df_count = df.groupby(['Type 1']).count()
df_count = df.groupby(['Type 1', 'Type 2']).count()

iterate a data frame

# Row-by-row iteration: iterrows() yields (index label, row as a Series).
for index, row in df.iterrows():
    print(index, row['Type 1'], row['HP'])

# Read the file written earlier ('modified.txt', tab-separated) five rows at a
# time. The loop variable is named `chunk` so it does not overwrite `df`.
for chunk in pd.read_csv('modified.txt', sep='\t', chunksize=5):
    print(chunk)

# Empty frame that shares df's column labels.
new_df = pd.DataFrame(columns=df.columns)

print(new_df)

# Same chunked read, applied to the original dataset.
for df2 in pd.read_csv('pokemon.csv', chunksize=5):
    print(df2)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值