使用[pandas]处理CSV文件
import csv
import numpy as np
import pandas as pd
DATA_PATH = 'test.csv'
1 读CSV
# Load the CSV file into a DataFrame and report the type pandas returned.
data = pd.read_csv(DATA_PATH)
print('返回数据格式:', type(data))

# Column labels and cell values, each copied into a NumPy array.
col_labels, values = np.array(data.columns), np.array(data.values)
print('==> labels of cols:', col_labels)
print('==> values:', values)

# Basic geometry of the frame: (rows, cols), total cell count, number of axes,
# and the axis index objects themselves.
shape, size, ndim, axes = data.shape, data.size, data.ndim, data.axes
print('==> shape:', shape)
print('==> size:', size)
print('==> ndim:', ndim)
print('==> axes', axes)

# Selecting a single column by label yields a Series.
names = data['name']
print('==> names:', names)

print()
print('===> Original data:')
# Bare expression: in a notebook this renders the DataFrame as the cell output.
data
返回数据格式: <class 'pandas.core.frame.DataFrame'>
==> labels of cols: ['name' 'sex' 'age' 'score']
==> values: [['Tom' 'man' 20 220]
['Jim' 'man' 22 225]
['Mary' 'woman' 24 205]]
==> shape: (3, 4)
==> size: 12
==> ndim: 2
==> axes [RangeIndex(start=0, stop=3, step=1), Index(['name', 'sex', 'age', 'score'], dtype='object')]
==> names: 0 Tom
1 Jim
2 Mary
Name: name, dtype: object
===> Original data:
|   | name | sex | age | score |
|---|------|-----|-----|-------|
| 0 | Tom | man | 20 | 220 |
| 1 | Jim | man | 22 | 225 |
| 2 | Mary | woman | 24 | 205 |
2 增加列
# Build the new column as a named Series; the Series name ('ID') becomes the
# column label after the join. The variable is deliberately not called `id`,
# which would shadow the builtin id() for the rest of the session.
id_column = pd.Series(['12138', '12139', '12140'], name='ID')
# join() matches rows on the index and returns a new frame; `data` is unchanged.
new_data = data.join(id_column)
# Bare expression: in a notebook this renders the DataFrame as the cell output.
new_data
|   | name | sex | age | score | ID |
|---|------|-----|-----|-------|----|
| 0 | Tom | man | 20 | 220 | 12138 |
| 1 | Jim | man | 22 | 225 | 12139 |
| 2 | Mary | woman | 24 | 205 | 12140 |
3 增加行
def add_element(attr, val):
    """Return a new Series with ``val`` appended to the items of ``attr``.

    Args:
        attr: The source Series. It is not modified.
        val: The value to place after the existing items.

    Returns:
        A freshly built Series carrying the same name as ``attr``. It is
        constructed from a plain list, so it gets a default integer index.
    """
    extended = [*attr.to_list(), val]
    return pd.Series(extended, name=attr.name)
def add_row(old_data, values):
    """Return a copy of ``old_data`` with one extra row appended at the bottom.

    Args:
        old_data: The source DataFrame. It is not modified.
        values: A sequence of cell values in column order. Entries beyond
            the number of columns are ignored.

    Returns:
        A new DataFrame with the same columns as ``old_data`` and a fresh
        0..n integer index, whose last row holds ``values``.

    Raises:
        IndexError: If ``values`` has fewer entries than ``old_data`` has
            columns.
    """
    column_count = len(old_data.columns)
    # Index explicitly so that a too-short `values` fails with IndexError.
    row = [values[pos] for pos in range(column_count)]
    new_row = pd.DataFrame([row], columns=old_data.columns)
    # Concatenating keeps each column's dtype; rebuilding the frame from a
    # list of Series and transposing it turns every column into `object`.
    return pd.concat([old_data, new_row], ignore_index=True)
# Append one more record; the values are given in column order
# (name, sex, age, score, ID) and the result replaces `new_data`.
new_data = add_row(
    old_data=new_data,
    values=['Jane', 'woman', 20, 236, '12141'],
)
# Bare expression: in a notebook this renders the DataFrame as the cell output.
new_data
|   | name | sex | age | score | ID |
|---|------|-----|-----|-------|----|
| 0 | Tom | man | 20 | 220 | 12138 |
| 1 | Jim | man | 22 | 225 | 12139 |
| 2 | Mary | woman | 24 | 205 | 12140 |
| 3 | Jane | woman | 20 | 236 | 12141 |
4 删除列
# Remove the 'ID' column. drop() returns a new frame; `new_data` is unchanged.
new_data_2 = new_data.drop(columns='ID')
# Bare expression: in a notebook this renders the DataFrame as the cell output.
new_data_2
|   | name | sex | age | score |
|---|------|-----|-----|-------|
| 0 | Tom | man | 20 | 220 |
| 1 | Jim | man | 22 | 225 |
| 2 | Mary | woman | 24 | 205 |
| 3 | Jane | woman | 20 | 236 |
5 删除行
# Remove the rows labelled 1 and 2. drop() returns a new frame and keeps the
# surviving rows' original index labels; `new_data` is unchanged.
new_data_3 = new_data.drop(index=[1, 2])
# Bare expression: in a notebook this renders the DataFrame as the cell output.
new_data_3
|   | name | sex | age | score | ID |
|---|------|-----|-----|-------|----|
| 0 | Tom | man | 20 | 220 | 12138 |
| 3 | Jane | woman | 20 | 236 | 12141 |
6 修改值
# Overwrite single cells with one .loc[row_label, column_label] assignment.
# Chained indexing (df[col][row] = value) assigns through an intermediate
# object: pandas warns about it, and under Copy-on-Write the original frame
# is left unmodified.
new_data.loc[0, 'score'] = 300
new_data.loc[0, 'name'] = 'Tim'
# Bare expression: in a notebook this renders the DataFrame as the cell output.
new_data
|   | name | sex | age | score | ID |
|---|------|-----|-----|-------|----|
| 0 | Tim | man | 20 | 300 | 12138 |
| 1 | Jim | man | 22 | 225 | 12139 |
| 2 | Mary | woman | 24 | 205 | 12140 |
| 3 | Jane | woman | 20 | 236 | 12141 |
7 写入CSV
# Write the frame to disk. index=True is the pandas default, spelled out here
# because it means the row index is written as an extra, unnamed first column.
new_data.to_csv('new_test.csv', index=True)