使用pandas处理CSV文件

使用[pandas]处理CSV文件

import csv
import numpy as np
import pandas as pd

DATA_PATH = 'test.csv'   # 文件路径

1 读CSV

# Read the CSV file into a DataFrame.
data = pd.read_csv(DATA_PATH)
print('返回数据格式:', type(data))

# Gather the structural attributes of the frame first, then report them.
col_labels = np.array(data.columns)   # column labels as an ndarray
values = np.array(data.values)        # cell values as a 2-D ndarray
shape = data.shape                    # (rows, columns)
size = data.size                      # total number of cells
ndim = data.ndim                      # number of dimensions
axes = data.axes                      # [row index, column index]

print('==> labels of cols:', col_labels)
print('==> values:', values)
print('==> shape:', shape)
print('==> size:', size)
print('==> ndim:', ndim)
print('==> axes', axes)

# Indexing with a column label returns every value of that column
# as a pandas.core.series.Series.
names = data['name']
print('==> names:', names)

print()
print('===> Original data:')
# Bare expression: in a notebook this renders the frame as the cell output.
data
返回数据格式: <class 'pandas.core.frame.DataFrame'>
==> labels of cols: ['name' 'sex' 'age' 'score']
==> values: [['Tom' 'man' 20 220]
 ['Jim' 'man' 22 225]
 ['Mary' 'woman' 24 205]]
==> shape: (3, 4)
==> size: 12
==> ndim: 2
==> axes [RangeIndex(start=0, stop=3, step=1), Index(['name', 'sex', 'age', 'score'], dtype='object')]
==> names: 0     Tom
1     Jim
2    Mary
Name: name, dtype: object

===> Original data:
   name    sex  age  score
0   Tom    man   20    220
1   Jim    man   22    225
2  Mary  woman   24    205

2 增加列

# Add a column.
# The new column is built as a named Series; ``join`` aligns it with ``data``
# on the row index and uses the Series name ('ID') as the column label.
# It is called ``id_series`` rather than ``id`` so that the builtin ``id()``
# is not shadowed for the rest of the module.
id_series = pd.Series(['12138', '12139', '12140'], name='ID')
new_data = data.join(id_series)
# Bare expression: in a notebook this renders the frame as the cell output.
new_data
   name    sex  age  score     ID
0   Tom    man   20    220  12138
1   Jim    man   22    225  12139
2  Mary  woman   24    205  12140

3 增加行

# 增加行
def add_element(attr, val):
    """Return a new Series with the values of ``attr`` followed by ``val``.

    The result keeps the name of ``attr`` and is built from a plain list,
    so it gets a fresh default index. ``attr`` itself is left unchanged.
    """
    extended = [*attr.to_list(), val]
    return pd.Series(extended, name=attr.name)

def add_row(old_data, values):
    """Return a copy of ``old_data`` with one extra row appended at the end.

    Args:
        old_data: DataFrame to extend. It is not modified.
        values: Cell values for the new row, one per column, given in the
            same order as ``old_data.columns``.

    Returns:
        A new DataFrame with the same columns as ``old_data``, the new row
        last, and the row index renumbered from 0.

    Raises:
        ValueError: If ``values`` does not contain exactly one entry per
            column.
    """
    row = list(values)
    n_cols = len(old_data.columns)
    if len(row) != n_cols:
        raise ValueError(
            f'expected {n_cols} values (one per column), got {len(row)}'
        )
    # Build the row as a one-line frame and concatenate. Assembling one
    # Series per column and transposing would force every column to
    # ``object`` dtype, so numeric columns would stop being numeric.
    new_row = pd.DataFrame([row], columns=old_data.columns)
    return pd.concat([old_data, new_row], ignore_index=True)

# Append one more record; the values are listed in column order.
jane_row = ['Jane', 'woman', 20, 236, '12141']
new_data = add_row(new_data, jane_row)
# Bare expression: in a notebook this renders the frame as the cell output.
new_data
   name    sex  age  score     ID
0   Tom    man   20    220  12138
1   Jim    man   22    225  12139
2  Mary  woman   24    205  12140
3  Jane  woman   20    236  12141

4 删除列

# Drop the 'ID' column. ``drop`` returns a new frame; ``new_data`` is unchanged.
new_data_2 = new_data.drop(columns='ID')
# Bare expression: in a notebook this renders the frame as the cell output.
new_data_2
   name    sex  age  score
0   Tom    man   20    220
1   Jim    man   22    225
2  Mary  woman   24    205
3  Jane  woman   20    236

5 删除行

# Drop the rows labelled 1 and 2. ``drop`` returns a new frame, and the
# remaining rows keep their original index labels.
new_data_3 = new_data.drop(index=[1, 2])
# Bare expression: in a notebook this renders the frame as the cell output.
new_data_3
   name    sex  age  score     ID
0   Tom    man   20    220  12138
3  Jane  woman   20    236  12141

6 修改值

# Modify values.
# Each cell is assigned with a single ``.loc[row_label, column_label]`` call.
# Chained indexing (``new_data['score'][0] = ...``) may write to a temporary
# copy: it triggers SettingWithCopyWarning and leaves the frame unchanged
# when pandas Copy-on-Write is active.
new_data.loc[0, 'score'] = 300
new_data.loc[0, 'name'] = 'Tim'
# Bare expression: in a notebook this renders the frame as the cell output.
new_data
   name    sex  age  score     ID
0   Tim    man   20    300  12138
1   Jim    man   22    225  12139
2  Mary  woman   24    205  12140
3  Jane  woman   20    236  12141

7 写入CSV

# Write the modified frame to a new CSV file.
# NOTE(review): with pandas' default settings the row index is written as an
# extra first column; pass index=False if that is not wanted.
new_data.to_csv(path_or_buf='new_test.csv')
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值