python如何读取csv文件某几行某几列_[Python] 将csv文件中单个列的多个value拆分成多个列的单个value...

很久没有更新博客了,最近handle了一个项目需要处理大量csv文件,需求是将csv文件中某单个列的多个values(该多个vaules以回车分隔)拆分成多个相同列的单个value,由于python在处理脚本先天比shell更有优势,这里把自己的最近学到python知识加以应用,编写了一个脚本,供大家参考.

#!/usr/bin/env python

import csv

import sys

# Define the duality list

Csv_content_edited = []

# Define the file that needs to be handled.

try:

file_name = sys.argv[1]

new_file_name = file_name.split('.')[0] + '_new.' + file_name.split('.')[1]

except IndexError:

pass

# Define the collumns that need to be dealt with.

Field = ['Affects Version/s','Fix Version/s','Component/s']

"""

Search the index numbers of each components of "Field" list in the first row of csv file,

return the dic["field value":"the index of first_row"]

"""

def search_field_index(field, first_row):

fields_dic = {}

for x in first_row:

if x in field:

fields_dic[x] = first_row.index(x)

return fields_dic

'''

Find the maximum value number of each "Field" components in "Csv_content_edited" list,

then insert the actual number of the columns after each "Field" columns.

'''

def insert_max_col(field_index,Field_val):

# Define the list stored maximum number of the value of the field

index_len = []

# Define the list stored the value of the field

index_val = []

# Recursive the row from 2 ~ max of the csv field

for r in range(1,len(Csv_content_edited)):

# Deal with special string

c = repr(Csv_content_edited[r][field_index]).split("\\r")

index_len.append(len(c))

index_val.append(c)

# The maximum component of the "index_len" list

max_index_len = max(index_len)

# Clear " " and "'" string which are in the head or tail of each "index_val" components.

for iv in range(0,len(index_val)):

for _iv in range(0,len(index_val[iv])):

index_val[iv][_iv] = index_val[iv][_iv].strip(' \'')

# insert null value collumn after the field from "Field" list to the "Csv_content_edited" list

for i in range(0,len(Csv_content_edited)):

for m in range(0,max_index_len - 1):

Csv_content_edited[i].insert(field_index + m + 1,'')

# Update the first row of "Csv_content_edited" list to the field components from "Field" list

for i1 in range(field_index,field_index + max_index_len):

Csv_content_edited[0][i1] = Field_val

# Update the rest rows of "Csv_content_edited" list to the field components from "index_val" list

for i in range(1,len(Csv_content_edited)):

for i2 in range(field_index,field_index + max_index_len):

try:

Csv_content_edited[i][i2] = index_val[i-1][i2 - field_index]

except IndexError as s:

pass

# return Csv_content_edited[3],index_val,max_index_len

# return len(Csv_content_edited),len(index_val)

# print max_index_len

# print index_val[2][0]

# return len(index_val[2])

# Read and deal with the csv file.

def readContent(file_name):

# Read the csv file,then put it into list.

with open(file_name, 'r') as csvfile:

csv_reader = csv.reader(csvfile, delimiter=',')

for row in csv_reader:

if row[0] == 'Key':

first_row = []

first_row = row

# insert first row of csv to the list

Csv_content_edited.append(first_row)

else:

# insert rest row of csv to the list

Csv_content_edited.append(row)

else:

# Obtain all current "Field" index in "Csv_content_edited" list

fields_index = search_field_index(Field,first_row)

# Obtain the index of "Affects_Version" in in "Csv_content_edited" list

Affects_Version_index = fields_index[Field[0]]

# insert the matching value

insert_max_col(Affects_Version_index,Field[0])

# Obtain all current "Field" index in "Csv_content_edited" list

first_row_updated = Csv_content_edited[0]

fields_index = search_field_index(Field,first_row_updated)

# Obtain the index of "Affects_Version" in in "Csv_content_edited" list

Fix_Version_index = fields_index[Field[1]]

# insert the matching value

insert_max_col(Fix_Version_index,Field[1])

# Obtain all current "Field" index in "Csv_content_edited" list

first_row_updated = Csv_content_edited[0]

fields_index = search_field_index(Field,first_row_updated)

# Obtain the index of "Affects_Version" in in "Csv_content_edited" list

Fix_Version_index = fields_index[Field[2]]

# insert the matching value

insert_max_col(Fix_Version_index,Field[2])

# Obtain the final edited list.

return Csv_content_edited

# Write the csv file.

def writeContent():

with open(new_file_name,'wb') as csvfile:

csv_writer = csv.writer(csvfile)

csv_writer.writerows(readContent(file_name))

# Execute the finnal function.

if __name__ == '__main__':

try:

writeContent()

except (IOError,NameError,IndexError):

print "Please type the correct file name. e.g: '" + sys.argv[0] + " testfile.csv'"

else:

print 'The result file is: %s' %new_file_name

处理前的效果

QQ20150323-1.png

处理后的效果

QQ20150323-2.png

如有雷同,纯属扯淡...

正文部分到此结束

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值