拿到这个表的时候,我想尽快知道最后这一列标红的数据每一格由多少个数字组成,所以就用 Python 按分隔符把它拆开,并统计出其中数字的个数,即长度。
详细代码:
import csv
import pandas as pd
def count_width_values(rows, value_col=10, id_col=1, sep='|'):
    """Count the separator-delimited values in one column of each row.

    Each record is built from a single row, so the gene id and the counted
    value always come from the same CSV line.

    Args:
        rows: Iterable of CSV data rows (sequences of strings), header
            already removed.
        value_col: Index of the column holding the delimited values.
        id_col: Index of the column holding the gene id.
        sep: Delimiter between the individual values inside the cell.

    Returns:
        A list of dicts with the keys ``geneid``, ``kuandu`` (the raw cell
        text) and ``num`` (how many pieces the cell splits into).

    Raises:
        IndexError: If a non-empty row has fewer columns than required.
    """
    records = []
    for row in rows:
        # csv.reader yields an empty list for a blank line; there is
        # nothing to count there, so skip it instead of failing on row[10].
        if not row:
            continue
        cell = row[value_col]
        # str.split always returns at least one piece, so an empty cell
        # is reported with num == 1.
        records.append({
            'geneid': row[id_col],
            'kuandu': cell,
            'num': len(cell.split(sep)),
        })
    return records


if __name__ == '__main__':
    # newline='' lets the csv module handle line endings itself, including
    # newlines embedded in quoted fields.
    with open(r'./illumina/protein/test.csv', encoding='utf-8',
              newline='') as file:
        f_csv = csv.reader(file)
        next(f_csv, None)  # skip the header row
        # Column 10 is the column holding the data to process.
        width_num = count_width_values(f_csv, value_col=10, id_col=1)

    df = pd.DataFrame(width_num)
    df.to_csv(r"./illumina/protein/width_count_num_32996.csv", index=False)