可以使用list的defaultdict按ID列对数据分组。然后使用sum()生成总数。在from collections import defaultdict
with open('datafile.csv') as f:
d = defaultdict(list)
next(f) # skip first header line
next(f) # skip second header line
for line in f:
id_, value, amount = line.split()
d[id_].append((int(value), int(amount)))
# sum and average of column Value by ID
for id_ in d:
total = sum(t[0] for t in d[id_])
average = total / float(len(d[id_]))
print('{}: sum = {}, avg = {:.2f}'.format(id_, total, average))
输入数据的输出:
^{pr2}$
它也可以用标准的Python字典完成。解决方案非常相似:with open('datafile.csv') as f:
d = {}
next(f) # skip first header line
next(f) # skip second header line
for line in f:
id_, value, amount = line.split()
d[id_] = d.get(id_, []) + [(int(value), int(amount))]
# sum and average of column Value by ID
for id_ in d:
total = sum(t[0] for t in d[id_])
average = total / float(len(d[id_]))
print('{}: sum = {}, avg = {:.2f}'.format(id_, total, average))