我准备了以下脚本:

from datetime import datetime # to parse your string as a date
from collections import defaultdict # to accumulate frequencies
import calendar # to get the names of the months
# Month names indexed by month number (1..12), copied from
# calendar.month_name; index 0 is the empty string, so MONTHS[date.month]
# needs no offset.
MONTHS = list(calendar.month_name)
def entries(filename):
    """Yield ``(animal, date, station)`` triplets read from `filename`.

    Every record is expected on its own line as ``animal:date:station``
    with the date written as ``MM-DD-YYYY``.  Lines starting with ``#``
    are comments.  Lines that do not split into exactly three
    ``:``-separated fields (blank lines included) are skipped.

    The yielded date is a ``datetime`` object; animal and station are the
    raw field strings.

    Raises:
        ValueError: if a three-field line has a date that does not match
            ``%m-%d-%Y``.
    """
    # Text mode: every line must be a ``str`` so that it can be compared
    # with the "#" and ":" literals below.  Binary mode yields ``bytes``
    # on Python 3, which makes ``startswith("#")`` raise TypeError.
    with open(filename, "r") as fp:
        for raw_line in fp:
            line = raw_line.strip()
            # skip comments
            if line.startswith("#"):
                continue
            try:
                animal, datestr, station = line.split(":")
            except ValueError:
                # wrong number of fields: not a record, try the next line
                continue
            # convert the date string to an actual datetime object
            date = datetime.strptime(datestr, "%m-%d-%Y")
            yield animal, date, station
def visits_per_animal(data):
    """Print the number of visits made by each animal, sorted by animal.

    `data` is an iterable of ``(animal, date, station)`` triplets; only
    the animal field is used.  The report is written to standard output:
    a ``Visits Per Animal`` header followed by one ``animal: count`` line
    per distinct animal.  Nothing is returned.
    """
    # a dictionary whose missing keys are implicitly created as integer 0
    counter = defaultdict(int)
    for animal, _date, _station in data:
        counter[animal] += 1
    # print the outcome; a single parenthesised argument prints the same
    # under the Python 2 print statement and the Python 3 print function
    print("Visits Per Animal")
    for animal in sorted(counter):
        print("{0}: {1}".format(animal, counter[animal]))
def month_of_highest_frequency(data):
    """Print the month with the highest number of visits.

    `data` is an iterable of ``(animal, date, station)`` triplets; only
    ``date.month`` is used.  Visits are counted per month number and the
    result is printed as ``<month name> has the most visits (<count>)``,
    with the name looked up in the module-level ``MONTHS`` list.

    If several months share the highest count, the earliest one in the
    calendar is reported.  If `data` is empty, ``No visits recorded`` is
    printed instead.  Nothing is returned.
    """
    # a dictionary whose missing keys are implicitly created as integer 0
    counter = defaultdict(int)
    for _animal, date, _station in data:
        counter[date.month] += 1
    if not counter:
        # max() raises ValueError on an empty sequence, so report it here
        print("No visits recorded")
        return
    # select the (month, visits) pair with the most visits; negating the
    # month number makes the smaller month win a tie, so the outcome does
    # not depend on dictionary ordering
    month_max, visits_max = max(
        counter.items(), key=lambda item: (item[1], -item[0])
    )
    # pretty-print
    print("{0} has the most visits ({1})".format(MONTHS[month_max], visits_max))
def main(filename):
    """Main program: read the entries of `filename` and print both reports."""
    # Materialise the generator once: both reports iterate over the data,
    # and a generator can only be consumed a single time.
    data = list(entries(filename))
    visits_per_animal(data)
    month_of_highest_frequency(data)
if __name__ == "__main__":
    import sys

    # The data file is the first command-line argument; exit with a usage
    # message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {0} FILENAME".format(sys.argv[0]))
    main(sys.argv[1])
用法如下(请将 script.py 换成脚本的实际文件名,数据文件路径作为第一个参数传入):

    python script.py <数据文件>
话虽如此,我必须建议您不要使用这种方法。像这样查询数据非常低效、困难,而且容易出错。我建议您将数据存储在一个真正的数据库中(Python 为 SQLite 提供了优秀的绑定),并使用 SQL 来完成这些汇总统计。
如果您采用 SQLite 方案,只需将查询保存为纯文本文件,并按需运行它们(通过 Python、GUI 或命令行)。