顺便一提,我也按照“问题中的规范”尝试了一下 ;-)

#! /usr/bin/env python
"""Learn by example - grass roots parser for kind of serialized
race reports, where records are separated by a pipe (|), fields
of result records separated by a dash (-) and some meaning is associated
with the fields per position in record."""
from __future__ import print_function
def parse_duration_string(text, min_sep=':'):
    """Parse a ``minutes<min_sep>seconds.fraction`` duration string.

    Surrounding whitespace in ``text`` is ignored. The result is the
    ordered pair ``(minutes, seconds)`` with ``minutes`` an ``int`` and
    ``seconds`` a ``float``. When ``min_sep`` does not occur in the
    text, the whole text is read as seconds and ``minutes`` is 0.

    Raises ``ValueError`` if a part is not numeric or if ``min_sep``
    occurs more than once.
    """
    cleaned = text.strip()
    if min_sep not in cleaned:
        # Seconds-only notation, e.g. "18.70".
        return 0, float(cleaned)
    # Exactly one separator is expected; more make the unpacking fail.
    minute_part, second_part = cleaned.split(min_sep)
    return int(minute_part), float(second_part)
def parse_sports_concat(text, token_set, rec_sep='|', field_sep='-'):
    """Collect the parsed results of a serialized report per distance.

    ``text`` is split into records at ``rec_sep``; records that are
    empty after stripping whitespace are skipped. Every other record
    must split at ``field_sep`` into exactly three fields (distance,
    duration, trailing value), otherwise ``ValueError`` is raised.

    For a distance contained in ``token_set`` the pair
    ``(parse_duration_string(duration), trailing_value)`` is appended
    to that distance's list, the trailing value having parentheses and
    blanks stripped from both ends. For any other distance a warning
    is printed and the record is dropped.

    Returns a dict mapping distance -> list of such pairs.
    """
    bracket_chars = '( )'
    results = {}
    for chunk in text.split(rec_sep):
        entry = chunk.strip()
        if not entry:
            continue
        dist, in_secs, magic = entry.split(field_sep)
        key = dist.strip()
        if key not in token_set:
            print("Warning: Distance({0}) not found in TokenSet!".format(key))
            continue
        parsed = (parse_duration_string(in_secs), magic.strip(bracket_chars))
        results.setdefault(key, []).append(parsed)
    return results
def main():
    """Test drive the split, parse, map.

    Parses a few sample report lines, merges their results per distance
    and prints one line for every known distance token, using
    'NotReported' for distances without any result.
    """
    line_seq = (
        ('200m - 18.70 - (18.7)|600m - 47.50 - (28.8)|'
         '1000m - 1:16.62 - (29.1)|1400m - 1:45.74 - (29.1)|'
         '1800m - 2:14.95 - (29.2)|2200m - 2:44.16 - (29.2)|'
         '2600m - 3:13.27 - (29.1)|3000m - 3:42.40 - (29.1)|'
         '3400m - 4:11.70 - (29.3)|3800m - 4:41.11 - (29.4)|'
         '4200m - 5:10.75 - (29.6)|4600m - 5:40.39 - (29.6)|'
         '5000m - 6:10.44 - (30.0)|'),
        '600m - 45.92 - (27.7)|1000m - 1:16.24 - (30.3)|',
        ('300m - 23.51 - (23.5)|700m - 48.80 - (25.2)|'
         '1100m - 1:14.96 - (26.1)|1500m - 1:42.48 - (27.5)|'),
    )
    # Known distances in units of 100 m, each listed once so that every
    # distance is reported exactly once below.
    core_seq = (
        1, 2, 3, 6, 7, 10, 11, 14, 15, 18, 22, 26,
        30, 34, 38, 42, 46, 50)
    token_seq = tuple('%dm' % (z * 100,) for z in core_seq)
    token_set = set(token_seq)  # built once, reused for every line

    total_map = {}
    for line in line_seq:
        parsed = parse_sports_concat(line, token_set)
        for distance, results in parsed.items():
            # Extend rather than dict.update(): a distance may occur in
            # several lines and update() would replace the results that
            # were collected from earlier lines.
            total_map.setdefault(distance, []).extend(results)

    for distance in token_seq:
        print('{0}: {1}'.format(
            distance, total_map.get(distance, 'NotReported')))
# Run the demonstration only when executed as a script, not on import.
if __name__ == "__main__":
    main()
输出:
^{pr2}$
它只使用字符串的内置方法进行解析并构建映射,通过合并多行的解析结果来扩展该映射,还将时间解析为一对(整数分钟,浮点秒)。第三个字段(名为 magic)保持原样,只是去掉了两端的括号,因为它现在安全地存储在一个固定的“插槽”中。
注意,变量 in_secs 可能不是一个好名字,但谁知道呢。
只要不调用 parse_duration_string() 函数,就可以很容易地让持续时间字符串保持原样。
享受 Python 学习之旅——上面的代码片段都符合 PEP 8。在 Python 世界里,了解 PEP 8 以及常见的 Python 模式/习惯用法通常很有帮助:统一的代码风格便于阅读,能让其他人直接进入问题本身。
注:在向 race_map 中插入之前先显式检查该距离是否已存在,这种写法有点笨拙,但我想,显式的(尤其是在刚开始学习语言时)比隐式的要好。