import urllib
import re
source = urllib.urlopen('http://disneyworld.disney.go.com/parks/magic-kingdom/c\
alendar/')
page = source.read()
prkhrs = {}
def main():
parsehours()
def parsehours():
#look for #:## AM - #:## PM
date = r'201204\d{02}'
hours = r'\d:0{2}\s\w{2}\s-\s\d:0{2}\s\w{2}'
#go through page line by line
for line in page:
times = re.findall(hours, line)
dates = re.search(date, line)
if dates:
start = dates.start()
end = dates.end()
curdate = line[start:end]
#if #:## - #:## is found, a date has been found
if times:
#create dictionary from date, stores hours in variable
#extra magic hours(emh) are stored in same format.
#if entry has 2/3 hour listings, those listings are emh
prkhrs[curdate]['hours'] = times
#just print hours for now. will change later
print prkhrs我遇到的问题是,当我将'打印线'放入通过页面的for循环中时,它会一次打印出一个字符,我假设这是搞乱了什么。
现在,'打印prkhrs'只是打印任何东西,但使用re.findall日期和小时打印出正确的时间,所以我知道正则表达式的作品。关于如何让它运作的任何建议?