用 Python 写的爬虫练习,感觉比 Golang 要好写一点。
"""Scrape the Douban Top 250 listing pages and print the average rating.

The script builds one URL per listing page, downloads each page, extracts
every ``property="v:average"`` rating with a regular expression and prints
the mean of the collected ratings.  It runs on both Python 2 and Python 3.
"""
import re
import urllib
from contextlib import closing
from itertools import chain, islice

try:
    # Python 3: urlopen lives in the urllib.request submodule.
    import urllib.request
    _urlopen = urllib.request.urlopen
except ImportError:
    # Python 2: urlopen is exposed directly on the urllib module.
    _urlopen = urllib.urlopen

# Template URL; the "00" after "start=" is replaced by the page offset.
origin_url = 'https://movie.douban.com/top250?start=00&filter='
urls = []    # listing-page URLs, filled by get_url()
scores = []  # one list of rating strings per downloaded page, filled by solve()

TOTAL_MOVIES = 250  # number of entries in the listing
PAGE_SIZE = 25      # offset increment between consecutive listing pages

# The dot is escaped so only a literal decimal point matches; one or two
# leading digits are accepted so that a rating of "10.0" is also captured.
_SCORE_RE = re.compile(r'property="v:average">([0-9]{1,2}\.[0-9])</span>')


def get_url(total=TOTAL_MOVIES, page_size=PAGE_SIZE):
    """Rebuild the global ``urls`` list with one URL per listing page.

    The list is cleared first, so repeated calls do not accumulate
    duplicates.  Offsets run over ``range(0, total, page_size)``; with the
    defaults that is 0, 25, ..., 225 (ten pages).

    Args:
        total: number of entries to cover.
        page_size: offset increment between pages.

    Returns:
        The global ``urls`` list (the same object, mutated in place).

    Raises:
        ValueError: if ``page_size`` is 0 (raised by ``range``).
    """
    del urls[:]
    for start in range(0, total, page_size):
        urls.append(origin_url.replace('start=00', 'start=%d' % start))
    return urls


def get_html(url):
    """Download ``url`` and return the response body as a native ``str``.

    The response object is always closed.  On Python 3 the raw bytes are
    decoded as UTF-8 (undecodable bytes are replaced) so that the text
    regular expression in ``get_score`` can be applied to the result.
    """
    with closing(_urlopen(url)) as page:
        html = page.read()
    if not isinstance(html, str):
        html = html.decode('utf-8', 'replace')
    return html


def get_score(html):
    """Return every rating string found in ``html``, in document order.

    A rating is the ``D.D`` or ``DD.D`` text inside an element carrying
    ``property="v:average"``.  Returns an empty list when nothing matches.
    """
    return _SCORE_RE.findall(html)


def average_score(pages, limit=TOTAL_MOVIES):
    """Return the mean of the first ``limit`` ratings across ``pages``.

    Args:
        pages: iterable of iterables of rating strings (one per page).
        limit: maximum number of ratings to include; ``None`` means all.

    Returns:
        The arithmetic mean as a float, or ``0.0`` when there are no
        ratings.  The divisor is the number of ratings actually used,
        not a fixed constant.
    """
    values = [float(v) for v in islice(chain.from_iterable(pages), limit)]
    if not values:
        return 0.0
    return sum(values) / len(values)


def solve():
    """Fetch every listing page and return the average rating.

    Rebuilds ``urls``, resets ``scores``, prints each URL as it is fetched,
    stores the per-page rating lists in ``scores`` and returns their mean
    (see ``average_score``).
    """
    get_url()
    del scores[:]
    for each in urls:
        print(each)
        scores.append(get_score(get_html(each)))
    return average_score(scores)


if __name__ == '__main__':
    print(solve())