爬取双色球历史数据
爬取地址:http://baidu.lecai.com/lottery/draw/list/50
需要的包:BeautifulSoup(需先安装,例如 pip install beautifulsoup4)
脚本如下。运行后,抓取到的数据经 JSON 处理后保存在 data 文件中。
# -*- coding: utf-8 -*-
import urllib
import re
import json
from bs4 import BeautifulSoup
li = []
for year in range(2003, 2015):
print year
htmlcon = urllib.urlopen('http://baidu.lecai.com/lottery/draw/list/50?d=%s-01-01' % year)
html = htmlcon.read()
htmlcon.close()
soup = BeautifulSoup(html)
table_html_set = soup.findAll(id='draw_list')
num_tuple_list = []
for table_html in table_html_set:
tr_html_set = table_html.findAll('tr')
for tr_html in tr_html_set:
span_html_set = tr_html.findAll('span', attrs={'class': re.compile('^ball_')})
num_