Task:
1. 在福彩官网抓取所有的历史双色球数据。
2. 使用历史数据进行机器学习。
Part1 数据抓取
main.py
import re
from bs4 import BeautifulSoup
import urllib2
from mylog import MyLog as mylog
from save2excel import SavaBallDate
class DoubleColorBallItem(object):
    """Plain record holding the fields of one double-color-ball draw.

    Every field is declared at class level with a ``None`` default, so a
    fresh instance reads ``None`` for each field until it is assigned.
    The code that fills a record is not part of this block, so the value
    types are not fixed here.
    """

    date = None
    order = None  # presumably the draw/issue number -- confirm against the scraper
    # The six red-ball numbers of the draw.
    red1 = None
    red2 = None
    red3 = None
    red4 = None
    red5 = None
    red6 = None
    # The single blue-ball number of the draw.
    blue = None
    money = None  # presumably a sales or prize-pool amount -- TODO confirm
    firstPrize = None
    secondPrize = None
class GetDoubleColorBallNumber(object):
'''这个类用于获取双色球中奖号码, 返回一个txt文件
'''
def __init__(self):
    """Run the whole scrape-and-export job as a side effect of construction.

    Steps, in order: create the logger, call ``getUrls()``, pass
    ``self.urls`` to ``spider()`` to obtain the items, hand the items to
    ``pipelines()``, then export them through ``SavaBallDate``.
    """
    # The right-hand side of this assignment was missing in the original,
    # which is a syntax error. An empty list is restored here; presumably
    # getUrls() appends the page URLs to it -- confirm against getUrls().
    self.urls = []
    self.log = mylog()
    self.getUrls()
    self.items = self.spider(self.urls)
    self.pipelines(self.items)
    self.log.info('beging save data to excel \r\n')
    SavaBallDate(self.items)
    self.log.info('save data to excel end ...\r\n')
def getUrls(self):
'''获取数据来源网页
'''
URL = r'http://kaijiang.zhcw.com/zhcw/html/ssq/list_1.html'
htmlContent = self.getResponseContent(URL)
soup = BeautifulSoup(htmlContent, 'lxml')
tag = soup.find_all(re.compile('p'))[-1]
pages = tag.strong.get_text()
for i in xrange(1, int(pages)+1):
u