#!/usr/bin/env python
# encoding: utf-8
"""
lott.py

Download the paginated lottery result pages addressed by BASE_URL, pull the
rows out of the HTML tables and store them in the MongoDB collection
``lott.double_color``.

Requires twisted, pymongo and BeautifulSoup.

Created by Daniel Yang on 2011-01-23.
Copyright (c) 2011 Yang. All rights reserved.
"""

import sys
import os

from twisted.internet import reactor, defer
from twisted.web.client import getPage
from BeautifulSoup import BeautifulSoup
from pymongo import Connection

# Connection() is called without arguments, i.e. with pymongo's defaults.
connection = Connection()
db = connection.lott
table = db.double_color

# Page URL template; %s is replaced by the 1-based page number.
BASE_URL = "http://kaijiang.zhcw.com/zhcw/html/ssq/list_%s.html"

# Number of the last result page to fetch (pages 1..LAST_PAGE are requested).
LAST_PAGE = 58

# Only table rows with exactly this many <td> cells are stored; header and
# layout rows are skipped because their cell count differs.
ROW_CELL_COUNT = 7


def handleError(error):
    """Report a failed page download or a failure raised while processing it.

    Used as the errback of every page deferred. Returning None marks the
    failure as handled, so one bad page does not affect the others.
    """
    # Single-argument print(...) produces the same text under the Python 2
    # print statement and the Python 3 print function.
    print('got error %s' % (error,))


def processPage(pageContent):
    """Parse one result page and insert each data row into MongoDB.

    pageContent -- the raw HTML of the page as returned by getPage().

    Every <tr> with exactly ROW_CELL_COUNT <td> cells yields one document
    built from the text of its first six cells; the seventh cell is not
    stored.
    """
    print('got page %d' % len(pageContent))
    soup = BeautifulSoup(pageContent)
    for tr in soup.findAll('tr'):
        tds = tr.findAll('td')
        if len(tds) != ROW_CELL_COUNT:
            continue
        # safe=True makes the insert wait for the server's acknowledgement,
        # so a failed write raises here and ends up in handleError.
        doc_id = table.insert({'date': tds[0].text,
                               'issue': tds[1].text,
                               'lott': tds[2].text,
                               'amount': tds[3].text,
                               'first': tds[4].text,
                               'second': tds[5].text},
                              safe=True)
        # Two spaces on purpose: the output matches the original
        # "print 'inserted ', id" statement byte for byte.
        print('inserted  %s' % (doc_id,))


def _stopReactor(_result):
    """Shut the reactor down once every page request has completed."""
    # Scheduled through callLater so the stop also works if this fires before
    # reactor.run() has actually started the loop.
    reactor.callLater(0, reactor.stop)


# Start all page downloads; they run concurrently once the reactor is running.
pending = []
for i in range(1, LAST_PAGE + 1):
    url = BASE_URL % i
    pageFetchedDeferred = getPage(url)
    pageFetchedDeferred.addCallback(processPage)
    # Added after the callback so it also catches errors raised by processPage.
    pageFetchedDeferred.addErrback(handleError)
    pending.append(pageFetchedDeferred)

# Without this the reactor would keep running forever after the last page was
# processed and the script would never exit.
defer.DeferredList(pending).addBoth(_stopReactor)

reactor.run()