# Python: 爬取双色球03年至20年中奖号码 (scrape the Double Color Ball lottery winning numbers, 2003-2020)

from urllib import request
from bs4 import BeautifulSoup
import gzip
import csv
import time

# English spellings of 0..34; each word sits at the index it names.
_NUMBER_WORDS = (
    "zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine",
    "ten", "eleven", "twelve", "thirteen", "fourteen",
    "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
    "twenty", "twenty-one", "twenty-two", "twenty-three", "twenty-four",
    "twenty-five", "twenty-six", "twenty-seven", "twenty-eight", "twenty-nine",
    "thirty", "thirty-one", "thirty-two", "thirty-three", "thirty-four",
)
_WORD_BY_NUMBER = dict(enumerate(_NUMBER_WORDS))


def change(num):
    """Spell out a number from 0 to 34 in English.

    Args:
        num: The value to translate.

    Returns:
        The lowercase English word (hyphenated above twenty, e.g.
        ``"twenty-one"``), or ``None`` when ``num`` equals none of 0..34.
    """
    try:
        return _WORD_BY_NUMBER.get(num)
    except TypeError:
        # Unhashable values cannot equal any of the integer keys.
        return None


def get_data(num, zero):
    """Download one draw-result page and save it as ``index.html``.

    The page address is
    ``http://kaijiang.500.com/shtml/ssq/<zero><num>.shtml``.

    Args:
        num: Issue number placed in the URL (converted with ``str``).
        zero: Text placed directly before ``num`` in the URL, e.g. ``"0"``
            or ``""`` (converted with ``str``).

    The body is decoded as GBK, silently dropping undecodable bytes, and
    written to ``index.html`` in the current directory. Nothing is written
    when the status code is not 200.
    """
    # Wait three seconds before every request (presumably to throttle the
    # scraper so the site is not hammered).
    time.sleep(3)
    url = 'http://kaijiang.500.com/shtml/ssq/'+str(zero)+str(num)+'.shtml'
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36'}
    req = request.Request(url, headers=headers)

    # The context manager closes the connection even if reading fails.
    with request.urlopen(req) as response:
        if response.getcode() != 200:
            return
        data = response.read()

    # Decoding with errors='ignore' never raises, so a compressed body cannot
    # be detected by waiting for a decode failure. A gzip stream always starts
    # with the magic bytes 1f 8b; check for them explicitly instead.
    if data[:2] == b'\x1f\x8b':
        data = gzip.decompress(data)
    text = str(data, encoding='gbk', errors='ignore')

    with open('index.html', mode='w', encoding='gbk') as f:
        f.write(text)


def parse_data():
    """Parse the saved ``index.html`` and append one draw to ``ssq.csv``.

    Reads ``index.html`` (GBK) from the current directory and collects the
    text of every element with class ``cfont2`` (the value printed and
    written as the first column), ``ball_red`` and ``ball_blue``. Ball texts
    are converted with ``int`` and then spelled out with ``change``.

    One row is appended to ``ssq.csv`` (UTF-8): the first ``cfont2`` text,
    the first six red balls and the first blue ball. If the page does not
    contain at least one ``cfont2`` element, six red balls and one blue
    ball, a message is printed and no row is written.

    Raises:
        ValueError: If a ball element's text is not an integer.
    """
    with open('index.html', mode='r', encoding='gbk') as f:
        html = f.read()
    bs = BeautifulSoup(html, 'html.parser')

    num = [tag.getText() for tag in bs.find_all(class_='cfont2')]
    red = [change(int(tag.getText())) for tag in bs.find_all(class_='ball_red')]
    blue = [change(int(tag.getText())) for tag in bs.find_all(class_='ball_blue')]

    # An incomplete page used to abort the whole run with a bare IndexError;
    # skip just this row and say so instead.
    if not num or len(red) < 6 or not blue:
        print('index.html does not contain a complete draw; row skipped')
        return

    print(num[0])
    # The with-block guarantees the CSV is closed even if writing fails.
    with open('ssq.csv', 'a', encoding="utf-8") as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow((num[0], red[0], red[1], red[2], red[3], red[4], red[5], blue[0]))




if __name__ == '__main__':
    # (year prefix, number of issues fetched for that prefix).
    # Issue numbers have the form <prefix><three-digit sequence starting at
    # 001>; the prefix appears to be the two-digit year (03 .. 20).
    issues_per_year = (
        (3, 89),
        (4, 120),
        (5, 153),
        (6, 154),
        (7, 153),
        (8, 154),
        (9, 154),
        (10, 153),
        (11, 153),
        (12, 154),
        (13, 154),
        (14, 152),
        (15, 154),
        (16, 153),
        (17, 154),
        (18, 153),
        (19, 151),
        (20, 75),
    )

    for year, issue_count in issues_per_year:
        # Single-digit prefixes get a leading "0" so the issue number in the
        # URL is five digits long.
        zero = "0" if year < 10 else ""
        first_issue = year * 1000 + 1
        for i in range(first_issue, first_issue + issue_count):
            get_data(i, zero)
            parse_data()

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值