from urllib import request
from bs4 import BeautifulSoup
import gzip
import csv
import time
# English words for 0..34; the position in the tuple is the number it names.
_NUMBER_WORDS = dict(enumerate((
    "zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine",
    "ten", "eleven", "twelve", "thirteen", "fourteen",
    "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
    "twenty", "twenty-one", "twenty-two", "twenty-three", "twenty-four",
    "twenty-five", "twenty-six", "twenty-seven", "twenty-eight", "twenty-nine",
    "thirty", "thirty-one", "thirty-two", "thirty-three", "thirty-four",
)))


def change(num):
    """Return the English word for ``num``.

    Args:
        num: A number from 0 to 34 inclusive.

    Returns:
        The lower-case English word (hyphenated above twenty, e.g.
        ``"twenty-one"``), or ``None`` when ``num`` is not one of 0..34.
    """
    try:
        return _NUMBER_WORDS.get(num)
    except TypeError:
        # Unhashable arguments cannot equal any of the numbers, so they get
        # the same "no match" result as any other unknown value.
        return None
def get_data(num, zero):
    """Download one result page and save it as ``index.html``.

    The page name is ``str(zero) + str(num)``, so ``zero="0", num=3001``
    requests ``http://kaijiang.500.com/shtml/ssq/03001.shtml``.

    The body is decoded as GBK (undecodable bytes are dropped) and written
    to ``index.html`` in the current directory, overwriting any previous
    content. Nothing is written when the status code is not 200.

    Args:
        num: Page number, without leading-zero padding.
        zero: Prefix placed directly before ``num`` in the URL.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        OSError: If ``index.html`` cannot be written, or if a body that
            starts with the gzip magic bytes is not valid gzip data.
    """
    # Fixed pause before every request (presumably to rate-limit the crawl).
    time.sleep(3)

    url = 'http://kaijiang.500.com/shtml/ssq/' + str(zero) + str(num) + '.shtml'
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36'}
    req = request.Request(url, headers=headers)

    # The context manager closes the connection even if reading fails.
    with request.urlopen(req) as response:
        if response.getcode() != 200:
            return
        data = response.read()

    # Decoding with errors='ignore' never raises, so a compressed body has to
    # be detected explicitly: every gzip stream starts with bytes 0x1f 0x8b.
    if data[:2] == b'\x1f\x8b':
        data = gzip.decompress(data)
    text = str(data, encoding='gbk', errors='ignore')

    with open('index.html', mode='w', encoding='gbk') as f:
        f.write(text)
def parse_data():
    """Parse ``index.html`` and append one row to ``ssq.csv``.

    Reads ``index.html`` (GBK) from the current directory and collects the
    text of every element with class ``cfont2``, ``ball_red`` and
    ``ball_blue``. The ball texts are converted to integers and then to
    English words with ``change``.

    The appended row is: the first ``cfont2`` text, the first six
    ``ball_red`` words, and the first ``ball_blue`` word. The first
    ``cfont2`` text is also printed.

    Raises:
        IndexError: If the page has no ``cfont2`` element, fewer than six
            ``ball_red`` elements, or no ``ball_blue`` element. Nothing is
            printed or written in that case.
        ValueError: If a ball's text is not an integer.
    """
    with open('index.html', mode='r', encoding='gbk') as f:
        html = f.read()
    bs = BeautifulSoup(html, 'html.parser')

    num = [tag.get_text() for tag in bs.find_all(class_='cfont2')]
    red = [change(int(tag.get_text())) for tag in bs.find_all(class_='ball_red')]
    blue = [change(int(tag.get_text())) for tag in bs.find_all(class_='ball_blue')]

    # Validate before touching the output file so an incomplete page can
    # neither leave a file handle open nor produce a partial row.
    if not num or len(red) < 6 or not blue:
        raise IndexError(
            'index.html is missing data: found %d cfont2, %d ball_red, '
            '%d ball_blue elements' % (len(num), len(red), len(blue))
        )

    print(num[0])
    row = (num[0], *red[:6], blue[0])
    with open('ssq.csv', 'a', encoding="utf-8") as out:
        csv.writer(out, lineterminator='\n').writerow(row)
if __name__ == '__main__':
    # One (first, stop) pair per batch of page numbers; ``stop`` is exclusive,
    # exactly as in range(). Batches are processed top to bottom.
    batches = (
        (3001, 3090),
        (4001, 4121),
        (5001, 5154),
        (6001, 6155),
        (7001, 7154),
        (8001, 8155),
        (9001, 9155),
        (10001, 10154),
        (11001, 11154),
        (12001, 12155),
        (13001, 13155),
        (14001, 14153),
        (15001, 15155),
        (16001, 16154),
        (17001, 17155),
        (18001, 18154),
        (19001, 19152),
        (20001, 20076),
    )
    for first, stop in batches:
        # Four-digit numbers are requested with a "0" prefix, five-digit
        # numbers with no prefix.
        prefix = "0" if first < 10000 else ""
        for page in range(first, stop):
            # Download the page to index.html, then append its row to the CSV.
            get_data(page, prefix)
            parse_data()