用 Python 写了一段代码,爬取了一个彩票网站从 2013 年至今的历年数据,中间加了延时,以防被禁。
以下是爬虫代码记录:
import urllib.request
from bs4 import BeautifulSoup
import requests
import csv
def genData(urls,qishu,dict):
with open("dst.csv","w",encoding='utf_8_sig') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["number","date","red1","red2","red3","red4","red5","red6","blue"])
for di in dict.items():
qishu,url = di
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'}
req = requests.get(url, headers=headers)
req.encoding = 'gb18030'
soup = BeautifulSoup(req.text, "lxml")
human_list = soup.find(attrs={"class":