import requests
from bs4 import BeautifulSoup
import xlwt
import time
# Fetch the content of one page
def get_one_page(url, timeout=10):
    """Download a page over HTTP and return its body as text.

    A GET request is sent with a browser-like ``User-Agent`` header.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a server that
            never answers, so a finite default is used.

    Returns:
        The response body as text when the status code is 200, otherwise
        ``None``.

    Raises:
        requests.exceptions.RequestException: If the request fails at the
            network level or the timeout expires.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Anything other than a plain 200 is treated as "no page available".
    if response.status_code != 200:
        return None
    return response.text
# Parse one page's HTML and yield its table rows as structured dicts
def parse_one_page(html):
soup = BeautifulSoup(html,'lxml')
i = 0
for item in soup.select('tr')[2:-1]:
yield{
'time':item.select('td')[i].text,
'issue':item.select('td')[i+1].text,
'digits':item.select('td em')[0].text,
'ten_digits':item.select('td