import requests
from bs4 import BeautifulSoup
def get_html(url, headers, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        headers: Mapping of HTTP request headers forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; use ``None`` to wait indefinitely
            (the previous behaviour).

    Returns:
        The decoded response body.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
    """
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode with the encoding detected from the body rather than the one
    # derived from the HTTP headers.
    response.encoding = response.apparent_encoding
    return response.text
def get_pages(html):
soup = BeautifulSoup(html,'html.parser')
all_topics=soup.find_all('tr')[1:]
for each_topic in all_topics:
topic_times = each_topic.find('td', class_='last') # 搜索指数
topic_rank = each_topic.find('td', class_='first') # 排名
topic_name = each_topic.find('td', class_='keyword') # 标题目
if topic_rank != None and topic_name != None and topic_times != None:
topic_rank = each_topic.find('td', class_='first').get_text().replace(' ', '').replace('\n', '')
topic_name = each_topic.f