实现代码:
import requests
import re
from bs4 import BeautifulSoup
# Fetch the cntour.cn home page and collect the anchor tags matched by the
# CSS selector below.
url = 'http://www.cntour.cn/'  # the website to crawl

# An explicit timeout keeps the script from blocking forever when the server
# accepts the connection but never answers; requests has no default timeout.
strhtml = requests.get(url, timeout=10)

# Parse the response body with the lxml parser.
soup = BeautifulSoup(strhtml.text, 'lxml')

# Filter rule: a CSS selector path from <html> down to the <a> elements inside
# ul.news. Adjacent string literals are concatenated into a single selector.
data = soup.select(
    'html body div#main div.wrapper '
    'div.mtop.firstMod.clearfix div.leftBox '
    'div.ui-tabs-panel ul.news li a'
)
with open('resource/temp.txt', 'w+'