# The URLs of the scraped data come from the listing pages, not from the detail pages.
from bs4 import BeautifulSoup
import requests
import time
import pymongo
# Connect to the MongoDB server at localhost:27017.
client = pymongo.MongoClient(host='localhost', port=27017)
# Handle to the database named 'Ceshi'.
Ceshi = client['Ceshi']
# Collection that receives the scraped listings (comparable to a sheet in Excel).
sheet_tab = Ceshi['sheet_tab']
# Example of a listing-page URL, kept as a no-op string literal for reference:
'''
url = 'http://sz.xiaozhu.com/search-duanzufang-p2-0/'
'''
def get_more_pages(page):
    """Scrape listing pages and store every listing's title and price.

    Pages are numbered from 1 and ``page`` is an exclusive upper bound, so
    ``get_more_pages(10)`` fetches pages 1 through 9.

    Each stored document has the keys ``'title'`` and ``'price'``; both
    values are the raw text taken from the page (the price is NOT converted
    to a number here).

    Args:
        page: One past the last listing-page number to fetch.

    Raises:
        requests.RequestException: If a page cannot be fetched at all
            (connection failure or timeout).
    """
    for page_num in range(1, page):
        # A timeout keeps one stalled connection from hanging the whole crawl.
        web_data = requests.get(
            'http://sz.xiaozhu.com/search-duanzufang-p{}-0/'.format(page_num),
            timeout=10,
        )
        if not web_data.ok:
            # Do not parse an error page as if it were a listing page.
            print('Skipping page {}: HTTP {}'.format(page_num, web_data.status_code))
            continue

        soup = BeautifulSoup(web_data.text, 'lxml')
        titles = soup.select(' div.result_btm_con.lodgeunitname > div > a > span')
        prices = soup.select('span.result_price > i')

        # zip() pairs titles and prices by position and stops at the shorter
        # list, so a listing missing either element is silently dropped.
        listings = [
            {
                'title': title.get_text(),
                'price': price.get_text(),
            }
            for title, price in zip(titles, prices)
        ]
        # insert_many() rejects an empty list, so only write when the page
        # actually produced listings; one write per page instead of per item.
        if listings:
            sheet_tab.insert_many(listings)
    print('Done')
# Scrape listing pages 1-9 (the argument is an exclusive upper bound).
get_more_pages(10)

# Data filtering: print every stored listing priced above 500.
# A pymongo Collection is not iterable; documents are fetched through find().
for item in sheet_tab.find():
    try:
        # Prices are stored as the text scraped from the page, so convert
        # before comparing with a number.
        price = int(item.get('price'))
    except (TypeError, ValueError):
        # Missing or non-numeric price: cannot be compared, skip the document.
        continue
    if price > 500:
        print(item)