目的:将该网址(http://www.jianshu.com/p/ef1028a4668e)所介绍的方法爬取到的内容存入 MongoDB,并查询价格超过 500 的信息。
from bs4 import BeautifulSoup
import requests
import pymongo

# Handles for the `xiaozu` database and its `xinxi` collection on a MongoDB
# server at localhost:27017.
client = pymongo.MongoClient('localhost', 27017)
xiaozu = client['xiaozu']
xinxi = xiaozu['xinxi']


def sex_judge(sex):
    """Translate CSS class names into a gender label.

    Args:
        sex: Iterable of class-name strings (presumably the ``class``
            attribute of a tag -- confirm at the call site).

    Returns:
        ``'male'`` if ``'member_ico'`` is encountered, ``'female'`` if
        ``'member_ico1'`` is encountered (the first match in iteration
        order wins), otherwise the string ``'None'``.
    """
    for isex in sex:
        if isex == 'member_ico':
            return 'male'
        if isex == 'member_ico1':
            return 'female'
    # Fall back only after every class name has been checked, so unrelated
    # leading classes and an empty iterable both yield the same string
    # sentinel instead of an early exit or an implicit None.
    return 'None'


end_page = input('end_page:')
# NOTE(review): range() stops before end_page, so the page numbered
# end_page itself is never requested -- confirm that this is intended.
for i in range(1, int(end_page)):
    base_url = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i)
    html1 = requests.get(base_url)
    bsObj1 = BeautifulSoup(html1.text, 'lxml')
    # Collect every <a> tag carrying the 'resule_img_a' class on this page.
    detail_url = bsObj1.find_all('a', {'class':'resule_img_a'})
    # The inner loop rebinds `i` to each matched tag; the outer range()
    # iterator is unaffected by the rebinding.
    for i in detail_url:
        html2 = r