四周实现爬虫系统(2)---爬取租房信息

爬取小猪短租网 –300个详情页租房信息
1.实现每个租房详情页信息爬取
2.编写函数实现租房列表页网址获取
3.实现300个详情页租房信息

from bs4 import BeautifulSoup
import requests

url='http://sh.xiaozhu.com/fangzi/4187532729.html'
def get_info(url):
    web_data = requests.get(url)
    soup = BeautifulSoup(web_data.text, 'lxml')
    title = soup.select('div.pho_info > h4 ')[0].text
    address = soup.select('div.pho_info > p ')[0].get('title')
    # address=soup.select('div.pho_info > p > span')[0].text
    price = soup.select('div.day_l > span')[0].text
    img = soup.select('#curBigImage')[0].get('src')
    host_name = soup.select('a.lorder_name')[0].text
    host_gender = soup.select('div.member_pic > div')[0].get('class')[0]

    # print( title,address,price,img,host_name,host_gender)

    def print_gender(class_name):
        if class_name == 'member_ico1':
            return '女'
        if class_name == 'member_ico':
            return '男'

    data = {
        'title': title,
        'addrss': address,
        'price': price,
        'img': img,
        'host_name': host_name,
        'host_gender': print_gender(host_gender)
    }
    print(data)

page_link=[]
def get_page_link(page_number):
    for pageNo in range(1,page_number):
        full_url='http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(pageNo)
        wb_data=requests.get(full_url)
        soup=BeautifulSoup(wb_data.text,'lxml')
        for link in soup.select('a.resule_img_a'):
            link_url=link.get('href')#link是所有的<a>标签信息,单独获取网址取属性href
            #print(link.get('href'))
            page_link.append(link_url)
    #print(page_link)
    return page_link


urls=get_page_link(2)
#print(urls)
for single_url in urls:
    get_info(single_url)


#调用函数-实现网址的获取
get_page_link(2)//page_number:获取几页房源信息
'''
观察网页列表特征P1 P2 P3:
http://bj.xiaozhu.com/search-duanzufang-p1-0/
http://bj.xiaozhu.com/search-duanzufang-p2-0/
http://bj.xiaozhu.com/search-duanzufang-p3-0/
'''

结果展示:
这里写图片描述

阅读更多
版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u010004460/article/details/52808168
文章标签: 爬虫
个人分类: python PyCharm 爬虫
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页

关闭
关闭
关闭