Python实战计划学习作业1-3

代码如下:

from bs4 import BeautifulSoup
import requests

# Limit consulted by the script's main loop to cap how many detail pages
# are analysed.
max_of_pages = 30

# Listing pages to crawl: the site's front page first, followed by the
# paginated search results for pages 2, 3 and 4.
urls = ["http://bj.xiaozhu.com/"]
urls.extend(
    "http://bj.xiaozhu.com/search-duanzufang-p{}-0/".format(page_number)
    for page_number in range(2, 5)
)


def get_detail_page_url(url):
    """Collect the detail-page links found on one listing page.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list with the ``href`` value of every anchor matched by the
        ``#page_list > ul > li > a`` selector, in document order. Anchors
        that carry no ``href`` are skipped.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            does not complete within the timeout.
    """
    # Without a timeout a stalled connection would block the crawl forever.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    anchors = soup.select("#page_list > ul > li > a")
    # Drop anchors lacking an href so callers never receive None as a URL.
    hrefs = (anchor.get("href") for anchor in anchors)
    return [href for href in hrefs if href]


def analyze_detail_page(url_str, num):
    """Download one listing detail page and print a summary of it.

    The summary consists of the sequence number, title, address, price,
    room image URL, owner image URL, owner name and owner sex, followed by
    a separator line. A field whose element (or attribute) is not present
    in the page is printed as ``N/A`` instead of aborting the whole crawl
    with an ``IndexError``/``TypeError``.

    Args:
        url_str: Address of the detail page to download.
        num: Sequence number shown on the ``Number`` line.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            does not complete within the timeout.
    """
    missing = "N/A"

    # Without a timeout a stalled connection would block the crawl forever.
    page_data = requests.get(url_str, timeout=10)
    soup = BeautifulSoup(page_data.text, 'lxml')

    def first_match(selector):
        """Return the first element matching *selector*, or None."""
        matches = soup.select(selector)
        return matches[0] if matches else None

    def text_of(selector):
        """Return the text of the first match, or the placeholder."""
        element = first_match(selector)
        return missing if element is None else element.get_text()

    def attr_of(selector, attribute):
        """Return an attribute of the first match, or the placeholder."""
        element = first_match(selector)
        value = None if element is None else element.get(attribute)
        return missing if value is None else value

    title = text_of("body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em")
    address = attr_of("body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p", "title")
    # NOTE(review): the selector targets `div.day_l`, which suggests this is
    # a per-day price rather than a monthly one - confirm the label below.
    monthly_rental = text_of("#pricePart > div.day_l > span")
    room_image = attr_of("#curBigImage", "src")
    owner_image = attr_of("#floatRightBox > div.js_box.clearfix > div.member_pic > a > img", "src")
    owner_name = text_of("#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a")

    # The owner's heading holds a span with class `member_girl_ico` for
    # female owners; any heading without that span is reported as male.
    owner_heading = first_match("#floatRightBox > div.js_box.clearfix > div.w_240 > h6")
    if owner_heading is None:
        sex = missing
    elif owner_heading.find_all("span", class_="member_girl_ico"):
        sex = "female"
    else:
        sex = "male"

    print("Number: " + str(num))
    print("Title: " + title)
    print("Address: " + address)
    print("Monthly rental: " + monthly_rental)
    print("Room image: " + room_image)
    print("Owner image: " + owner_image)
    print("Owner name: " + owner_name)
    print("Owner sex: " + sex)
    print("-------------------------------------------------------")


if __name__ == '__main__':
    # Number of detail pages analysed so far; doubles as the sequence
    # number printed for each page (1-based after the increment below).
    processed = 0
    for url in urls:
        # Stop before downloading another listing page once the limit is
        # reached; breaking out of the inner loop alone would still fetch
        # every remaining listing page for nothing.
        if processed >= max_of_pages:
            break
        for page_url in get_detail_page_url(url):
            # Counting completed pages (rather than testing `count < max`
            # with a counter that starts at 1) analyses exactly
            # `max_of_pages` pages instead of one fewer.
            if processed >= max_of_pages:
                break
            processed += 1
            analyze_detail_page(page_url, processed)

结果如下:

/Library/Frameworks/Python.framework/Versions/3.5/bin/python3.5 /Users/reed/PycharmProjects/web01/web_parse3.py
Number: 1
Title: 北京国贸CBD 近地铁旁精品舒适两居室
Address: 北京市朝阳区百子湾路16号后现代城
Monthly rental: 456
Room image: http://image.xiaozhustatic1.com/00,800,533/8,0,32,1048,1800,1200,10b2517f.jpg
Owner image: http://image.xiaozhustatic1.com/21/8,0,48,47,334,334,d6d989f6.jpg
Owner name: 知足小家
Owner sex: female
-------------------------------------------------------
Number: 2
Title: 【两居特价】二环鼓楼大街后海鸟巢温馨两居
Address: 北京市东城区鼓楼大街安德路55号院
Monthly rental: 398
Room image: http://image.xiaozhustatic1.com/00,800,533/8,0,79,1519,1800,1200,0af68cad.jpg
Owner image: http://image.xiaozhustatic1.com/21/8,0,61,1483,375,376,8c9f4abd.jpg
Owner name: 天天Tinny
Owner sex: female
-------------------------------------------------------
Number: 3
Title: 市中心地铁旁高端LOFT 可欣赏迷人夜景
Address: 北京市西城区广安门外大街
Monthly rental: 328
Room image: http://image.xiaozhustatic1.com/00,800,533/3,0,33,4595,3000,2000,1f72b9f8.jpg
Owner image: http://image.xiaozhustatic1.com/21/2,0,10,298
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值