混乱的学习历程:list。爬取 58 同城二手房;判断字符串在列表中的位置用 列表名.index(字符串),注意它只返回第一个匹配项的下标。

import urllib.request
import os
import re

def url_open(url):
    """Fetch *url* and return the raw response body.

    The request is sent with a desktop-browser ``User-Agent`` header
    instead of urllib's default one.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The undecoded response body as ``bytes``; decoding is left to the
        caller.

    Raises:
        Nothing is caught here: any error raised by
        ``urllib.request.urlopen`` propagates to the caller.
    """
    req = urllib.request.Request(url)
    # The header name must be exactly 'User-Agent' (no stray whitespace),
    # otherwise urllib still adds its own default User-Agent.
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.2372.400 QQBrowser/9.5.10548.400')
    # Open the prepared Request object, not the bare URL, so the header
    # is actually sent; the context manager closes the connection.
    with urllib.request.urlopen(req) as response:
        return response.read()


# Crawl listing pages 1-4 of jdz.58.com/ershoufang (58.com second-hand
# housing) and print every listing title followed by the tuple of numbers
# captured for it by the price pattern.

# Both patterns are page-independent, so they are compiled once up front.
# Captures the alt text of each lazily loaded image, used as the title.
name_re = re.compile(r'img lazy_src=.*?alt="(.*?)"')
# Five capture groups per listing, starting at the "pri" class.
# NOTE(review): presumably total price, a 4-digit number, two single
# digits and an area figure -- confirm against the real page markup.
# NOTE(review): the '.' inside "\d{2,3}.\d" is unescaped and matches any
# character; it looks like a decimal point was intended -- confirm before
# tightening the pattern.
zongjia_re = re.compile(r"class='pri.*?(\d{2,3}.\d|\d{2,3})\D*?(\d\d\d\d)\D*?(\d).*?(\d)\D*?(\d{2,4}.\d|\d{2,3})")

for page in range(1, 5):
    url = "http://jdz.58.com/ershoufang/pn" + str(page) + "/"
    print(url)

    html = url_open(url).decode("utf-8")

    # Scan the page once per pattern instead of once per title.
    name_list = name_re.findall(html)
    zongjia_list = zongjia_re.findall(html)

    # Pair by position via enumerate: list.index() would return the first
    # occurrence and so mis-pair listings that share the same title.
    for index, name in enumerate(name_list):
        print(name)
        if index < len(zongjia_list):
            print(zongjia_list[index])
        else:
            # Fewer price matches than titles: report it instead of
            # crashing with IndexError.
            print("(no price details matched for this listing)")
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值