"""Print listing titles and numeric details scraped from jdz.58.com pages.

Fetches pages 1-4 under http://jdz.58.com/ershoufang/ and prints, for each
listing title found, the title followed by the tuple of numeric fields
captured from the matching ``class='pri`` section of the page.
"""
import os
import re
import urllib.request

USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.2372.400 QQBrowser/9.5.10548.400'

BASE_URL = "http://jdz.58.com/ershoufang/pn"

# Captures the alt text of every lazily loaded image (used as listing title).
NAME_PATTERN = re.compile(r'img lazy_src=.*?alt="(.*?)"')

# Captures five numeric fields following each ``class='pri`` marker.
# NOTE(review): presumably total price, a four-digit number (year or unit
# price?), two single digits (room counts?) and an area -- confirm against
# the live markup.  The unescaped "." in "\d{2,3}.\d" matches any character,
# not only a decimal point; left unchanged to preserve current matches.
PRICE_PATTERN = re.compile(r"class='pri.*?(\d{2,3}.\d|\d{2,3})\D*?(\d\d\d\d)\D*?(\d).*?(\d)\D*?(\d{2,4}.\d|\d{2,3})")


def build_request(url):
    """Return a ``Request`` for *url* carrying the browser User-Agent."""
    req = urllib.request.Request(url)
    req.add_header('User-Agent', USER_AGENT)
    return req


def url_open(url):
    """Fetch *url* and return the raw response body as ``bytes``.

    The request is sent with the browser User-Agent header, and the
    response is closed once the body has been read.

    Raises whatever ``urllib.request.urlopen`` raises on failure
    (for example ``urllib.error.URLError``).
    """
    # Open the prepared request (not the bare URL) so the header is sent.
    with urllib.request.urlopen(build_request(url)) as response:
        return response.read()


def page_url(page):
    """Return the listing URL for the given page number."""
    return "{}{}/".format(BASE_URL, page)


def extract_listings(html):
    """Pair each listing title in *html* with its numeric detail tuple.

    Titles and detail tuples are matched by position: the i-th title goes
    with the i-th ``PRICE_PATTERN`` match, so duplicate titles still get
    their own record.  Titles beyond the last detail match are paired with
    ``None`` instead of raising ``IndexError``.

    Returns a list of ``(title, details_or_None)`` tuples.
    """
    names = NAME_PATTERN.findall(html)
    details = PRICE_PATTERN.findall(html)
    return [
        (title, details[i] if i < len(details) else None)
        for i, title in enumerate(names)
    ]


def main():
    """Fetch pages 1-4 and print each title followed by its details."""
    for page in range(1, 5):
        url = page_url(page)
        print(url)
        html = url_open(url).decode("utf-8")
        for title, details in extract_listings(html):
            print(title)
            if details is not None:
                print(details)


if __name__ == "__main__":
    main()