'''Created on 2017-10-9
@author: wbhuangzhiqiang'''
import csv
import re
import sys
import urllib.error
import urllib.request

from bs4 import BeautifulSoup


# Return the page object when the page opens successfully; otherwise print an
# error message and exit the program.
def get_bsobj(url):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    The response body is parsed with the ``html5lib`` parser. When the page
    cannot be opened, or the response status is not 200, the message
    "页面错误" is printed and the program terminates through ``sys.exit()``.

    Args:
        url: Address of the page to fetch; passed straight to
            ``urllib.request.urlopen``.

    Returns:
        The ``BeautifulSoup`` object built from the response body.

    Raises:
        SystemExit: If the request fails or the status code is not 200.
    """
    try:
        page = urllib.request.urlopen(url)
    except urllib.error.URLError:
        # urlopen raises (HTTPError is a URLError subclass) for HTTP error
        # statuses and network failures instead of returning a response, so
        # route those cases to the same error path as a non-200 status.
        page = None

    if page is not None:
        # The context manager closes the HTTP response once it has been read.
        with page:
            if page.getcode() == 200:
                html = page.read()
                return BeautifulSoup(html, "html5lib")

    print("页面错误")
    sys.exit()


# NOTE(review): in the whitespace-mangled source, the line that ended this
# function also carried the fused header of the next definition,
# `def get_house_info_list(url):`. Restore that header on its own line when
# the remainder of the file is reformatted.
house_info_list=[]
bsobj=get_bsobj(url)if notbsobj:returnNone#获取页数
globalhouse_info_page
house_page= bsobj.find("a", {"gahref":"results_totalpage"})
house_info_page=int(house_page.get_text())#print(house_info_page)
house_list = bsobj.find_all("div", {"class":"info"})for house inhouse_list:#title = house.find("div", {"class"