已经定位到了房源信息与价格
#-*- coding:utf-8 -*-
from bs4 import BeautifulSoup
from urlparse import urljoin
import requests
import csv
# Listing-page URL template; {page} is filled with the 1-based page number.
url = "http://wh.58.com/pinpaigongyu/pn/{page}/?minprice=2000_4000"
# Number of pages fetched so far; starts at 0.
page = 0
# Seconds to wait for the server before giving up, so that a stalled
# connection cannot hang the crawl forever.
request_timeout = 30


def _pick_location(house_title):
    """Return the whitespace-separated token of the title used as the address.

    The second token is the address unless it contains "公寓" or "青年社区"
    (i.e. it is the apartment name), in which case the first token is used.
    A title with fewer than two tokens falls back to its only token, or to
    "" when the title is blank, instead of raising IndexError.
    """
    tokens = house_title.split()
    if len(tokens) < 2:
        return tokens[0] if tokens else ""
    # If the second column is the apartment name, take the first column
    # as the address.
    if "公寓" in tokens[1] or "青年社区" in tokens[1]:
        return tokens[0]
    return tokens[1]


def _parse_house(house):
    """Build the CSV row for one listing element.

    Returns ``[title, location, price, url]`` (title and price UTF-8
    encoded), or None when the element lacks an ``h2`` with plain text, an
    ``a`` with an ``href``, or a ``b`` with plain text inside ``.money``.
    """
    titles = house.select("h2")
    links = house.select("a")
    money_nodes = house.select(".money")
    if not (titles and links and money_nodes):
        return None
    prices = money_nodes[0].select("b")
    href = links[0].get("href")
    # ``.string`` is None when the tag does not hold exactly one text node.
    if not prices or href is None:
        return None
    if titles[0].string is None or prices[0].string is None:
        return None
    house_title = titles[0].string.encode("utf8")
    house_money = prices[0].string.encode("utf8")
    house_url = urljoin(url, href)
    return [house_title, _pick_location(house_title), house_money, house_url]


# "wb" is the mode the Python 2 csv module expects.  The with-block closes
# the file even when a request or a parse step raises.
with open("rent.csv", "wb") as csv_file:
    csv_writer = csv.writer(csv_file, delimiter=',')
    while True:
        page += 1
        page_url = url.format(page=page)
        # Single-argument form prints the same bytes as the old
        # ``print "fetch: ", ...`` statement (note the two spaces).
        print("fetch:  " + page_url)
        response = requests.get(page_url, timeout=request_timeout)
        # Name the parser explicitly so results do not depend on which
        # optional parser libraries happen to be installed.
        html = BeautifulSoup(response.text, "html.parser")
        house_list = html.select(".list > li")
        # The loop ends when no more listings can be read.
        if not house_list:
            break
        for house in house_list:
            row = _parse_house(house)
            if row is None:
                # Report and skip items that are not complete listings
                # rather than aborting the whole crawl.
                print("skip malformed listing on page %d" % page)
                continue
            csv_writer.writerow(row)
<html>
<head>
<!-- Page metadata: UTF-8 encoding, IE edge document mode, and a mobile viewport with user zoom disabled. -->
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="initial-scale=1.0, user-scalable=no, width=device-width">
<title>租房价格</title>
<!-- Stylesheets served from cache.amap.com: a base stylesheet and the styles for the jquery.range widget. -->
<link rel="stylesheet" href="http://cache.amap.com/lbs/static/main1119.css" />
<link rel="stylesheet" href="http://cache.amap.com/lbs/static/jquery.range.css" />
<!-- Scripts, in load order: jQuery 1.9.1, an ES5 shim, the AMap JS API, then jquery.range. -->
<!-- NOTE(review): jquery.range.js presumably requires jQuery to be loaded first; keep this order unless confirmed otherwise. -->
<script src="http://cache.amap.com/lbs/static/jquery-1.9.1.js"></script>
<script src="http://cache.amap.com/lbs/static/es5.min.js"></script>
<!-- AMap JS API v1.3 with the ArrivalRange, Scale, Geocoder, Transfer and Autocomplete plugins requested in the query string. -->
<!-- NOTE(review): the API key is embedded in the URL and every resource is loaded over plain http; confirm both are intended. -->
<script src="http://webapi.amap.com/maps?v=1.3&key=22d3816e107f199992666d6412fa0691&plugin=AMap.ArrivalRange,AMap.Scale,AMap.Geocoder,AMap.Transfer,AMap.Autocomplete"></script>
<script src="http://cache.amap.com/lbs/static/jquery.range.js"></script>
<style>
/* Control panel: absolutely positioned 30px from the top and 20px from the right. */
.control-panel {
position: absolute;
top: 30px;
right: 20px;
}
/* One row of the control panel: fixed 280px width, semi-transparent grey-blue background, white text. */
.control-entry {
width: 280px;
background-color: rgba(119, 136, 153, 0.8);
font-family: fantasy, sans-serif;
text-align: left;
color: white;
overflow: auto;
padding: 10px;
margin-bottom: 10px;
}
/* Input area: the 120px left margin matches the width of the floated label below. */
.control-input {
margin-left: 120px;
}
.control-input input[type="text"] {
width: 160px;
}
/* Labels float left in a 120px column beside the input area. */
.control-panel label {
float: left;
width: 120px;
}
/* Transfer panel: absolutely positioned 30px from the top and 20px from the left, 250px wide, */
/* scrolling vertically once its content exceeds the 80% max-height. */
#transfer-panel {
position: absolute;
background-color: white;
max-height: 80%;
overflow-y: auto;
top: 30px;
left: 20px;
width: 250px;
}
</style>
</head>
<body>
<div id="container"></div>
<!-- Control panel with three entries: work-location text input, commute-mode radio buttons, and a listing-file import. -->
<!-- NOTE(review): the inline handlers takeBus, takeSubway and importRentInfo are not defined in the visible script; -->
<!-- they must exist as global functions before these controls are used. -->
<div class="control-panel">
<!-- Entry 1: free-text work location (id "work-location"). -->
<div class="control-entry">
<label>选择工作地点:</label>
<div class="control-input">
<input id="work-location" type="text">
</div>
</div>
<!-- Entry 2: commute mode; both radios share the name "vehicle" and "SUBWAY,BUS" is checked by default. -->
<div class="control-entry">
<label>选择通勤方式:</label>
<div class="control-input">
<input type="radio" name="vehicle" value="SUBWAY,BUS" onClick="takeBus(this)" checked/> 公交+地铁
<input type="radio" name="vehicle" value="SUBWAY" onClick="takeSubway(this)" /> 地铁
</div>
</div>
<!-- Entry 3: file picker; a change event passes the input element to importRentInfo. -->
<div class="control-entry">
<label>导入房源文件:</label>
<div class="control-input">
<input type="file" name="file" onChange="importRentInfo(this)" />
</div>
</div>
</div>
<div id="transfer-panel"></div>
<script>
// Create the AMap map inside the element with id "container".
// Declared with `var` at script top level, so `map` is a global.
// NOTE(review): the scraper in this file targets wh.58.com; confirm that this
// initial center (presumably [longitude, latitude]) matches the city being scraped.
var map = new AMap.Map("container", {
    center: [116.397428, 39.90923],
    zoom: 11,
    zoomEnable: true,
    resizeEnable: true
});
</script>
</body>
</html>