1、提取链家信息(城市、价格、时间)
'''测试通过'''
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import io
def get_city_list():
    """Fetch the Lianjia mobile city index page and collect its city links.

    Returns:
        dict: maps the ``href`` attribute of every ``<a>`` found inside the
        ``div`` elements whose class is ``block city_block`` to that link's
        text. Empty when the page has no ``<body>``/``<section>`` to search.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    city_list = {}
    city_from_url = 'https://m.lianjia.com/city/'
    # Without a timeout a stalled connection would block the script forever.
    mhtml = requests.get(city_from_url, timeout=10)
    # Diagnostic output. The parenthesised single-argument form is valid both
    # as a Python 2 print statement and as a Python 3 print() call.
    print(mhtml)
    print(mhtml.content)
    mobj = BeautifulSoup(mhtml.content, 'lxml')
    # An error page or a changed layout may lack <body> or <section>;
    # return what we have instead of raising AttributeError on None.
    body = mobj.body
    section = body.section if body is not None else None
    if section is None:
        return city_list
    for cb in section.find_all('div', {'class': 'block city_block'}):
        for cba in cb.find_all('a'):
            href = cba.get('href')
            # Anchors without an href cannot be turned into a city URL.
            if href:
                city_list[href] = cba.get_text()
    return city_list
def build_price_rows(city, res, timestamp):
    """Convert one decoded price-map response into CSV lines.

    Args:
        city: city name written in the first column.
        res: decoded JSON payload. Only values that are dicts carrying both
            ``name`` and ``transPrice`` produce a row; numbers and any other
            value are skipped.
        timestamp: already formatted time string written in the last column.

    Returns:
        list: one ``city,name,transPrice,timestamp\\n`` string per usable
        entry; empty when ``res`` is not a dict.
    """
    if not isinstance(res, dict):
        return []
    rows = []
    for v in res.values():
        if not isinstance(v, dict) or 'name' not in v or 'transPrice' not in v:
            continue
        fields = [city, v['name'], str(v['transPrice']), timestamp]
        rows.append(','.join(fields) + '\n')
    return rows


def main():
    """Fetch the price map of every listed city and write the rows to CSV.

    Cities whose request fails, returns a non-200 status, or does not return
    JSON are skipped so that one bad city does not abort the whole run.
    """
    cityd = get_city_list()
    # NOTE(review): 'houing' looks like a typo for 'housing'; the name is
    # kept unchanged because it is the script's observable output path.
    # The with-block closes the file even if an unexpected error escapes.
    with io.open('houing_price_bycity.csv', 'w', encoding='utf-8') as f:
        for citycode, city in cityd.items():
            if not citycode:
                continue
            # citycode[1:-1] drops the first and last character of the href.
            # NOTE(review): presumably hrefs look like '/bj/' - confirm.
            url = 'https://{}.lianjia.com/fangjia/priceMap/'.format(citycode[1:-1])
            try:
                r = requests.get(url, timeout=10)
                if r.status_code != 200:
                    continue
                res = r.json()
            except (requests.RequestException, ValueError):
                # Network failure or a body that is not JSON: skip this city.
                continue
            # One timestamp per response, taken right after the fetch.
            stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            f.writelines(build_price_rows(city, res, stamp))


if __name__ == '__main__':
    main()
代码结果:生成 csv 文件 houing_price_bycity.csv,每行依次为:城市、名称(name)、价格(transPrice)、抓取时间