爬取链家网北京二手房信息,包括单线程和多线程两种实现。
一、单线程
import requests
from bs4 import BeautifulSoup
# Single-threaded crawl of the Lianjia Beijing second-hand housing listings.
# Results are accumulated across all pages into three module-level lists:
#   m1 - text of every <div class="positionInfo"> (location info)
#   m2 - text of every <div class="houseInfo">    (house info)
#   m3 - text of every <div class="totalPrice">   (price info)
m1 = []
m2 = []
m3 = []

# The request header is the same for every page, so build it once.
header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}

for page in range(1, 101):
    # Listing pages are addressed as .../ershoufang/pg1 through pg100.
    url = 'https://bj.lianjia.com/ershoufang/pg' + str(page)

    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, headers=header, timeout=10)

    # Decode the raw bytes as UTF-8 directly. The original re-encoded
    # response.text with response.encoding and decoded again, which fails
    # when response.encoding is None.
    html = response.content.decode('utf-8')
    soup = BeautifulSoup(html, "lxml")

    # Scrape location info
    m1.extend(tag.text for tag in soup.find_all('div', attrs={'class': 'positionInfo'}))
    # Scrape house info
    m2.extend(tag.text for tag in soup.find_all('div', attrs={'class': 'houseInfo'}))
    # Scrape price info
    m3.extend(tag.text for tag in soup.find_all('div', attrs={'class': 'totalPrice'}))