# Topics covered: multithreading, reading CSV, XPath
import json
import csv
import requests
import threading
import lxml
import lxml.etree
# Re-entrant (recursive) lock created once at module level.
rLock = threading.RLock()

# HTTP headers passed to requests.get; carries a desktop Firefox
# User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) '
        'Gecko/20100101 Firefox/43.0'
    ),
}
# Get the districts (area name -> area URL)
def getAreaList(url, timeout=10):
    """Fetch ``url`` and return a dict mapping area name to area URL.

    The page is downloaded with the module-level ``headers``, parsed as
    HTML, and every ``<a>`` found under a ``<div data-role="ershoufang">``
    is collected. Each name/URL pair is also printed as it is found.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the caller
            forever. Defaults to 10.

    Returns:
        A dict whose keys are the first text node of each anchor and whose
        values are ``'https://gz.lianjia.com'`` joined with the anchor's
        ``href``. If two anchors share the same text, the later one wins.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (including a timeout expiring).
    """
    html = requests.get(url, headers=headers, timeout=timeout).text
    tree = lxml.etree.HTML(html)

    area_links = {}
    for anchor in tree.xpath('//div[@data-role="ershoufang"]//a'):
        names = anchor.xpath('./text()')
        hrefs = anchor.xpath('./@href')
        # An anchor without a direct text node or without an href cannot
        # produce a name/URL pair; skip it instead of raising IndexError
        # and losing every other area.
        if not names or not hrefs:
            continue
        area_name = names[0]
        # hrefs on the page are site-relative, so prefix the site root.
        area_url = 'https://gz.lianjia.com' + hrefs[0]
        area_links[area_name] = area_url
        print(area_name, area_url)
    return area_links
# Get the number of pages for an area
def getAreaPage(areaUrl,areaName):
html = requests.get(are