Python 爬虫实例:抓取链家二手房房源信息

import re

import requests
from bs4 import BeautifulSoup


def main():
    """Crawl Lianjia second-hand-house listings (Erdao district, Changchun).

    Walks listing pages 1..100, parses each <li> result card with
    BeautifulSoup, extracts price/position/follow statistics into a dict,
    fetches the listing's detail page for its publication date, and prints
    the dict. Parse errors on a single listing are printed and skipped.
    """
    # Spoof a Chrome User-Agent so the site serves the normal HTML page.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}

    page_max = 100

    for i in range(1, page_max + 1):
        # Page 1 has no "pg1" suffix; later pages use .../pg<N>.
        if i == 1:
            house = 'https://cc.lianjia.com/ershoufang/erdaoqu/'
        else:
            house = 'https://cc.lianjia.com/ershoufang/erdaoqu/pg' + str(i)

        # timeout keeps the crawler from hanging forever on a dead connection.
        res = requests.get(house, headers=headers, timeout=30)
        soup = BeautifulSoup(res.text, 'html.parser')

        li_max = soup.find('ul', class_='sellListContent').find_all('li')
        for li in li_max:
            # One listing's markup may deviate from the expected shape;
            # report and continue rather than abort the whole crawl.
            try:
                house_param = {}

                # houseInfo text is "estate | layout | area | ..." separated by '|'.
                content = li.find('div', class_='houseInfo').text
                content = content.split("|")
                house_param['housing_estate'] = content[0]
                # Pull the leading number (square metres) out of the area field.
                house_param['square_metre'] = re.findall(r'-?\d+\.?\d*e?-?\d*?', content[2])[0]

                position = li.find('div', class_='positionInfo').find('a').text
                house_param['position'] = position

                # Strip every non-digit to keep just the numeric price values.
                totalprice = li.find('div', class_='totalPrice').text
                house_param['total_price'] = re.sub(r"\D", "", totalprice)

                unitprice = li.find('div', class_='unitPrice').text
                house_param['unit_price'] = re.sub(r"\D", "", unitprice)

                # followInfo text is "<followers>/<viewings>" separated by '/'.
                follow = li.find('div', class_='followInfo').text
                follow = follow.split("/")
                house_param['follow'] = re.sub(r"\D", "", follow[0])
                house_param['take_look'] = re.sub(r"\D", "", follow[1])

                # Detail-page URL; its digits double as the listing id.
                title_src = li.find('div', class_='title').find('a').attrs['href']
                house_param['url'] = re.sub(r"\D", "", title_src)

                # Fetch the detail page for the publication date
                # (transaction block, first <li>, second <span>).
                res = requests.get(title_src, headers=headers, timeout=30)
                soup = BeautifulSoup(res.text, 'html.parser')
                pub_date = soup.find('div', class_='transaction').find_all('li')[0].find_all('span')[1].text
                house_param['pub_date'] = pub_date

                print(house_param)
            except Exception as e:
                print(e)


if __name__ == '__main__':
    main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值