import requests
from lxml import etree
import time
# Browser-like User-Agent string (Chrome 62 on Windows) carried in the
# request headers below.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'

# HTTP headers for outgoing requests.
headers = {'User-Agent': _USER_AGENT}
# Scratch notes kept from developing the scraper.
#
# Paginated search URLs (the page number follows the "p"):
#   http://cd.xiaozhu.com/search-duanzufang-p2-0/
#   http://cd.xiaozhu.com/search-duanzufang-p3-0/
#
# First experiment: grab every listing title with a single absolute XPath.
#   title=file.xpath('//*[@id="page_list"]/ul/li/div[2]/div/a/span/text()')
#   time.sleep(3)
#   print(title)
#
# XPath of one listing element:
#   //*[@id="page_list"]/ul/li[1]

# Output file shared by the whole script, opened in append mode so repeated
# runs accumulate.  The encoding is pinned to UTF-8 because the platform
# default (for example a legacy code page on Windows) may be unable to
# encode the scraped text, which makes write() raise UnicodeEncodeError.
f = open('xiaozhu.txt', 'a+', encoding='utf-8')
def getdetails(url):
    """Download one search-result page and append its listings to ``f``.

    Every ``<li>`` under the element with id ``page_list`` is treated as one
    listing.  Its title, price and description texts are extracted with
    relative XPath expressions and written to the module-level file ``f`` as
    three consecutive lines.  After the page has been processed the function
    sleeps for three seconds to throttle successive requests.

    Args:
        url: Address of the search-result page to fetch.

    Raises:
        requests.exceptions.RequestException: If the page cannot be
            downloaded (connection failure or timeout).

    Listings that lack one of the three fields are skipped, and a listing
    whose text cannot be encoded by ``f`` is reported and skipped, so a
    single bad entry no longer discards the rest of the page.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    res = requests.get(url, headers=headers, timeout=30)
    tree = etree.HTML(res.text)
    listings = tree.xpath('//*[@id="page_list"]/ul/li')

    for listing in listings:
        titles = listing.xpath('div[2]/div/a/span/text()')
        prices = listing.xpath('div[2]/span[1]/i/text()')
        descriptions = listing.xpath('div[2]/div/em/text()')

        # xpath() returns an empty list when nothing matches; indexing [0]
        # on it would raise IndexError, so incomplete entries are skipped.
        if not (titles and prices and descriptions):
            continue

        record = '{}\n{}\n{}\n'.format(
            titles[0], prices[0], descriptions[0].strip()
        )
        try:
            # One write per record: an encoding failure cannot leave a
            # half-written listing in the file.
            f.write(record)
        except UnicodeEncodeError as err:
            print('Skipped a listing that could not be encoded: {}'.format(err))

    # Pause between pages to avoid hammering the server.
    time.sleep(3)
if __name__ == '__main__':
    # Crawl search-result pages 1 through 9 in order.
    try:
        for i in range(1, 10):
            url = 'http://cd.xiaozhu.com/search-duanzufang-p{}-0/'.format(i)
            getdetails(url)
            print('第{}页爬取完毕'.format(i))
    finally:
        # Close the shared output file even when a page fetch raises, so
        # whatever was buffered so far is flushed to disk.
        f.close()
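# xpath爬取小猪短租信息  (article title: "Scraping Xiaozhu short-term rental listings with XPath")
# 最新推荐文章于 2021-10-12 13:31:15 发布  (web-page footer: "latest recommended article published 2021-10-12 13:31:15")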