
设置本地 Chrome(谷歌浏览器)可执行文件的路径
# Location of the local Chrome executable handed to DrissionPage below.
path = r'C:\Users\PC\AppData\Local\Google\Chrome\Application\chrome.exe' # Change this to the path of the Chrome executable on your own machine
# Set the browser path on a fresh ChromiumOptions and call save() on the result.
# NOTE(review): save() presumably persists the setting to DrissionPage's config file — confirm against the docs.
ChromiumOptions().set_browser_path(path).save()
使用 XPath 定位元素,提取每个商品的链接、标题和店铺名
# Collect every search-result entry (<li class="gl-item">) on the current page.
items = page.eles('xpath://li[@class="gl-item"]')
print(len(items))
for item in items:
    # The anchor under the "p-name" div supplies both the detail-page link
    # and the title, so look it up once and read both attributes from it.
    name_link = item('xpath://div[contains(@class,"p-name")]/a')
    info_url = name_link.link
    title = name_link.text
    # Shop name comes from the anchor with class "curr-shop hd-shopname".
    shop = item('xpath://a[@class="curr-shop hd-shopname"]').text
完整代码如下
from DrissionPage import WebPage ,ChromiumOptions
from DataRecorder import Recorder
# Location of the local Chrome executable handed to DrissionPage below.
path = r'C:\Users\PC\AppData\Local\Google\Chrome\Application\chrome.exe' # Change this to the path of the Chrome executable on your own machine
# Set the browser path on a fresh ChromiumOptions and call save() on the result.
# NOTE(review): save() presumably persists the setting to DrissionPage's config file — confirm against the docs.
ChromiumOptions().set_browser_path(path).save()
def main():
    """Scrape JD.com search results for '手机' into ``data.xlsx``.

    Opens the search URL in a ``WebPage``. For each result page it scrolls
    to the bottom, reads the detail URL, title and shop name of every
    ``li.gl-item`` entry, and queues them on a ``Recorder``. It then clicks
    the '下一页' element, and stops once that element is no longer found.

    ``record()`` is called in a ``finally`` clause so rows queued before an
    error are still flushed.
    """
    r = Recorder('data.xlsx')
    # Header row: url / title / shop name.
    r.add_data(('url', '标题', '店铺名'))

    co = ChromiumOptions().use_system_user_path()
    page = WebPage(driver_or_options=co)
    page.timeout = 1
    page.set.retry_times(5)  # number of reconnect attempts when a connection fails
    page.set.retry_interval(3)  # interval between reconnect attempts
    # NOTE(review): per the DrissionPage docs this makes attribute reads on a
    # not-found element yield '' instead of raising — confirm for the
    # installed version.
    page.set.NoneElement_value('')

    url = 'https://search.jd.com/Search?keyword=手机'
    print(url)
    page.get(url, show_errmsg=True)

    try:
        while True:
            # Presumably scrolling triggers lazily loaded results and the
            # wait gives them time to render — TODO confirm.
            page.scroll.to_bottom()
            page.wait(6)

            items = page.eles('xpath://li[@class="gl-item"]')
            print(len(items))
            for item in items:
                # One lookup of the "p-name" anchor serves both link and title.
                name_link = item('xpath://div[contains(@class,"p-name")]/a')
                info_url = name_link.link
                title = name_link.text
                shop = item('xpath://a[@class="curr-shop hd-shopname"]').text
                print(info_url, title, shop)
                r.add_data((info_url, title, shop))

            btn = page('下一页', timeout=2)
            # No next-page element: leave the loop.
            if not btn:
                break
            btn.click()
            page.wait.load_start()
    finally:
        # Flush queued rows even if scraping was interrupted by an exception.
        r.record()
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()