# 学习爬虫走了很多弯路,记录一下 python+selenium 的测试,直接上代码
#encoding=utf-8
from selenium.webdriver import Chrome
from bs4 import BeautifulSoup
# Launch a Chrome instance driven by the local chromedriver binary.
# A raw string is required here: in a plain string, sequences such as "\G"
# or "\A" in the Windows path are invalid escape sequences (DeprecationWarning
# today, a hard error in future Python versions).
driver = Chrome(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
"""
环境要求:
1. pip install selenium
2.需要将chromedriver.exe放在driver所指路径,下载时要与本地chrome版本匹配或更新,具体版本查看chrome
下载地址:http://npm.taobao.org/mirrors/chromedriver/
"""
# 加载URL网页
def grabBrands(url):
    """Load a JD search page and scrape the brand names from the brand filter.

    Opens *url* in the shared module-level Chrome ``driver``, clicks the
    "more" button of the brand selector so the full brand list is rendered,
    then parses the resulting page with BeautifulSoup.

    :param url: JD search-result URL to load.
    :return: list of brand-name strings, or ``None`` if the page could not
        be loaded or the expected brand list was not found.
    """
    goodsname = []
    try:
        driver.get(url)
        # Locate the "more" button of the brand filter; guard against the
        # selector being absent instead of letting element[0] raise IndexError.
        buttons = driver.find_elements_by_xpath(
            "//*[@id='J_selector']/div[1]/div/div[3]/a[1]")
        if not buttons:
            return None
        buttons[0].click()  # expand the full brand list

        # Explicit runtime check instead of `assert` (asserts are stripped
        # under `python -O`, so they must not carry real validation).
        if "No results found." in driver.page_source:
            return None

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        brand_list = soup.find('ul', {"class": "J_valueList v-fixed"})
        if brand_list is None:
            # Page layout changed or the list never rendered; previously this
            # raised AttributeError on `.find_all` and was silently swallowed.
            return None
        for item in brand_list.find_all('li'):
            goodsname.append(item.a.attrs['title'])
    except Exception as ex:
        # Close the current tab on failure (quit() would shut the whole
        # browser); report the error instead of swallowing it silently.
        print("grabBrands failed: %r" % (ex,))
        driver.close()
        return None
    return goodsname
if __name__=="__main__":
good = input("请输入商品名称:")
url = 'https://search.jd.com/Search?keyword='+ str(good)+ '&enc=utf-8'
print(grabBrands(url))