1. 配置好 Selenium 环境,配置教程地址如下:
https://blog.csdn.net/liaoqingjian/article/details/116785445?spm=1001.2014.3001.5502
2. 安装 selenium 库:
pip install selenium
3.完整代码如下:
import csv
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
def _first_match(driver, xpath):
    """Return the first element matching *xpath*, or ``None`` if there is none.

    ``find_elements`` is used rather than ``find_element`` so that a missing
    element yields an empty list instead of raising.
    """
    matches = driver.find_elements(By.XPATH, xpath)
    return matches[0] if matches else None


def _scrape_item(driver, index):
    """Read one search result from the page the driver is currently showing.

    Args:
        driver: WebDriver positioned on a search-result page.
        index: 1-based position of the ``<li>`` inside the result list.

    Returns:
        A list in CSV column order (image src, title, price, author,
        publication date, publisher, review count, text of ``p[2]``), or
        ``None`` when the result list has no item at ``index``. A field whose
        element is absent is recorded as an empty string.
    """
    item = f'//ul[@id="component_59"]/li[{index}]'
    if _first_match(driver, item) is None:
        return None

    def text_of(relative_xpath):
        node = _first_match(driver, item + relative_xpath)
        return node.text if node is not None else ""

    image = _first_match(driver, item + "/a/img")
    return [
        # Cover image URL.
        image.get_attribute("src") if image is not None else "",
        # Book title.
        text_of("/p[1]/a"),
        # Price.
        text_of("/p[3]/span[1]"),
        # Author, publication date and publisher share one paragraph; it is
        # located by class rather than by position.
        text_of('/p[@class="search_book_author"]/span[1]/a'),
        text_of('/p[@class="search_book_author"]/span[2]'),
        text_of('/p[@class="search_book_author"]/span[3]/a'),
        # Review count.
        text_of('/p[@class="search_star_line"]/a'),
        # Text of the second paragraph (written to the "评论" column).
        text_of("/p[2]"),
    ]


def spider(url, keyword, *, pages=3, per_page=60, output_path="当当python.csv"):
    """Search a site for *keyword* with Chrome and save the results as CSV.

    Opens ``url``, types ``keyword`` into the search box with id ``key_S``,
    scrapes up to ``per_page`` results from each of up to ``pages`` result
    pages, and writes every collected row to ``output_path``.

    Args:
        url: Address of the page that hosts the search box.
        keyword: Text to search for.
        pages: Maximum number of result pages to scrape.
        per_page: Maximum number of results to read from each page.
        output_path: Destination CSV file (overwritten, UTF-8 encoded).
    """
    driver = webdriver.Chrome()
    rows = []
    try:
        # Open the start page and maximise the window.
        driver.get(url)
        driver.maximize_window()

        # Locate the search box, type the keyword and submit with ENTER.
        search_box = driver.find_element(By.ID, "key_S")
        search_box.send_keys(keyword)
        search_box.send_keys(Keys.ENTER)
        # Fixed pause to let the result page load.
        time.sleep(5)

        for page in range(pages):
            for index in range(1, per_page + 1):
                row = _scrape_item(driver, index)
                if row is None:
                    # Fewer results than per_page on this page.
                    break
                rows.append(row)
                print(index)

            if page == pages - 1:
                break
            # Advance to the next page via the link whose text contains
            # "下一页"; stop early when there is no such link.
            next_links = driver.find_elements(By.PARTIAL_LINK_TEXT, "下一页")
            if not next_links:
                break
            next_links[0].click()
            time.sleep(2)
    finally:
        # Always close the browser, even when scraping fails part-way.
        driver.quit()

    print(rows)
    with open(output_path, "w", newline="", encoding="utf-8") as datacsv:
        csvwriter = csv.writer(datacsv, dialect="excel")
        csvwriter.writerow(
            ["图片", "书名", "价格", "作者", "出版日期", "出版社", "评论数", "评论"]
        )
        csvwriter.writerows(rows)
if __name__ == "__main__":
    # Script entry point: run the spider against the Dangdang home page
    # with the search keyword "python".
    spider(url="http://www.dangdang.com/", keyword="python")
4.跑通了的同学拜托给我一键三连,谢谢。