京东的反爬做得太厉害了,没办法一劳永逸。
话不多说,先上代码;缺少的库自己 pip install 一下。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd
import time
import openpyxl
from urllib.parse import unquote
import time
# Page opened right after start-up, and how long to pause on it.
LOGIN_PAGE_URL = 'https://item.jd.com/100034710036.html'
LOGIN_WAIT_SECONDS = 40

# Start the Chrome WebDriver session.
browser = webdriver.Chrome()

# Open the product page.
browser.get(LOGIN_PAGE_URL)

# Pause to leave time for a manual login before the script continues.
time.sleep(LOGIN_WAIT_SECONDS)
# Initial values: an empty string and a zero counter.
# NOTE(review): presumably filled in by the scraping loop that follows — confirm.
text = ""
number = 0

# Product IDs, kept as strings.
INDEX = [
    "100002585808",
    "3972915",
    "4642692",
    "100075885857",
    "100078384502",
    "1803367119",
    "100092728232",
    "100097665850",
    "100092728228",
    "100086968888",
]
#接下来确定商品ID和页码,以及最终请求网址
for good in range(0,9):
ID=INDEX[0]
for page in ra