1. 打开阿里巴巴(1688)采购批发网站,网址如下:
https://re.1688.com/?cosite=baidujj_pz&keywordid=&trackid=885662561117990122602&location=
2. 开始写代码,第一步:安装所需的库
pip install selenium
3. 第二步:导入库并打开网站。打开网站时使用无头模式(不显示浏览器窗口),并禁止加载图片和 CSS,这是加速爬取的关键。
import csv
import re
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# Start Chrome without a visible window and with image loading blocked,
# then open the 1688 landing page.
START_URL = "https://re.1688.com/?cosite=baidujj_pz&keywordid=&trackid=885662561117990122602&location="

chrome_prefs = {
    # 2 is intended to block the content type (images here).
    "profile.managed_default_content_settings.images": 2,
    # NOTE(review): this key looks like a Firefox preference name; Chrome
    # presumably ignores it, so CSS may still be loaded -- TODO confirm.
    "permissions.default.stylesheet": 2,
}

options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_experimental_option("prefs", chrome_prefs)

driver = webdriver.Chrome(options=options)
driver.get(START_URL)
4. 将爬取结果写入 CSV 表格:先输入关键词搜索货物,再向下滚动页面使货物全部展现,然后读取所需的货物名称和价格。这里我们以衣服为例。
# Open the output CSV, write the header row, then type the user's keyword
# into the 1688 search box and submit the search.
# utf-8-sig writes a BOM so Excel detects the encoding of the Chinese text.
# NOTE: the paging loop of step 5 must be nested inside this `with` block,
# because it keeps writing rows through `csvwriter`.
with open("1688阿里巴巴采购批发网.csv", "w", newline="", encoding="utf-8-sig") as datacsv:
    csvwriter = csv.writer(datacsv, dialect="excel")
    csvwriter.writerow(['产品', '价格'])
    p = input("请输入要寻找的货物:")
    # find_element_by_xpath() was removed in Selenium 4.3; use
    # find_element(By.XPATH, ...) instead.
    driver.find_element(By.XPATH, '//*[@id="alisearch-keywords"]').send_keys(p)
    driver.find_element(By.XPATH, '//*[@id="alisearch-submit"]').click()
5. 最后循环点击“下一页”,并设定要爬取的页数即可,这里我们选择爬取 10 页。
# Scrape up to 10 result pages, writing one CSV row (product, price) per offer.
# NOTE: when assembling the full script, nest this loop inside the
# `with open(...)` block of step 4 so the file behind `csvwriter` is still open.
for page in range(10):
    # Scroll to the bottom so that all offers on the page are displayed,
    # then give the page a few seconds to finish rendering.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(4)

    cards = driver.find_elements(By.XPATH, '//*[@id="offerList"]/div/div/div[1]/div')
    print(len(cards))

    for card in cards:
        # Look up title/price relative to the card itself instead of
        # re-querying the whole document by index; find_elements() returns
        # an empty list (no exception) when a card lacks the element.
        titles = card.find_elements(By.XPATH, './a/div[2]')
        prices = card.find_elements(By.XPATH, './a/div[4]/a/span')
        if not titles or not prices:
            continue  # skip cards without a title or a price

        name = titles[0].text
        price = prices[0].text
        # When the text holds several "¥" prices, keep only the first one;
        # otherwise leave the text unchanged.
        first_price = re.search(r'¥(.*?)¥', price)
        if first_price:
            price = "¥" + first_price.group(1)

        csvwriter.writerow([name, price])
        print("产品:" + name, "价格:" + price)

    # Stop cleanly when there is no "next page" button to click.
    next_buttons = driver.find_elements(By.XPATH, '//*[@id="offerList"]/div/div/div[2]/div/div/button[2]')
    if not next_buttons:
        break
    next_buttons[0].click()
6.完整代码展示
from selenium import webdriver
import time
import csv
import re
START_URL = "https://re.1688.com/?cosite=baidujj_pz&keywordid=&trackid=885662561117990122602&location="
OUTPUT_CSV = "1688阿里巴巴采购批发网.csv"
MAX_PAGES = 10
SCROLL_SETTLE_SECONDS = 4

SEARCH_INPUT_XPATH = '//*[@id="alisearch-keywords"]'
SEARCH_SUBMIT_XPATH = '//*[@id="alisearch-submit"]'
OFFER_CARDS_XPATH = '//*[@id="offerList"]/div/div/div[1]/div'
NEXT_PAGE_XPATH = '//*[@id="offerList"]/div/div/div[2]/div/div/button[2]'


def build_driver():
    """Return a headless Chrome driver configured to skip loading images."""
    options = webdriver.ChromeOptions()
    prefs = {
        "profile.managed_default_content_settings.images": 2,
        # NOTE(review): this key looks like a Firefox preference name; Chrome
        # presumably ignores it, so CSS may still be loaded -- TODO confirm.
        'permissions.default.stylesheet': 2,
    }
    options.add_experimental_option("prefs", prefs)
    options.add_argument('--headless')
    return webdriver.Chrome(options=options)


def extract_first_price(text):
    """Return the first "¥..." price when *text* holds several, else *text*.

    For example "¥12.00¥15.00" becomes "¥12.00", while a single price or
    any text without two "¥" signs on one line is returned unchanged.
    """
    match = re.search(r"¥(.*?)¥", text)
    if match is None:
        return text
    return "¥" + match.group(1)


def scrape(driver, csvwriter, keyword, max_pages=MAX_PAGES):
    """Search for *keyword* and write (product, price) rows to *csvwriter*.

    At most *max_pages* result pages are visited; paging stops early when
    no "next page" button is found.
    """
    # Imported here so the script keeps working with its existing top-level
    # import block. find_element_by_xpath() was removed in Selenium 4.3, so
    # the By-based locator API is used instead.
    from selenium.webdriver.common.by import By

    driver.find_element(By.XPATH, SEARCH_INPUT_XPATH).send_keys(keyword)
    driver.find_element(By.XPATH, SEARCH_SUBMIT_XPATH).click()

    for page in range(max_pages):
        # Scroll to the bottom so that all offers on the page are displayed,
        # then give the page a few seconds to finish rendering.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(SCROLL_SETTLE_SECONDS)

        cards = driver.find_elements(By.XPATH, OFFER_CARDS_XPATH)
        print(len(cards))

        for card in cards:
            # Relative lookups; find_elements() yields [] instead of raising
            # when a card has no title or price element.
            titles = card.find_elements(By.XPATH, './a/div[2]')
            prices = card.find_elements(By.XPATH, './a/div[4]/a/span')
            if not titles or not prices:
                continue

            name = titles[0].text
            price = extract_first_price(prices[0].text)
            csvwriter.writerow([name, price])
            print("产品:" + name, "价格:" + price)

        if page == max_pages - 1:
            break  # last requested page: no need to navigate further
        next_buttons = driver.find_elements(By.XPATH, NEXT_PAGE_XPATH)
        if not next_buttons:
            break  # no further result pages
        next_buttons[0].click()


def main():
    """Run the scraper: open 1688, ask for a keyword, save results as CSV."""
    driver = build_driver()
    try:
        driver.get(START_URL)
        # utf-8-sig writes a BOM so Excel detects the encoding correctly.
        with open(OUTPUT_CSV, "w", newline="", encoding="utf-8-sig") as datacsv:
            csvwriter = csv.writer(datacsv, dialect="excel")
            csvwriter.writerow(['产品', '价格'])
            keyword = input("请输入要寻找的货物:")
            scrape(driver, csvwriter, keyword)
    finally:
        # Always shut the browser down, even when scraping fails midway.
        driver.quit()


if __name__ == "__main__":
    main()
7.运行展示
8. 最终共爬取 741 条衣服价格数据,并生成了表格。
9.成功的小伙伴们望一键三连!!!感谢。