Scraping Taobao data with Python

Most of the write-ups I found online were either long outdated or needlessly complicated to configure, so I put together my own from scratch. It is brute-force simple but works remarkably well: it scrapes product information for a given search term across a specified number of pages. The only dependencies are selenium and beautifulsoup4, plus a working Chrome/ChromeDriver setup.

from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import csv
from bs4 import BeautifulSoup

search_name = input('Enter the search term: ')
demanded_page = input('How many pages do you want: ')
driver = webdriver.Chrome()

# accumulators for the three output columns
price_list = []
sale_list = []
name_list = []

# JS snippet that scrolls to the bottom of the page (Taobao lazy-loads items)
js = 'window.scrollTo(0, document.body.scrollHeight)'

def get_price(bs):
    # Price--priceInt holds only the integer part of the price; the
    # fractional part (if shown) is rendered in a separate span
    lis = bs.find_all('span', {'class': "Price--priceInt--ZlsSi_M"})
    for x in lis:
        price = x.get_text()
        price_list.append(price)

def get_sales(bs):
    lis_2 = bs.find_all('span', {'class': "Price--realSales--FhTZc7U"})
    for x in lis_2:
        # strip the '人付款' ("people paid") suffix, leaving just the number
        real_sales = x.get_text().replace('人付款', '')
        sale_list.append(real_sales)

def get_names(bs):
    lis_3 = bs.find_all('div', {'class': "Title--title--jCOPvpf"})
    for title in lis_3:
        # each title is split across several spans (Taobao highlights the
        # search keyword in its own span); join the fragments back into one
        # string so name_list gets exactly one entry per product and stays
        # aligned with the price and sales lists
        fragments = [span.get_text() for span in title.find_all('span', {'class': ""})]
        name_list.append(''.join(fragments))

def turn_to_next_page(driver):
    # click the 'next page' button; this absolute XPath is brittle and may
    # need updating whenever Taobao changes its page layout
    submit = driver.find_element(By.XPATH, '//*[@id="pageContent"]/div[1]/div[3]/div[4]/div/div/button[2]/span')
    submit.click()
    time.sleep(5)
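
# Optional: instead of the fixed time.sleep() pauses used throughout this
# script, an explicit wait can block only until the results actually render.
# This is a sketch, not part of the original flow; it reuses the title class
# already scraped above, which will break whenever Taobao rotates its
# class-name hashes.
def wait_for_results(driver, timeout=15):
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    # block until at least one product title is present (raises
    # TimeoutException if nothing shows up within `timeout` seconds)
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'Title--title--jCOPvpf'))
    )
    # e.g. call wait_for_results(driver) right after driver.get(url)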


def get_info(driver):
    # parse the rendered page and harvest names, prices, and sales counts
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')
    get_names(bs)
    get_price(bs)
    get_sales(bs)

# page counter
index = 1

def main(driver):
    global index
    url = f'https://s.taobao.com/search?catId=100&from=sea_1_searchbutton&page={index}&q={search_name}&spm=a2141.241046-cn.searchbar.d_2_searchbox&tab=all&tmhkh5=&type=p'
    driver.get(url)
    # the long pause gives the page time to load -- and, if Taobao redirects
    # to its login page, time to scan the QR code manually
    time.sleep(10)
    driver.maximize_window()
    time.sleep(5)
    get_info(driver)
    while index < int(demanded_page):
        # scroll to the bottom so the lazy-loaded items render
        driver.execute_script(js)
        time.sleep(5)
        turn_to_next_page(driver)
        get_info(driver)
        index += 1
    driver.quit()  # quit() ends the whole session, not just the window

# run the scraper
main(driver)

# save the results (gbk so Excel on Chinese Windows opens the file cleanly)
csv_file = open(f'D:/{search_name}_data.csv', 'w', newline='', encoding='gbk')
writer = csv.writer(csv_file)
writer.writerow(['product', 'price', 'sales'])
for name, price, sales in zip(name_list, price_list, sale_list):
    try:
        writer.writerow([name, price, sales])
    except UnicodeEncodeError:
        # skip rows whose characters gbk cannot encode
        pass
csv_file.close()
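
# Note: rows containing characters outside the gbk codepage (emoji, rare
# glyphs) are silently dropped by the except clause above. Writing the file
# with encoding='utf-8-sig' instead would keep every row and still open
# cleanly in Excel.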

print('Done!!!')

# More extractor functions can be added later to pull any other fields you need.
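
For example, here is a sketch of one such extension that collects the shop name for each listing. The class name used below is a placeholder, not a real one: Taobao's class names carry build hashes and change over time, so inspect the live results page with DevTools and substitute the current value.

shop_list = []

def get_shops(bs):
    # 'ShopInfo--shopName--PLACEHOLDER' is not a real class name -- replace it
    # with whatever the shop-name span is called on the live page
    lis_5 = bs.find_all('span', {'class': "ShopInfo--shopName--PLACEHOLDER"})
    for x in lis_5:
        shop_list.append(x.get_text())

Call get_shops(bs) inside get_info(), add a 'shop' column to the CSV header, and make sure every extractor appends exactly one value per product so the final zip() stays aligned.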
