selenium自动获取京东热卖水果排行榜信息存入表格

#!/usr/bin/env python
# coding=utf-8
# 自动获取京东热卖水果排行榜信息,url地址是通过js加载生成的无法获取

from selenium import webdriver
import time,re,os
from debug_info import Log_info
import openpyxl
from openpyxl import Workbook
from selenium.webdriver import ChromeOptions  # 更改Chrome配置达到反爬机制

class Saas(object):
    """Scrape the JD.com best-selling fresh-fruit ranking into an Excel workbook.

    Constructing the object starts a Chrome session through Selenium and
    opens (or creates) the workbook.  :meth:`jd` loads the mobile ranking
    page, reads the first four entries and writes them to a new worksheet
    named after the current date.
    """

    # Cell text used when a figure is not present on the page.
    NO_DATA = "暂无数据"
    NO_PRICE = "暂无定价"
    # Integer or decimal number; findall() yields
    # (integer_part, fractional_part) tuples, the second part possibly ''.
    _NUMBER_RE = re.compile(r'([1-9]+[0-9]*|0)(\.[\d]+)?')
    # Ranking page that is scraped.
    _RANKING_URL = ('https://ranking.m.jd.com/comLandingPage/comLandingPage'
                    '?contentId=9294&sku=%27%27&rankType=10')
    # XPath of the detail container of one ranking entry; the placeholder is
    # the 1-based index of the entry's outer <div>.
    _ITEM_XPATH = '//*[@id="hello"]/div/div/div/div[%d]/div/div/div[3]'

    def __init__(self):
        """Start Chrome, maximise the window and open or create the workbook."""
        # Drop the "enable-automation" switch.  Per the original author's
        # note this is an anti-bot-detection measure meant to make
        # window.navigator.webdriver read as undefined (not verified here).
        option = ChromeOptions()
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        self.driver = webdriver.Chrome(options=option)
        self.driver.maximize_window()

        # Reuse the workbook when it already exists so that each run adds a
        # sheet instead of replacing the file; otherwise start a new one.
        self.file_name = "./jd新鲜水果榜.xlsx"
        if os.path.exists(self.file_name):
            print("存在表格")
            self.wb = openpyxl.load_workbook(self.file_name)
        else:
            self.wb = Workbook()
        # Header row of every sheet.
        self.top = ['序号', '名称', '累计购买人数', '近24小时售出(件)', '价格', '获取时间', '链接地址']

    def changeNum(self, num):
        """Convert a numeric string to a number.

        Args:
            num: Text such as ``"12"`` or ``"29.9"``.

        Returns:
            A ``float`` when the text contains a decimal point, else an ``int``.

        Raises:
            ValueError: If the text is not a valid number.
        """
        if "." in num:
            return float(num)
        return int(num)

    def _parse_count(self, text, match_index=0):
        """Extract a count from a label such as ``"累计1.2万人购买"``.

        Args:
            text: Label text read from the page.
            match_index: Which number in the text to use.  The 24-hour label
                contains the literal "24" first, so its figure is number 1.

        Returns:
            The count as a number (scaled by 10000 when the text contains
            "万"), or ``NO_DATA`` when the text has no such number.
        """
        matches = self._NUMBER_RE.findall(text)
        if match_index >= len(matches):
            return self.NO_DATA
        number = "".join(matches[match_index])
        if "万" in text:
            return round(float(number) * 10000)
        return self.changeNum(number)

    def _first_text(self, xpath):
        """Return the text of the first element matching ``xpath``.

        Raises:
            IndexError: If nothing matches.
        """
        # Function-scope import keeps this class self-contained.
        # find_elements(By.XPATH, ...) works on Selenium 3 and 4; the
        # find_elements_by_xpath shortcut no longer exists in Selenium 4.3+.
        from selenium.webdriver.common.by import By
        return self.driver.find_elements(By.XPATH, xpath)[0].text

    def _read_entry(self, i, now_time, logger):
        """Read ranking entry ``i`` (1-based) and return it as one sheet row."""
        # Entry i lives in the (i + 2)-th <div> of the list container.
        base = self._ITEM_XPATH % (i + 2)

        title = self._first_text(base + '/div[1]/div[1]/span').replace("自营", "")
        count_text = self._first_text(base + '/div[1]/div[2]/div[1]/span')
        try:
            sale_text = self._first_text(base + '/div[1]/div[2]/div[2]/span')
        except Exception:
            # Best effort: the second label is optional on the page.
            logger("Warning:注意第%s个" % i)
            sale_text = ''

        if "累计" in count_text:
            buy_count = self._parse_count(count_text, 0)
        else:
            # When the cumulative label is missing, the 24-hour label takes
            # its position on the page.
            if "24小时" in count_text:
                sale_text = count_text
            buy_count = self.NO_DATA
        print("购买人数:", buy_count)

        if "24小时" in sale_text:
            buy_sale = self._parse_count(sale_text, 1)
        else:
            buy_sale = self.NO_DATA
        print("近24小时售出:", buy_sale)

        try:
            price_text = self._first_text(base + '/div[2]/div/span')
            price = self.changeNum(price_text.replace("¥", ""))
        except Exception:
            # Missing or non-numeric price.
            price = self.NO_PRICE
        print("价格:", price)

        # Product links are produced by JavaScript on the page and are not
        # read here; a placeholder is stored instead.
        url = "https://item.m.jd.com/product/*.html"
        return [i, title, buy_count, buy_sale, price, now_time, url]

    def jd(self, logger):
        """Scrape the first four ranking entries and save them to the workbook.

        Args:
            logger: Callable taking one message string.

        Raises:
            IndexError: If the title or first count label of an entry is
                missing.
            selenium.common.exceptions.NoSuchElementException: If the rank
                marker used for scrolling is not found.
        """
        from selenium.webdriver.common.by import By

        self.driver.get(self._RANKING_URL)
        time.sleep(2)  # fixed wait after navigation (presumably for rendering)

        now_time = time.strftime("%Y-%m-%d", time.localtime())
        result_date = []
        for i in range(1, 5):
            logger("数据[%s]获取中..." % i)
            result_date.append(self._read_entry(i, now_time, logger))

            time.sleep(1)
            # Scroll the element whose text equals the rank number into view
            # before reading the next entry.
            target_elem = self.driver.find_element(By.XPATH, "//*[text()='" + str(i) + "']")
            self.driver.execute_script("return arguments[0].scrollIntoView();", target_elem)
            time.sleep(1)

        # New sheet named after today's date, added after the existing ones.
        self.ws = self.wb.create_sheet(now_time)
        # On a fresh sheet append() starts at row 1: header first, then data.
        self.ws.append(self.top)
        for record in result_date:
            self.ws.append(record)
        time.sleep(1)
        self.wb.save(self.file_name)
        logger("全部处理完成!")
# def tearDown(self):
#     # 退出
#     logger("以上数据全部处理完毕!")
#     # time.sleep(15)
#     # 这里就不退出了
#     # self.driver.quit()

if __name__ == "__main__":
    # Obtain the logging callable from the project-local Log_info helper
    # (per the original comment it writes to both the screen and a file).
    logger = Log_info().main()
    time.sleep(2)
    # Run the scraper.  driver.quit() is not called here, so the browser
    # window stays open after the run.
    saas = Saas()
    try:
        saas.jd(logger)
    except Exception as e:
        # Pass one pre-formatted string, as every other logger call does, so
        # the handler cannot fail on the logger's signature.
        logger("出现异常:%s" % e)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值