Python 拼多多推广多店铺爬虫

拼多多商家后台“多多推广”页面的爬虫:登录多个店铺,抓取推广数据表格并写入 MySQL。

在这里插入图片描述

python+selenium

上源码

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020-10-29 13:53
# @Author : Aries
# @Site : 
# @File : pddDate.py
# @Software: PyCharm

import datetime
from time import sleep

import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By



def getPdd(username, password, dr):
    """Log in to the Pinduoduo merchant backend and open a sidebar page.

    Uses ``find_element(By.XPATH, ...)`` rather than the
    ``find_element_by_xpath`` helpers: the helpers were removed in
    Selenium 4.3, while the generic form works on both 3.x and 4.x.

    Args:
        username: Account name typed into the login form.
        password: Password typed into the login form.
        dr: Selenium WebDriver instance that will hold the session.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If any of the
            hard-coded locators is not found within the implicit wait.
    """
    dr.get('https://mms.pinduoduo.com/login')
    # Applies to every later lookup on this driver, not only the ones below.
    dr.implicitly_wait(30)

    # Second tab of the login widget; presumably "account login" as opposed
    # to QR-code login - verify against the live page.
    dr.find_element(
        By.XPATH,
        '//*[@id="root"]/div/div/div/main/div/section[2]/div/div/div/div[1]/div/div[2]').click()

    # Locate each input once, then clear -> focus -> type.
    credentials = (
        ('//*[@id="usernameId"]', username),
        ('//*[@id="passwordId"]', password),
    )
    for xpath, value in credentials:
        field = dr.find_element(By.XPATH, xpath)
        field.clear()
        field.click()
        field.send_keys(value)

    # Submit the login form.
    dr.find_element(
        By.XPATH,
        '//*[@id="root"]/div/div/div/main/div/section[2]/div/div/div/div[2]/section/div/div[2]/button').click()
    # Positional sidebar entry; presumably the promotion page that getData
    # scrapes - the index will break if the menu layout changes.
    dr.find_element(
        By.XPATH,
        '//*[@id="__next"]/div/div/nav/div/div/nav/div/div[11]/ul/li[2]').click()


# Number of whitespace-separated tokens that make up one table row.
_FIELDS_PER_ROW = 15

_INSERT_SQL = (
    "INSERT INTO ks_pdd_spread_info "
    "(shop_name,goods_spread_name,spread_type,budget_day_money,discount,exposure,click_quantity,click_rate,expenditure,input_output_ratio,order_quantity,average_click_cost,click_conversion_rate,turnover,thousand_exposures,create_time,update_time) "
    "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
)


def _parse_spread_rows(text):
    """Split the scraped table text into complete 15-token data rows."""
    # The literal token below is dropped before grouping; presumably it is an
    # extra cell that would shift the fixed 15-column layout - TODO confirm.
    tokens = [
        token
        for line in text.split("\n")
        for token in line.split(" ")
        if token != '智能推广'
    ]
    print(tokens)

    groups = [
        tokens[start:start + _FIELDS_PER_ROW]
        for start in range(0, len(tokens), _FIELDS_PER_ROW)
    ]
    rows = []
    # The first group is never stored (presumably the table header).
    for group in groups[1:]:
        if len(group) < _FIELDS_PER_ROW:
            # A short trailing group used to raise IndexError mid-insert.
            print("skipping incomplete row: %r" % (group,))
            continue
        rows.append(group)
    return rows


def _save_spread_rows(shopname, rows):
    """Insert all parsed rows for one shop using a single connection."""
    if not rows:
        return
    now_time = datetime.datetime.now()
    # Token 1 of each row is intentionally not stored, matching the column list.
    params = [
        (shopname, row[0]) + tuple(row[2:_FIELDS_PER_ROW]) + (now_time, now_time)
        for row in rows
    ]
    # NOTE(review): connection settings are hard-coded placeholders.
    conn = pymysql.connect(host='host', user="root", passwd="passwd", database='database', port=3306)
    try:
        with conn.cursor() as cursor:
            cursor.executemany(_INSERT_SQL, params)
        conn.commit()
    finally:
        # Always release the connection, even when the insert fails.
        conn.close()


def getData(dr, shopname):
    """Scrape the promotion table shown in ``dr`` and store it in MySQL.

    Switches into the ``pmsIframe`` frame, clicks a control and the third
    entry of the list it opens (presumably a date-range or page-size choice -
    verify on the live page), reads the table container's text, parses it
    into rows and inserts them into ``ks_pdd_spread_info``. The page is
    refreshed afterwards.

    Args:
        dr: Selenium WebDriver already logged in and showing the page that
            embeds the ``pmsIframe`` frame.
        shopname: Value written to the ``shop_name`` column of every row.

    Raises:
        selenium.common.exceptions.WebDriverException: If a locator fails.
        pymysql.MySQLError: If connecting or inserting fails; nothing is
            committed in that case.
    """
    iframe = dr.find_element(By.CLASS_NAME, "pmsIframe")
    dr.switch_to.frame(iframe)
    dr.find_element(
        By.XPATH,
        '//*[@id="__next"]/div/div[2]/div/div/div/div/div[3]/div[4]/div[2]/ul/li[2]/div/div/div/div/div/div').click()
    dr.find_element(By.XPATH, '/html/body/div[2]/div/div/div/div/ul/li[3]').click()
    text = dr.find_element(
        By.XPATH,
        '//*[@id="__next"]/div/div[2]/div/div/div/div/div[3]/div[4]/div[2]/div/div/div/div[1]').text

    rows = _parse_spread_rows(text)
    for row in rows:
        print(row)
    _save_spread_rows(shopname, rows)
    dr.refresh()





# Location of the ChromeDriver binary used for every browser instance.
CHROMEDRIVER_PATH = r'C:\Users\Administrator\PycharmProjects\untitled\chromedriver.exe'

# Script injected into every new document so that navigator.webdriver
# reads as undefined.
_HIDE_WEBDRIVER_JS = """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => undefined
            })
          """

# One entry per shop: (login name, password, shop name stored with each row,
# seconds to wait before refreshing after a failed scrape).
SHOPS = [
    ('店铺名', '密码', '店铺名', 10),
    ('店铺名2', '密码', '店铺名2', 5),
    ('店铺名3', '密码', '店铺名3', 5),
    ('店铺名4', '密码', '店铺名4', 5),
]

# How many times every shop is scraped per run.
ROUNDS = 1


def _new_driver():
    """Start a maximized Chrome instance with the automation hints disabled."""
    options = webdriver.ChromeOptions()
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    # NOTE(review): executable_path was removed in Selenium 4.10; newer
    # versions need webdriver.Chrome(service=Service(path), options=options).
    driver = webdriver.Chrome(options=options, executable_path=CHROMEDRIVER_PATH)
    driver.maximize_window()
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": _HIDE_WEBDRIVER_JS
    })
    return driver


def main():
    """Open one browser per shop, log in, scrape every shop, then clean up."""
    drivers = []
    try:
        for _ in SHOPS:
            drivers.append(_new_driver())

        for (username, password, _, _), driver in zip(SHOPS, drivers):
            getPdd(username, password, driver)

        for _ in range(ROUNDS):
            for (_, _, shop_name, retry_delay), driver in zip(SHOPS, drivers):
                try:
                    getData(driver, shop_name)
                    sleep(5)
                except Exception as e:
                    # Best effort: one failing shop must not stop the others.
                    print(e)
                    sleep(retry_delay)
                    driver.refresh()
            print(datetime.datetime.now())
    finally:
        # Without quit() the Chrome and chromedriver processes outlive the script.
        for driver in drivers:
            driver.quit()


if __name__ == '__main__':
    main()

机器人问题解决

用 Selenium 执行登录操作时,页面总会提示检测到机器人,需要加入下面这段代码。ChromeDriver 驱动请自行下载,这里不提供。

executable_path=r'C:\Users\Administrator\PycharmProjects\untitled\chromedriver.exe')
    dr2.maximize_window()
    dr2.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => undefined
            })
          """
    })
  • 3
    点赞
  • 30
    收藏
    觉得还不错? 一键收藏
  • 4
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值