拼多多商家后台多店推广信息获取
本文介绍一个爬取拼多多商家后台“多多推广”页面数据的爬虫,支持同时登录多个店铺。
实现方式:Python + Selenium。
源码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020-10-29 13:53
# @Author : Aries
# @Site :
# @File : pddDate.py
# @Software: PyCharm
from selenium import webdriver
from time import sleep
import datetime
import pymysql
def getPdd(username, password, dr):
    """Log in to the Pinduoduo merchant backend with the given driver.

    Opens the login page, fills in the account form, submits it and finally
    clicks an entry in the left-hand navigation (presumably the promotion
    page that ``getData`` scrapes -- confirm against the live site).

    Args:
        username: Text typed into the ``usernameId`` input.
        password: Text typed into the ``passwordId`` input.
        dr: Selenium WebDriver instance to drive.

    Returns:
        None. Any Selenium exception raised while locating or using an
        element propagates to the caller.
    """

    def locate(xpath):
        # "xpath" is the value of selenium's By.XPATH. The generic
        # find_element(by, value) form exists in Selenium 3 and 4, whereas
        # the find_element_by_* helpers were removed in Selenium 4.3.
        return dr.find_element("xpath", xpath)

    dr.get('https://mms.pinduoduo.com/login')
    # Every element lookup below waits up to 30 seconds for its target.
    dr.implicitly_wait(30)

    # Presumably switches the login panel to the account/password tab.
    locate(
        '//*[@id="root"]/div/div/div/main/div/section[2]/div/div/div/div[1]/div/div[2]'
    ).click()

    credentials = (
        ('//*[@id="usernameId"]', username),
        ('//*[@id="passwordId"]', password),
    )
    for field_xpath, value in credentials:
        # The field is re-located before each action, as the original did,
        # so a re-rendered input never leaves us holding a stale element.
        locate(field_xpath).clear()
        locate(field_xpath).click()
        locate(field_xpath).send_keys(value)

    # Submit the login form.
    locate(
        '//*[@id="root"]/div/div/div/main/div/section[2]/div/div/div/div[2]/section/div/div[2]/button'
    ).click()
    # Navigation entry opened after login.
    locate('//*[@id="__next"]/div/div/nav/div/div/nav/div/div[11]/ul/li[2]').click()
# Number of cells that make up one table row in the scraped text.
_SPREAD_ROW_WIDTH = 15


def _split_spread_cells(text):
    """Return the cells of the scraped table text as a flat list.

    The text is split on newlines and then on single spaces. The literal
    cell '智能推广' is dropped; every other cell (including empty strings
    produced by consecutive spaces) is kept, in order.
    """
    return [
        cell
        for line in text.split("\n")
        for cell in line.split(" ")
        if cell != '智能推广'
    ]


def _store_spread_rows(shopname, rows):
    """Insert complete table rows into ``ks_pdd_spread_info``.

    One connection is used for all rows and is always closed, even when an
    insert fails. Each row is committed on its own, so rows stored before a
    failure stay in the database (same as the original per-row commit).
    """
    now_time = datetime.datetime.now()
    # Insert format: INSERT INTO table(columns) VALUES(matching data).
    sql = "INSERT INTO ks_pdd_spread_info " \
          "(shop_name,goods_spread_name,spread_type,budget_day_money,discount,exposure,click_quantity,click_rate,expenditure,input_output_ratio,order_quantity,average_click_cost,click_conversion_rate,turnover,thousand_exposures,create_time,update_time) " \
          "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    conn = pymysql.connect(host='host', user="root", passwd="passwd", database='database', port=3306)
    try:
        with conn.cursor() as cursor:
            for row in rows:
                print(row)
                # Cell 1 is not stored: the INSERT has no column for it.
                val = (shopname, row[0]) + tuple(row[2:15]) + (now_time, now_time)
                cursor.execute(sql, val)
                conn.commit()
    finally:
        conn.close()


def getData(dr, shopname):
    """Scrape the promotion table shown in the driver and store its rows.

    Switches into the ``pmsIframe`` iframe, clicks two controls (presumably
    a dropdown and one of its options -- confirm against the live page),
    reads the table text, groups it into rows of 15 cells, writes every
    complete row except the first one to MySQL and finally refreshes the
    page.

    Args:
        dr: Selenium WebDriver that is already logged in and showing the page
            containing the ``pmsIframe`` iframe.
        shopname: Value written to the ``shop_name`` column of every row.

    Returns:
        None. Selenium and database errors propagate to the caller.
    """
    # "class name" / "xpath" are the values of selenium's By.CLASS_NAME and
    # By.XPATH; find_element(by, value) works on Selenium 3 and 4, unlike
    # the find_element_by_* helpers removed in Selenium 4.3.
    iframe = dr.find_element("class name", "pmsIframe")  # embedded iframe page
    dr.switch_to.frame(iframe)  # enter the iframe
    dr.find_element(
        "xpath",
        '//*[@id="__next"]/div/div[2]/div/div/div/div/div[3]/div[4]/div[2]/ul/li[2]/div/div/div/div/div/div',
    ).click()
    dr.find_element("xpath", '/html/body/div[2]/div/div/div/div/ul/li[3]').click()
    text = dr.find_element(
        "xpath",
        '//*[@id="__next"]/div/div[2]/div/div/div/div/div[3]/div[4]/div[2]/div/div/div/div[1]',
    ).text

    cells = _split_spread_cells(text)
    print(cells)

    rows = [
        cells[start:start + _SPREAD_ROW_WIDTH]
        for start in range(0, len(cells), _SPREAD_ROW_WIDTH)
    ]
    # The first chunk is never stored (presumably the table header).
    data_rows = rows[1:]
    # Only the last chunk can be short. Indexing into it used to raise
    # IndexError part-way through the inserts, so drop it explicitly.
    if data_rows and len(data_rows[-1]) < _SPREAD_ROW_WIDTH:
        print("skipping incomplete trailing row: %r" % (data_rows[-1],))
        data_rows = data_rows[:-1]

    # Connect to the database only when there is something to write.
    if data_rows:
        _store_spread_rows(shopname, data_rows)

    dr.refresh()
# Location of the chromedriver binary used for every browser instance.
_CHROMEDRIVER_PATH = r'C:\Users\Administrator\PycharmProjects\untitled\chromedriver.exe'

# Script injected into every new document so that navigator.webdriver reads
# as undefined.
_HIDE_WEBDRIVER_JS = """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""


def _new_stealth_driver():
    """Start a maximized Chrome instance with the automation hints disabled."""
    options = webdriver.ChromeOptions()
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    driver = webdriver.Chrome(options=options, executable_path=_CHROMEDRIVER_PATH)
    driver.maximize_window()
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": _HIDE_WEBDRIVER_JS},
    )
    return driver


def _scrape_shop(driver, shop_name, error_pause):
    """Run one scrape for a shop; on failure report, pause and refresh."""
    try:
        getData(driver, shop_name)
        sleep(5)
    except Exception as e:
        # Report first so the error is visible even if the refresh fails.
        print(e)
        sleep(error_pause)
        driver.refresh()


if __name__ == '__main__':
    # One browser per shop, created in the same order as before.
    dr2 = _new_stealth_driver()
    dr = _new_stealth_driver()
    dr3 = _new_stealth_driver()
    dr4 = _new_stealth_driver()

    # Log every shop in (shop 4 before shop 3, as in the original).
    getPdd('店铺名', '密码', dr)
    getPdd('店铺名2', '密码', dr2)
    getPdd('店铺名4', '密码', dr4)
    getPdd('店铺名3', '密码', dr3)

    # (driver, shop name, seconds to pause after a failed scrape)
    shops = [
        (dr, '店铺名', 10),
        (dr2, '店铺名2', 5),
        (dr3, '店铺名3', 5),
        (dr4, '店铺名4', 5),
    ]
    # A single scraping round; widen the range to poll repeatedly.
    for _ in range(1, 2):
        for driver, shop_name, error_pause in shops:
            _scrape_shop(driver, shop_name, error_pause)
        now_time = datetime.datetime.now()
        print(now_time)
    # NOTE: the browsers are deliberately left open; the original had its
    # quit() calls commented out.
机器人问题解决
使用 Selenium 执行登录操作时,页面总会提示检测到机器人,需要在创建浏览器实例时加入下面这段代码。Chrome 驱动(chromedriver)请自行下载,这里不提供。
executable_path=r'C:\Users\Administrator\PycharmProjects\untitled\chromedriver.exe')
dr2.maximize_window()
dr2.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})