import requests
from lxml import etree
import urllib3
import time
import ssl
from openpyxl import workbook
from openpyxl import load_workbook
import math
import random
import os
import socket
from openpyxl.drawing.image import Image
# SECURITY: swap the ssl module's default HTTPS context factory for the
# unverified one, so standard-library HTTPS clients in this process no longer
# validate server certificates. This is a process-wide side effect.
ssl._create_default_https_context = ssl._create_unverified_context
# Silence urllib3's warnings (such as InsecureRequestWarning), which would
# otherwise be emitted for the verify=False requests made in this file.
urllib3.disable_warnings()
# Request headers passed to the requests.get() calls in this file.
# SECURITY: the "cookie" entry embeds a hard-coded logged-in session
# (rank-login-user, JSESSIONID, ...). It is a credential and will stop working
# once the session expires.
# NOTE(review): consider loading it from the environment or a config file.
headers = {
# NOTE(review): "authority" and "path" look like the HTTP/2 pseudo-headers
# (:authority / :path) copied from browser dev tools; here they are sent as
# ordinary header fields - confirm whether the server needs them at all.
"authority": "www.sellersprite.com",
"referer": "https://www.sellersprite.com/w/user/login?callback=/v2/keyword-research-weekly%3Fstation%3DUS%26order.field%3Dsearchfrequencyrank%26order.desc%3Dfalse%26usestatic%3DR%26usesWeekly%3Dweekly%26marketId%3D1%26limitUserStatic%3Dtrue%26adminDes%3DS%26departments%255B8%255D%3Dfashion%26departments%255B19%255D%3Dsporting%26table%3Dara_20210522%26minSearchRank%3D%26maxSearchRank%3D20000%26rankGrowthType%3DW1%26rankGrowthValue%3D%26rankGrowthRate%3D30%26minMonopolyClickRate%3D%26maxMonopolyClickRate%3D%26includeKeywords%3D%26excludeKeywords%3D",
"cookie": 'ecookie=92Eo8RhbjmmgZRRF_CN; rank_c_s_ind=1; current_guest=cdklCvVfIr7L_210112-138175; crisp-client%2Fsession%2F02ce6ae3-e1ab-4bb7-ae11-b1a839c52e78=session_9af6804b-33bc-4415-8201-d9e385a463b3; crisp-client%2Fsocket%2F02ce6ae3-e1ab-4bb7-ae11-b1a839c52e78=1; _ga=GA1.1.615595330.1610015957; _ga_CN0F80S6GL=GS1.1.1622623233.16.0.1622623233.0; rank-login-user=0539182261IrZXNTSoIlHhPKyHGfg/7TMbw6xY7YpCjminsqgfQO01sO7Vs0fjP++yMJ6b2vGp; rank-login-user-info="eyJuaWNrbmFtZSI6IkZTMDAyR1oiLCJpc0FkbWluIjpmYWxzZSwiYWNjb3VudCI6IkZTMDAyR1oiLCJ0b2tlbiI6IjA1MzkxODIyNjFJclpYTlRTb0lsSGhQS3lIR2ZnLzdUTWJ3NnhZN1lwQ2ptaW5zcWdmUU8wMXNPN1ZzMGZqUCsreU1KNmIydkdwIn0="; ao_lo_to_n="0539182261IrZXNTSoIlHhPKyHGfg/7f5qKmPRnt6HO48ic0PwAPgIQajuil1uqdcqigRmRGHWRt3NPT84Nj3oG9Y9Hm9uOJyJBHjET9hDGQbyyFMZQDY="; JSESSIONID=9B36F9363416CC18CF8727E83A0430D9',
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36",
# This value is static: it always says page=1&size=50, whatever URL is
# actually being requested with these headers.
"path": "/v2/keyword-research-weekly?station=US&table=ara_20210522&departments%5B0%5D=&departments%5B1%5D=&departments%5B2%5D=&departments%5B3%5D=&departments%5B4%5D=&departments%5B5%5D=&departments%5B6%5D=&departments%5B7%5D=&departments%5B8%5D=fashion&departments%5B9%5D=&departments%5B10%5D=&departments%5B11%5D=&departments%5B12%5D=&departments%5B13%5D=&departments%5B14%5D=&departments%5B15%5D=&departments%5B16%5D=&departments%5B17%5D=&departments%5B18%5D=&departments%5B19%5D=sporting&order.field=searchfrequencyrank&order.desc=false&rankGrowthType=W1&rankGrowthRate=30.0&maxSearchRank=20000&includeKeywords=&excludeKeywords=&page=1&size=50",
"sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
}
def get_page_url_list(num, size):
    """Build the paginated keyword-research URLs needed to cover ``num`` results.

    Args:
        num: Total number of results to fetch.
        size: Number of results per page; must be positive.

    Returns:
        A list of ``ceil(num / size)`` URLs. ``page`` runs from 1 upward and
        ``size`` is filled into every URL. The list is empty when ``num`` is
        zero or negative.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    if size <= 0:
        # Fail with a clear message instead of a ZeroDivisionError (size == 0)
        # or a silently empty result (size < 0).
        raise ValueError("size must be a positive number, got {!r}".format(size))

    # Query template; the two trailing placeholders are page number and page size.
    basic_url = "https://www.sellersprite.com/v2/keyword-research-weekly?station=US&table=ara_20210522&departments%5B0%5D=&departments%5B1%5D=&departments%5B2%5D=&departments%5B3%5D=&departments%5B4%5D=&departments%5B5%5D=&departments%5B6%5D=&departments%5B7%5D=&departments%5B8%5D=fashion&departments%5B9%5D=&departments%5B10%5D=&departments%5B11%5D=&departments%5B12%5D=&departments%5B13%5D=&departments%5B14%5D=&departments%5B15%5D=&departments%5B16%5D=&departments%5B17%5D=&departments%5B18%5D=&departments%5B19%5D=sporting&order.field=searchfrequencyrank&order.desc=false&rankGrowthType=W1&rankGrowthRate=30.0&maxSearchRank=20000&includeKeywords=&excludeKeywords=&page={}&size={}"

    # Round up so a final, partially filled page is still requested.
    page_count = math.ceil(num / size)
    return [basic_url.format(page, size) for page in range(1, page_count + 1)]
def get_image_file_name_list(keyword, image_name_list):
    """Ensure the image folder for ``keyword`` exists and return the file paths.

    The folder is named after ``keyword`` and is created inside the current
    working directory if it does not exist yet.

    Args:
        keyword: Name of the folder that holds the images.
        image_name_list: Image base names, without extension.

    Returns:
        A list with one ``.jpg`` path inside the folder per entry of
        ``image_name_list``, in the same order.
    """
    # os.path.join keeps the path valid on every platform; the previous
    # hard-coded "\\" separators only produced a real directory path on Windows.
    target_dir = os.path.join(os.getcwd(), keyword)
    # exist_ok makes repeated calls safe and avoids a check-then-create race.
    os.makedirs(target_dir, exist_ok=True)
    return [os.path.join(target_dir, name + ".jpg") for name in image_name_list]
# 下载图片
def download_image(image_url_list, image_file_name_list):
exception_image_url_list = []
exception_image_name_list = []
for i in image_url_list:
try:
res = requests.get(i, headers=headers, verify=False, timeout=100)
# time.sleep(random.randint(1, 5))
res.close()
socket.setdefaulttimeout(30)
with open(image_file_name_list[image_url_list.index(i)], 'wb') as f:
f.write(res.content<
抓取卖家精灵关键词趋势选品数据并写入excel(包含图片)
最新推荐文章于 2024-08-06 20:52:07 发布