拿取商品信息的名称:
# Legend of scraped product fields: each name below is bound to its Chinese
# display label. None of these names is referenced by the visible class code;
# presumably they document the output columns of the scraper -- TODO confirm.
# NOTE(review): the module-level name `id` shadows the builtin `id()`.
id = '自增ID'  # auto-increment ID
task_id = '任务ID'  # task ID
platform = '平台'  # platform
search_key = '搜索词'  # search keyword
goods_id = '商品ID'  # product ID
goods_title = '商品标题'  # product title
goods_url = '商品链接'  # product URL
goods_brand = '商品品牌'  # product brand
goods_now_price = '商品优惠价'  # discounted product price
goods_old_price = '商品原价'  # original product price
month_sale_num = '30天销量'  # 30-day sales volume
discount_info = '优惠券信息'  # coupon information
place_of_delivery = '发货地'  # shipping origin
shop_name = '店铺名称'  # shop name
shopkeeper_nick = '掌柜昵称'  # shopkeeper nickname
shop_id = '店铺ID'  # shop ID
shop_level = '店铺等级'  # shop level
shop_url = '店铺地址'  # shop address (URL)
delivery_score = '物流评分'  # logistics rating
item_score = '描述评分'  # description rating
score_p = '服务评分'  # service rating
is_authorize = '是否授权'  # whether authorized
inv_count = '商品库存量'  # product stock quantity
fav_count = '商品收藏数'  # product favorites count
fans_count = '商品粉丝数'  # product follower count
goods_pic = '商品主图'  # main product image
sku_att_class = 'sku属性分类'  # SKU attribute category
sku_now_price = 'sku商品价格'  # SKU price
sku_old_price = 'sku商品原价'  # SKU original price
sku_inv_count = 'sku商品库存量'  # SKU stock quantity
sku_url = 'sku商品链接'  # SKU URL
create_at = '创建时间'  # creation time
update_at = '更新时间'  # update time
代码:
import requests
import urllib.parse
import re
import json
from lxml.html import etree
import time
import csv
import random
class Get1688Info(object):
def __init__(self, task_id=None, search_key=None, id=None):
self.headers = {
'authority': 'search.1688.com',
'origin': 'https://s.1688.com',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
'accept': '*/*',
'sec-fetch-site': 'same-site',
'sec-fetch-mode': 'cors',
'referer': 'https://s.1688.com/selloffer/offer_search.htm',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'zh-CN,zh;q=0.9',
}
self.user_agents = [
"Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11",
"Mozilla/5.0(Macintosh;IntelMacOSX10_7_3)AppleWebKit/535.20(KHTML,likeGecko)Chrome/19.0.1036.7Safari/535.20",
"Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/536.11(KHTML,likeGecko)Chrome/20.0.1132.11TaoBrowser/2.0Safari/536.11",
"Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.1(KHTML,likeGecko)Chrome/21.0.1180.71Safari/537.1LBBROWSER",
"Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.84Safari/535.11LBBROWSER",
"Mozilla/5.0(WindowsNT5.1)AppleWebKit/537.1(KHTML,likeGecko)Chrome/21.0.1180.89Safari/537.1",
"Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.1(KHTML,likeGecko)Chrome/21.0.1180.89Safari/537.1",
"Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.11(KHTML,likeGecko)Chrome/23.0.1271.64Safari/537.11",
"Mozilla/5.0(WindowsNT10.0;Win64;x64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/58.0.3029.110Safari/537.36",
"Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.1(KHTML,likeGecko)Chrome/22.0.1207.1Safari/537.1",
]
self.uuid = id
self.task_id = task_id
self.search_key = search_key
self.platform = '1688'
def agent_proxies(self):  # fetch a proxy IP
    """Fetch proxy addresses from the provider until one passes ``test_ip``.

    Each iteration requests the provider URL, parses the JSON body, takes
    the ``IP`` field of the first entry of its ``data`` list and checks it
    with ``test_ip`` (which returns False for a usable proxy, ending the
    loop). The first five failures are retried after a 2-second pause; on
    every later failure a message is printed and the operator is prompted
    before the next attempt.

    Returns:
        str: The ``IP`` value of the proxy that passed ``test_ip``.
    """
    ag_url = '拿代理ip的网链接'
    failures = 0
    ip_use = True
    while ip_use:
        try:
            # A timeout keeps a stalled provider from blocking the loop
            # forever; 10 seconds matches the other requests in this class.
            ip_res = requests.get(ag_url, timeout=10).text.strip()
            resp_json = json.loads(ip_res)['data'][0]
            proxies = f'{resp_json["IP"]}'
            ip_use = self.test_ip(proxies)
            # NOTE: an earlier variant (commented out in the original code)
            # checked a 'success' flag in the response and built the proxy
            # as 'ip:port' from lower-case keys.
        except Exception:
            # Deliberately broad: any failure (network error, malformed
            # JSON, missing key, error raised by test_ip) means "try again".
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit through, so the loop can still be stopped.
            if failures < 5:
                failures += 1
                time.sleep(2)
            else:
                print('获取代理ip失败,代理网站失效')
                # Pause until the operator confirms; the answer is not used.
                input('可以继续吗?')
    return proxies
def test_ip(self, pro):  # check whether the proxy IP is usable
    """Probe a proxy by fetching https://www.baidu.com/ through it.

    Args:
        pro: Proxy address, interpolated after the ``http://`` and
            ``https://`` schemes to form the proxy URLs.

    Returns:
        bool: False when the probe answered with HTTP 200 (proxy usable),
        True when it answered with another status or the request failed.
        The inverted sense matches the caller's "keep searching" loop flag.
    """
    # NOTE(review): the 'https' entry uses an https:// proxy URL, which asks
    # for a TLS connection to the proxy itself -- confirm the provider
    # supports that.
    proxies = {
        'http': f'http://{pro}', 'https': f'https://{pro}'}
    try:
        res = requests.get('https://www.baidu.com/', timeout=10, proxies=proxies)
    except Exception:
        # Any request failure counts as an invalid proxy. The original code
        # assigned ``ip_tests`` here and then returned the unbound
        # ``ip_use``, raising UnboundLocalError instead of reporting it.
        print('代理IP无效')
        time.sleep(2)
        return True
    if res.status_code == 200:
        print('ip可用')
        return False
    print('代理ip无效')
    time.sleep(2)
    return True
#获取商品总页数
def num_page(self):
    """Request the first result page and pass its page count to ``get_goods_ids``.

    Sends a GET request with the instance headers and fixed paging
    parameters, reads ``data.data.pageCount`` from the JSON response, prints
    it, and calls ``self.get_goods_ids`` with that value. Returns None.
    """
    # NOTE(review): ``url`` is not defined in this method or anywhere in the
    # visible source; presumably it is a module-level name -- confirm.
    query = {
        'n': 'y', 'netType': '1,11,16', 'beginPage': '1', 'async': 'true',
        'asyncCount': '20', 'pageSize': '60', 'startIndex': '0', 'offset': '9',
    }
    listing = requests.get(url, headers=self.headers, params=query, timeout=10).json()
    page_total = listing['data']['data']['pageCount']
    print(f'商品列表一共{page_total}页')
    self.get_goods_ids(page_total)
#获取每页商品的ID
def get_goods_ids(self,num_page):
goods_id_lists = []
x = 0
agent_proxies = self.agent_p