首先声明:这里说的“实时更新”是指每2~3小时更新一次。因为店铺太多,除非堆资源,否则没有办法实现秒级的更新。要想做到这种实时更新,就需要解决下面几个问题:
1.各个平台商家后台子账号
2.各个子账号后台登录问题
3.频繁登录风控问题
4.速度问题
第一个问题是业务问题,可以忽略;第二个问题可以借助RPA工具来自动实现,但这样又不能兼顾第三个和第四个问题:抛开风控不谈,几百家店每次登录验证也需要大量的时间。
以下是经过实践的可行性方案:
1.通过GKLogin手动或自动登录一次后,可以一直保持登录状态,之后不需要验证即可直接登录并获取cookie(例如以下21号登录过的店,可以直接登录进去,连验证也不需要);同时可以分任务、分批次,在当天获取并更新一次cookie。
2.通过cookie直接请求接口获取数据,再进行解析,代码如下:
import requests
from datetime import datetime
import pymssql
# Refresh the JD "real-time overview" snapshot table.
#
# Steps:
#   1. Read the saved cookie of every JD shop from the cookie table.
#   2. Empty the snapshot table.
#   3. For each shop, call the JD overview endpoint with its cookie and insert
#      one row per returned metric.
# A failure for one shop (network error, expired cookie, unexpected payload,
# insert error) is reported and skipped so the remaining shops still update.

# Timestamp shared by every row written during this run.
createTime = datetime.now()

# Seconds to wait for the JD gateway before giving up on a shop, so a single
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

url = 'https://szgateway.jd.com/szajax/sz/jm/shopdata/findOverviewData.ajax?'

cookie_sql = '''select [平台店铺ID],[请求头],[Cookie] ,平台店铺名称 FROM AiData.dbo.[表名] a
where 平台='京东' and [Cookie] is not null
'''

delete_sql = '''delete from [sycm].[dbo].[realTime_data ]
'''

insert_query = """
INSERT INTO [sycm].[dbo].[realTime_data ] (platform,shopName,shopId,thisMonthNumber, realTimeNumber,yesterdayNumber,[name],[createTime])
VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
"""

# Request headers that do not change between shops; the per-shop Cookie header
# is added inside the loop.
# NOTE(review): "p-pin", "uuid", "User-mup" and "User-mnp" look like values
# captured from a single browser session and are sent unchanged for every
# shop -- confirm the gateway accepts that.
BASE_HEADERS = {
    "Host": "szgateway.jd.com",
    "Connection": "keep-alive",
    "sec-ch-ua": "\"Google Chrome\";v=\"119\", \"Chromium\";v=\"119\", \"Not?A_Brand\";v=\"24\"",
    "sec-ch-ua-mobile": "?0",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "Accept": "application/json, text/plain, */*",
    "User-mup": "1716270999401",
    "p-pin": "%E7%8C%AB%E4%BA%BA%E5%AE%98%E6%96%B9%E6%97%97%E8%88%B0%E5%BA%97_%E4%B8%AD%E5%8F%B0",
    "uuid": "788240d8-802f-4283-b025-8fef812f67c4",
    "User-mnp": "1d6de85b92a6c65a55cc009fa6767d9a",
    "sec-ch-ua-platform": "\"Windows\"",
    "Origin": "https://shop.jd.com",
    "Sec-Fetch-Site": "same-site",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Dest": "empty",
    "Referer": "https://shop.jd.com/jdm/home/",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
}

db = pymssql.connect('服务器', '账号', '密码', '数据库', charset='utf8')
cursor = db.cursor()
try:
    # Load (shop id, request headers, cookie, shop name) for every JD shop
    # that has a stored cookie.
    cursor.execute(cookie_sql)
    result_login = cursor.fetchall()
    db.commit()

    # Clear the previous snapshot before writing the new one.
    cursor.execute(delete_sql)
    db.commit()

    for rec in result_login:
        shopId = rec[0]
        cookie = rec[2]
        # NOTE(review): presumably repairs GBK text that the driver returned
        # decoded as latin-1 -- confirm against the column's collation.
        shopName = rec[3].encode('latin1').decode('gbk')
        headers = {**BASE_HEADERS, 'Cookie': str(cookie)}
        try:
            # The request and JSON decoding live inside the try so that an
            # expired cookie or a non-JSON reply only skips this shop.
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            json_data = response.json()
            print(shopName, json_data)

            data_list = json_data['content']['shopDataModuleItemVOS']
            # One parameter tuple per metric, in the column order of insert_query.
            rows = [
                (
                    '京东',
                    shopName,
                    shopId,
                    item['thisMonthNumber'],
                    item['realTimeNumber'],
                    item['yesterdayNumber'],
                    item['name'],
                    createTime,
                )
                for item in data_list
            ]
            if rows:
                cursor.executemany(insert_query, rows)
            # Commit once per shop instead of once per row.
            db.commit()
        except Exception as exc:
            # Best-effort per shop: drop any partially inserted rows, report
            # which shop failed and why, then continue with the next one.
            db.rollback()
            print(shopName, exc)
finally:
    # Always release the database resources, even if the run aborts.
    cursor.close()
    db.close()
可+luocy0207