python 生意参谋_电商 Python 抓取 生意参谋-访客数据

import requests

import json

import pandas as pd

import time

# Endpoint queried by this script: getRtVisitor.json

session = requests.Session()  # Create one Session object; loadPage() sends its requests through it

# Request headers passed to every session.get() call against sycm.taobao.com.
# NOTE(review): the cookie is a hard-coded logged-in session (it carries
# JSESSIONID, XSRF-TOKEN and _tb_token_ values). It will expire, so it has to
# be replaced with a fresh one before running; prefer loading it from local
# configuration instead of committing it to source control.
# NOTE(review): 'accept-encoding' advertises "br"; presumably requests can only
# decode brotli responses when the optional brotli package is installed — confirm.
headers = {
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
    'cookie': 't=4a09a3c800e9591a2c198b124d80e02a; cna=G2veFsG5YX0CAQ5pIwfTWHFB; lgc=%5Cu5927%5Cu5468%5Cu4F73; tracknick=%5Cu5927%5Cu5468%5Cu4F73; tg=0; _euacm_ac_l_uid_=1639181234; 1639181234_euacm_ac_c_uid_=1639181234; 1639181234_euacm_ac_rs_uid_=1639181234; _portal_version_=new; cc_gray=1; thw=cn; mt=ci=84_1; _euacm_ac_rs_sid_=155550734; enc=Rk6EFG1Zi%2F5sBqHEJADmGhcHg%2F8HVbpOVQDb72MK8zmamaAE7C23fINFdlW5BT%2FNfkRKZLPkB8gKw%2Bj0cK0hig%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; cookie2=1a5b17269a2a570500957e7dcce65c45; _tb_token_=35e1e66785eb7; _samesite_flag_=true; sgcookie=Ep88649ag1i3VPa4P6akw; unb=1639181234; uc3=vt3=F8dBxd9nWWLAbiRWGZE%3D&lg2=UtASsssmOIJ0bQ%3D%3D&nk2=1z8nwQRn&id2=Uoe3fo1fFq44Zg%3D%3D; csg=aa9633c3; cookie17=Uoe3fo1fFq44Zg%3D%3D; dnk=%5Cu5927%5Cu5468%5Cu4F73; skt=69bf89596c5fe0d1; existShop=MTU4NDU4MTc0MQ%3D%3D; uc4=nk4=0%401fDckZcjfHBEZVI1NQCO3RY%3D&id4=0%40UO%2BxIxkhtoiLbBQVUeEnpSn1KHbH; _cc_=U%2BGCWk%2F7og%3D%3D; _l_g_=Ug%3D%3D; sg=%E4%BD%B34e; _nk_=%5Cu5927%5Cu5468%5Cu4F73; cookie1=BYXJ7V2Aq8c%2FnceEFKLACXxZ7qw2VwJYwgQbxY%2Feb6A%3D; tfstk=cKiNBsfOWhKwV3THPur4G7Z13THOZAHinMy7S4xicwaWPJ4GiQfYKvNUL-NJxPf..; v=0; _m_h5_tk=8b49ef5c39700daef25a92f8c520cc65_1584590383214; _m_h5_tk_enc=4d9374ef7e4743b408cb6247793448be; uc1=cookie14=UoTUPvXUU0lRTg%3D%3D&lng=zh_CN&cookie16=Vq8l%2BKCLySLZMFWHxqs8fwqnEw%3D%3D&existShop=true&cookie21=UtASsssmfavZrexPkAwn7A%3D%3D&tag=8&cookie15=W5iHLLyFOGW7aA%3D%3D&pas=0; XSRF-TOKEN=673a0ad9-3e96-4fb2-bbd9-af3ffa50a9bc; JSESSIONID=B0A57AC70F717D1231606D512F674ECB; l=dBjCTtrcQuukdAdWBOfgqK_ahxbOrIRbzsPPlS9CCICP9O1wrJp1WZ4V8-8eCnGN36J6R3WhGKB3BqLTFPaOhtikBBrsDOsCydTBR; isg=BAoK5gRhYDd8MOy0NcE2jJ5BW_Cs-45VYmG9hZRDct30R6gBfIkRZ2NxV7ubtwbt',
    'referer': 'https://sycm.taobao.com/ipoll/visitor.htm?spm=a21ag.7622617.LeftMenu.d181.758a1be9MfheaI',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
}

def loadPage(page, token='6f5921e86', timestamp='1584581855701', timeout=30):
    """Download one page of the real-time visitor list and return it as parsed JSON.

    The raw response body is also written to ``{page}.txt`` in the current
    working directory so it can be inspected afterwards.

    Args:
        page: Page number to request; it is also used as the dump file name.
        token: Value sent as the ``token`` query parameter. The default is the
            value that was previously hard-coded here.
        timestamp: Value sent as the ``_`` query parameter. The default is the
            value that was previously hard-coded here.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the script forever.

    Returns:
        The response body decoded with ``json.loads``.

    Raises:
        requests.RequestException: On timeout, connection failure, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # Example of a full request URL:
    # https://sycm.taobao.com/ipoll/live/visitor/getRtVisitor.json?device=2&limit=20&page=2&token=97890b4c6&type=Y&_=1584409617907
    url = (
        'https://sycm.taobao.com/ipoll/live/visitor/getRtVisitor.json'
        '?device=2&limit=20&page={page}&token={token}&type=Y&_={timestamp}'
    ).format(page=page, token=token, timestamp=timestamp)

    resp = session.get(url, headers=headers, timeout=timeout)  # issue the GET request
    text = resp.text

    # Write the dump as UTF-8 explicitly: the platform default encoding may
    # not be able to represent every character in the response.
    with open(str(page) + ".txt", "w", encoding="utf-8") as f:
        f.write(text)

    # Checked after the dump so the body of a failed response is still saved.
    resp.raise_for_status()
    return json.loads(text)

def GetValueFromDic(item, key, defaultValue=None):
    """Return ``item[key]`` when the key is present, otherwise a fallback value.

    Args:
        item: Container to look the key up in (the script passes one record dict).
        key: Key to look up.
        defaultValue: Value returned when ``key`` is absent. When left as
            ``None`` the fallback is the empty string.

    Returns:
        The stored value (returned as-is, even if it is ``None``) when the key
        exists; otherwise ``defaultValue``, or ``''`` if no default was given.
    """
    if key in item:
        return item[key]
    return '' if defaultValue is None else defaultValue

def appendList(json_res):
    """Turn one page of visitor records into rows and append them to ``list_excel``.

    Every row holds six cells in this order: visit time, traffic source,
    search keyword, visited page, visitor location, visitor id.

    Args:
        json_res: Parsed response for one page; the records are read from
            ``json_res['data']['data']['list']``.

    Returns:
        None. Rows are appended to the module-level ``list_excel`` list.

    Raises:
        KeyError: If the ``data`` / ``data`` / ``list`` path is missing.
    """
    for item in json_res['data']['data']['list']:
        # Use tolerant lookups in both branches so a record with a missing
        # field yields an empty cell instead of aborting the whole export.
        if item.get('pageTypeId') == 5:  # product category page
            row = [
                GetValueFromDic(item, 'visitTime'),
                '其它来源',
                '',  # search keyword: left blank for category pages
                '商品分类页',
                '',  # visitor location: left blank for category pages
                GetValueFromDic(item, 'oid'),
            ]
        else:
            row = [
                GetValueFromDic(item, 'visitTime'),
                GetValueFromDic(item, 'srcGrpName', '其它来源'),
                GetValueFromDic(item, 'preSeKeyword'),
                GetValueFromDic(item, 'title'),
                GetValueFromDic(item, 'cityName'),
                GetValueFromDic(item, 'oid'),
            ]
        list_excel.append(row)

# Accumulator for the export: appendList() appends one row per visitor record.
list_excel = []

# Fetch the first page; its payload also reports how many pages exist.
json_res = loadPage(1)

# Read the paging information from the first response.
totalPage = json_res['data']['data']['totalPage']
curPage = json_res['data']['data']['page']
print('总页数:', totalPage, '当前页:', curPage)

# Add the rows of the first page.
appendList(json_res)

# Download the remaining pages, 2 .. totalPage (no iterations when there is
# at most one page).
for page in range(2, totalPage + 1):
    print('当前页:', page)
    json_res = loadPage(page)
    appendList(json_res)

# Convert to a DataFrame. The column headers correspond, in order, to the raw
# fields visitTime, srcGrpName, preSeKeyword, title, cityName, oid.
df = pd.DataFrame(list_excel, columns=['访问时间', '入店来源', '搜索关键字', '被访页面', '访客位置', '访客编号'])

# Save to a local Excel file whose name carries the current local time.
curTime = time.strftime('%Y-%m-%d %H%M%S')
df.to_excel('访客-' + curTime + '.xlsx', index=False)

原文:https://www.cnblogs.com/guxingy/p/12932364.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
生意参谋-市场大盘,市场排行爬虫脚本,采用pyppeteer+PIL方式处理数值图片问题,人工登录,并将爬取结果保存为csv文件 脚本截取: import asyncio from pyppeteer import launch from PIL import Image import pytesseract import datetime import time import pandas as pd async def main(): start_time = datetime.datetime.now() print('浏览器初始化……') browser = await launch(headless=False, args=['--disable-infobars',f'--window-size=1366,768']) page = await browser.newPage() await page.setViewport({'width': 1366, 'height': 768}) await page.goto('https://login.taobao.com/member/login.jhtml?redirectURL=https://www.taobao.com/') #从淘宝登录 await page.evaluate( '''() =>{ Object.defineProperties(navigator,{ webdriver:{ get: () => false } }) }''') await asyncio.sleep(30) #人工输入账号密码登录 print('开始登录……') await page.goto('https://sycm.taobao.com/mc/mq/overview?cateFlag=2&cateId=50022270&dateType=week') #登录后跳转到生意参谋 await asyncio.sleep(10) #await page.click('#content-container > div.ebase-metaDecorator__root > div > div.ebase-FaCommonFilter__root > div > div > div > div > div.ebase-FaCommonFilter__top > div.ebase-FaCommonFilter__right > div > div > div.oui-date-picker-particle-button > button.ant-btn.oui-canary-btn.ant-btn-primary.ant-btn-sm > span') await page.evaluate( '''() =>{ Object.defineProperties(navigator,{ webdriver:{ get: () => false } }) }''') #uvHits = await page.waitForSelector('#sycm-mc-mq-cate-trend > div.oui-card-content.alife-one-design-sycm-indexes-trend-card-content > div > div.alife-one-design-sycm-indexes-trend-index-container > div > div > div:nth-child(1) > div.alife-one-design-sycm-indexes-trend-index-item-multiple-line-selectable.alife-one-design-sycm-indexes-trend-index-item-uvHits.active > div > div.oui-index-cell-indexValue.oui-num > div > div:nth-child(2) > img') #await uvHits.screenshot({'path': 'uvHits.png'}) #await asyncio.sleep(3) print('开始爬取市场大盘……')

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值