由于新项目上用户误触、误杀的频次变高,需要在抓取到用户的点击数据后生成点击图与热力图,便于修改手机的防误触、防误杀算法。
由于涉及公司内网,此处有些内容为后期制作。
一.数据获取
公司内部有面向手机产品体验用户的 beta 问题网站和问题打点网站,这里采用 Python 爬虫的方式爬取网站上用户提交的 inputlog 作为原始数据。
1.使用Fiddler抓包工具分析网站内容
刚开始只用普通的 session 登录,发现无法正常抓取,也没有重定向;然后改用 selenium 抓取,发现跳转网页的速度太慢。
通过 Fiddler 抓包分析网站后发现,网站登录后获取的 session 格式与后续响应的 cookies 格式不同。
查看 Fiddler 后发现多了一个参数:中间通过一个链接重新设置了 cookies 中的一个参数,重新设置该参数后再次访问即可得到正常的数据。
2.抓取数据
通过抓包工具分析后抓取数据,下面是抓取一条用户提交的问题 log 的代码:
import requests
import json
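# Open question: none of this should be needed as long as the format is
# right - can the session not simply be used directly?
# The session obtained after login has a different format from the cookies
# carried by the later responses.
# Fiddler shows one extra parameter, hwssot3; fetch the cookies as a dict and
# re-join them into a single string.
# https://xxxxx/login  POST, login URL
# After logging in, the response sets cookies (Set-Cookie).
# http://xxxxxx/2102292844  response
# response['data'][0]['downloadPath'] = download link
# response['data'][0]['fileName'] = file name
# Parameter: a session object. Purpose: obtain the cookies in Cookie-header format from the login URL.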
def getcookies(session):
    """Log in through ``session`` and return its cookies as one header string.

    The login form is POSTed as a JSON body. Afterwards the session's cookie
    jar is flattened to a dict, the ``auth__sticky``, ``auth_login1_sticky``
    and ``JSESSIONID`` entries are dropped when present, and the value of
    ``hwssotinter3`` is additionally stored under the name ``hwssot3``.

    Args:
        session: A ``requests`` session. It performs the login POST and its
            cookie jar is read afterwards.

    Returns:
        str: Every remaining cookie rendered as ``name=value;`` and
        concatenated in dict order (each pair keeps its trailing ``;``).

    Raises:
        KeyError: If the cookie jar holds no ``hwssotinter3`` entry after the
            login request.
    """
    login_url = 'https://auth.hihonor.com/login1/rest/hwidcenter/login'
    # The fields 'cid', 'device' and 'lang' are deliberately left out of the
    # submitted form.
    form_data = {
        'username': "xxxx",
        'password': "xxx",
        'targetUrl': "httpxxxxxxxx",
        'uid': "xxxxx",
    }
    login_headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Connection': 'keep-alive',
        'Content-Length': '240',
        'Content-Type': 'application/json; charset=UTF-8',
        'Host': 'xxxxxx.com',
        'Origin': 'https://xxxxxxx.com',
        # 'Referer': https://xxxxxx.com/login1/?redirect=http%3A%2F%2Fbetaclub.hihonor.com%2Fflight%2Fplatform%2F%23%2FdataCenter%2FquestionAll
        'sec-ch-ua': '" Not;A Brand";v="99", "Microsoft Edge";v="103", "Chromium";v="103"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': 'Windows',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36 Edg/103.0.1264.62'
    }
    session.post(url=login_url, data=json.dumps(form_data), headers=login_headers)

    cookies_dict = requests.utils.dict_from_cookiejar(session.cookies)
    # pop() with a default: a cookie the server did not set this time must not
    # abort the whole login with a KeyError.
    for unwanted in ('auth__sticky', 'auth_login1_sticky', 'JSESSIONID'):
        cookies_dict.pop(unwanted, None)
    # The later requests expect the token under the name 'hwssot3' as well.
    cookies_dict['hwssot3'] = cookies_dict['hwssotinter3']

    return "".join(f"{name}={value};" for name, value in cookies_dict.items())
#
# url_mudi =
# url1 =
# response = session.get(url1,headers=headers)
# aaa = response.headers['Set-Cookie']
#
# print(type(aaa))
# print(aaa)
#
# print(aaa.split(',')[-1].split(';')[0])
# print(aaa.split(';')[0])
# cookies_dict['hwssot3'] = aaa.split(';')[0].split('=')[-1]
def downloadfile(cookie):
    """Download the first attachment of the hard-coded question and save it.

    The attachment listing is requested first; the ``downloadPath`` of its
    first ``data`` entry is then fetched and written, in binary mode, to a
    file named after that entry's ``fileName`` in the current working
    directory. Both requests go through the module-level ``session``.

    Args:
        cookie: Value sent verbatim as the ``Cookie`` request header (the
            string produced by ``getcookies``).

    Raises:
        requests.HTTPError: If either request returns an error status.
        KeyError, IndexError: If the listing lacks the expected
            ``data[0]`` / ``downloadPath`` / ``fileName`` fields.
    """
    url2 = 'http://xxxxxx.com/flight/operaserver/attach/getQuesAttachProcess/2102296088'
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Connection': 'keep-alive',
        # Use the argument, not the module-level ``cookies`` variable, so the
        # function honours whatever the caller passes in.
        'Cookie': cookie,
        'Host': 'xxxxxx.com',
        'Referer': 'http://xxxxxxx.com/flight/platform/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36 Edg/103.0.1264.37'
    }

    response = session.get(url2, headers=headers)
    print(response.text)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    attachment = json.loads(response.text)['data'][0]

    response3 = session.get(attachment['downloadPath'], headers=headers)
    # Do not save an error page under the attachment's file name.
    response3.raise_for_status()
    with open(attachment['fileName'], 'wb') as fp:
        fp.write(response3.content)
# Download step: one shared session performs the login and is then reused,
# as a module-level name, by downloadfile().
session = requests.Session()
cookies = getcookies(session)
downloadfile(cookie=cookies)
二.图像展示
获取数据压缩包后先进行解压,再把 inputlog 中点击、划线动作的坐标清洗成数组,最后用 HeatMap 画图。
import os
import rarfile
import pandas as pd
from pyheatmap.heatmap import HeatMap
# Root directory: passed to dealfile() and walked for '.log' files further down.
path = r'C:\Users\.\Downloads\adb1.0.32\adb/'
def dealfile(path):
    """Recursively unpack every ``.rar`` archive found below ``path``.

    Each archive is extracted into a sibling directory named after the
    archive without its extension, the archive file is then deleted, and the
    new directory is scanned in turn so that nested archives are unpacked as
    well. The name of every file encountered is printed, and the full path of
    every archive is printed before it is extracted.

    Args:
        path: Directory to scan.

    Returns:
        None.
    """
    for filepath, dirnames, filenames in os.walk(path):
        for filename in filenames:
            print(filename)
            stem, ext = os.path.splitext(filename)
            if ext.lower() != '.rar':
                continue

            archive = os.path.join(filepath, filename)
            print(archive)
            # Extract next to the archive. The directory is derived from the
            # folder currently being walked, never by rebinding ``path``, so
            # one archive cannot change where the following ones end up.
            target = os.path.join(filepath, stem)
            # Close the archive before deleting it.
            with rarfile.RarFile(archive) as rar:
                rar.extractall(target)
            os.remove(archive)
            # The walk listed this folder before ``target`` was created, so
            # descend into it explicitly to catch archives inside archives.
            dealfile(target)
def dealfile2(log, value_slice=slice(71, 79)):
    """Extract touch coordinates from an input log as a list of ``[x, y]``.

    The file is read line by line as plain text. Lines mentioning
    ``ABS_MT_POSITION_X`` or ``ABS_MT_POSITION_Y`` carry a hexadecimal value
    at a fixed column range; an X value followed by a Y value forms one
    point. Every decoded value is printed together with its raw hex text.

    Incomplete data is dropped rather than turned into bogus points:
    an X that is followed by another X is replaced by the newer one, a Y with
    no preceding X is ignored, a trailing X with no Y is discarded, and lines
    whose value field is not valid hex are skipped.

    Args:
        log: Path of the log file.
        value_slice: Column range of the hex value within a line; defaults to
            characters 71 to 78 inclusive.

    Returns:
        list[list[int]]: The points in file order, each as ``[x, y]``.
    """
    dian = []
    pending_x = None  # X value still waiting for its Y partner
    # Read as text instead of CSV so the first line is not swallowed as a
    # header row; undecodable bytes are replaced instead of aborting the run.
    with open(log, encoding="utf-8-sig", errors="replace") as fh:
        for line in fh:
            is_x = "ABS_MT_POSITION_X" in line
            is_y = "ABS_MT_POSITION_Y" in line
            if not (is_x or is_y):
                continue

            raw = line[value_slice]
            try:
                value = int(raw, 16)
            except ValueError:
                # Truncated or differently formatted line: no usable value.
                continue

            if is_x:
                pending_x = value
                print("X", raw, value)
            if is_y:
                print("Y", raw, value)
                if pending_x is not None:
                    dian.append([pending_x, value])
                    pending_x = None
    return dian
# Plotting step: gather the points of every log under ``path`` and render
# them as a click map and a heat map.
shuju = []
dealfile(path)  # unpack the archives first so the walk below sees their logs
for filepath, dirnames, filenames in os.walk(path):
    for filename in filenames:
        if '.log' not in filename:
            continue
        shuju.extend(dealfile2(os.path.join(filepath, filename)))

hm = HeatMap(shuju)
hm.clickmap(save_as="hit100.png")
hm.heatmap(save_as="heat100.png", r=30)
结果如下,可以看到用户点击的位置和热区。