# 难点:JS 加密分析、请求头参数构造
# -*- coding: utf-8 -*-
"""====================================================================================================================
function : http://www.titanmatrix.com/tgxx
按照品牌和系列可以看到选型参数
不同参数的选择可以得到不同的订货号
===================================================================================================================="""
import os
import csv
import json
import time
import execjs
import random
import hashlib
import requests
import pandas as pd
from loguru import logger
from bs4 import BeautifulSoup
from datetime import datetime
from itertools import product
from utils.request import Request
class TG:
    """Scrape product-selection data from the titanmatrix ``getState`` endpoint.

    For one product series the crawler first requests the default state to
    learn the selectable parameters and their option ids, then requests every
    combination of option ids and records the returned name, order code, list
    price, version and selected option names to a CSV file and an Excel sheet.
    """

    # Values shared by the signed text and the request body; keeping them in
    # one place guarantees that what is signed is what is sent.
    _APP_ID = '201010'
    _SERVICE_ID = 'a4966e02741c4cc091fe1834d00f149c'
    _SIGN_KEY = '72933362EAA649B893699E6191BC898F'
    _CLIENT = {
        'system': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
        'version': '1.0.0',
    }
    _STATE_URL = 'https://macafe.titanmatrix.com/macafe/getState'
    _SIGN_SCRIPT = 'get_sign.js'
    _MAX_ATTEMPTS = 5

    # Compiled JS context, created lazily on the first signature request.
    _sign_context = None

    def __init__(self):
        """Create the HTTP helper, the request headers and the Excel writer."""
        self.rq = Request()
        self.headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'apiVersion': '1.0',
            'Content-Type': 'application/json;charset=UTF-8',
            'Host': 'macafe.titanmatrix.com',
            'Origin': 'https://www.titanmatrix.com',
            'Referer': 'https://www.titanmatrix.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36'
        }
        # Not referenced by any method of this class.
        # NOTE(review): the fixed 'Content-Length' only matches one specific
        # body size; drop it before sending these headers with other payloads.
        self.detail_headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'apiVersion': '1.0',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Length': '390',
            'Content-Type': 'application/json;charset=UTF-8',
            'Host': 'macafe.titanmatrix.com',
            'Origin': 'https://www.titanmatrix.com',
            'Referer': 'https://www.titanmatrix.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
        }
        self.writer = pd.ExcelWriter('天工数据.xlsx')
        self.treated_combines = []

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _build_param(self, seriesid, pkey, action=None):
        """Return the ``param`` object; key order is part of the signed text."""
        param = {'serviceId': self._SERVICE_ID, 'sid': seriesid, 'pkey': pkey}
        if action is not None:
            param['action'] = action
        param['withParam'] = True
        param['noWaterMark'] = True
        return param

    @staticmethod
    def _select_action(pid, id):
        """Return the SELECT_PROP action; ``pid`` must come before ``id``."""
        return {'type': 'SELECT_PROP', 'payload': {'pid': pid, 'id': id}}

    def _build_payload(self, param, rank, u, sign):
        """Wrap ``param`` in the request envelope posted to the endpoint."""
        return {
            'appid': self._APP_ID,
            'client': dict(self._CLIENT),  # copy so callers cannot mutate the class constant
            'param': param,
            'timestamp': u,
            'rank': rank,
            'sign': sign,
        }

    def _get_sign_context(self):
        """Compile ``get_sign.js`` once and reuse the compiled context."""
        if self._sign_context is None:
            with open(self._SIGN_SCRIPT, 'r', encoding='UTF-8') as file:
                self._sign_context = execjs.compile(file.read())
        return self._sign_context

    def _sign(self, param, rank, u):
        """Build the text to sign and pass it to JS function ``c``."""
        # Compact separators give JSON without spaces between tokens while
        # leaving spaces inside values (e.g. the user agent) untouched.
        source = (
            'appid=' + self._APP_ID
            + '&client=' + json.dumps(self._CLIENT, separators=(',', ':'))
            + '&param=' + json.dumps(param, separators=(',', ':'))
            + '&rank=' + rank
            + '&timestamp=' + u
            + '&token='
            + '&key=' + self._SIGN_KEY
        )
        return self._get_sign_context().call('c', source)

    @staticmethod
    def _new_request_ids():
        """Return a ``(timestamp, rank)`` pair for one request."""
        u = datetime.now().strftime('%Y/%m/%d %H:%M:%S')
        rank = str(random.randint(1000000000000000, 9999999999999999))
        return u, rank

    @staticmethod
    def _selected_item_names(paras):
        """Return the names of all items flagged ``selected`` in ``paras``."""
        return [
            para_item['name']
            for para in paras
            for para_item in para['items']
            if para_item['selected']
        ]

    @staticmethod
    def _load_treated_models(csv_path):
        """Return the first-column values already stored in ``csv_path``."""
        try:
            with open(csv_path, encoding='utf-8', newline='') as existing:
                return {row[0] for row in csv.reader(existing) if row}
        except FileNotFoundError:
            return set()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def gen_sign(self, u, rank, seriesid):
        """Return the signature for the initial (no selection) request.

        ``seriesid`` has to be obtained manually or through selenium.

        Args:
            u: Timestamp string sent as ``timestamp``.
            rank: Random numeric string sent as ``rank``.
            seriesid: Id of the product series.
        """
        return self._sign(self._build_param(seriesid, ''), rank, u)

    def gen_sign_more(self, u, rank, seriesid, pkey, pid, id):
        """Return the signature for a request that selects one option.

        Args:
            u: Timestamp string sent as ``timestamp``.
            rank: Random numeric string sent as ``rank``.
            seriesid: Id of the product series.
            pkey: Underscore-joined option ids describing the current state.
            pid: Id of the parameter whose option is being selected.
            id: Id of the option to select.
        """
        param = self._build_param(seriesid, pkey, self._select_action(pid, id))
        return self._sign(param, rank, u)

    def gen_payload(self, seriesid, rank, u, sign):
        """Return the request body for the initial (no selection) request."""
        return self._build_payload(self._build_param(seriesid, ''), rank, u, sign)

    def gen_more_payload(self, seriesid, rank, u, sign, pkey, pid, id):
        """Return the request body for a request that selects one option."""
        param = self._build_param(seriesid, pkey, self._select_action(pid, id))
        return self._build_payload(param, rank, u, sign)

    def request(self, payload):
        """POST ``payload`` as JSON, retrying up to five times.

        Returns:
            Whatever ``self.rq.requests_post`` returns.

        Raises:
            Exception: the error of the last failed attempt, once every
                attempt has failed.
        """
        body = json.dumps(payload)
        last_error = None
        for attempt in range(1, self._MAX_ATTEMPTS + 1):
            try:
                return self.rq.requests_post(self._STATE_URL, data=body, headers=self.headers)
            except Exception as error:  # KeyboardInterrupt/SystemExit still propagate
                last_error = error
                print(f'try {attempt} times')
        raise last_error

    def get_all_combines(self, all_para):
        """Return every combination taking one option id per parameter.

        Args:
            all_para: Mapping of parameter id to its list of option ids.

        Returns:
            List of tuples, one per element of the cartesian product.
        """
        return list(product(*all_para.values()))

    def get_replace_combine(self, replace_para, now_value, combine):
        """Return ``combine`` with ``now_value`` swapped for a sibling option.

        Args:
            replace_para: Dict whose ``'ids'`` entry lists all option ids of
                the parameter used for the swap.
            now_value: Option id currently present in ``combine``.
            combine: Sequence of option ids.

        Returns:
            A new list with ``now_value`` replaced by the first other id.

        Raises:
            ValueError: if ``replace_para['ids']`` holds no id other than
                ``now_value``.
        """
        for candidate in replace_para['ids']:
            if candidate != now_value:
                replace_id = candidate
                break
        else:
            raise ValueError(
                f'no alternative to {now_value!r} in {replace_para["ids"]!r}'
            )
        # NOTE(review): replacement is by value, so every element equal to
        # now_value is swapped; this relies on option ids being unique across
        # parameters - confirm against the API data.
        return [value if value != now_value else replace_id for value in combine]

    def get_datas(self, seriesid, all_para, replace_para, csv_f, treated_models):
        """Fetch the data row of every option combination.

        To obtain combination C the request describes a state in which the
        swap parameter holds a different option (``pkey``) and then selects
        C's own option for that parameter through the SELECT_PROP action.

        Args:
            seriesid: Id of the product series.
            all_para: Mapping of parameter id to its list of option ids.
            replace_para: Dict with ``'index'``, ``'ids'`` and ``'pid'`` of
                the parameter used for the swap.
            csv_f: ``csv.writer``-like object receiving new rows.
            treated_models: Container of names already present in the CSV;
                rows with such a name are not written to ``csv_f`` again.

        Returns:
            List of rows ``[name, mcode, price, version, *selected names]``.
        """
        index = replace_para['index']
        pid = replace_para['pid']
        all_combines = self.get_all_combines(all_para)
        print('all_combines', len(all_combines))
        datas = []
        for i, combine in enumerate(all_combines):
            logger.info(f'{i}, {combine}')
            time.sleep(0.1)
            now_value = combine[index]
            new_combine = self.get_replace_combine(replace_para, now_value, combine)
            u, rank = self._new_request_ids()
            pkey = '_'.join(str(value) for value in new_combine)
            sign = self.gen_sign_more(u, rank, seriesid, pkey, pid, now_value)
            payload = self.gen_more_payload(seriesid, rank, u, sign, pkey, pid, now_value)
            response = self.request(payload)
            entity = json.loads(response.text)['entity']
            data = [entity['name'], entity['mcode'], entity['price'], entity['version']]
            data += self._selected_item_names(entity['props'])
            print(data)
            if data[0] not in treated_models:
                csv_f.writerow(data)
            datas.append(data)
        return datas

    def treat_seriesid(self, seriesid):
        """Scrape one series and store it as CSV rows and an Excel sheet.

        Args:
            seriesid: Tuple ``(company name, series name, series id)``.
        """
        company_name, series_name, seriesid = seriesid[0], seriesid[1], seriesid[2]
        csv_path = f'{company_name}_{series_name}.csv'
        treated_models = self._load_treated_models(csv_path)

        # The initial request returns the parameters and their option ids.
        u, rank = self._new_request_ids()
        sign = self.gen_sign(u, rank, seriesid)
        payload = self.gen_payload(seriesid, rank, u, sign)
        response = self.request(payload)
        entity = json.loads(response.text)['entity']

        para_names = []   # column titles, one per parameter
        all_para = {}     # parameter id -> option ids
        replace_para = {}  # first parameter that offers more than one option
        for index, para in enumerate(entity['props']):
            para_items = para['items']
            para_names.append(para['name'].replace(':', ''))
            items_ids = [para_item['id'] for para_item in para_items]
            if not replace_para and len(para_items) > 1:
                replace_para['index'] = index
                replace_para['ids'] = tuple(items_ids)
                replace_para['pid'] = para['id']
            all_para[para['id']] = items_ids

        # Columns: part name, order code, list price, date, then parameters.
        columns = ['部件名称', '订货号', '表价', '日期'] + para_names
        needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
        with open(csv_path, 'a', encoding='utf-8', newline='') as f:
            csv_f = csv.writer(f)
            if needs_header:  # avoid a second header row when resuming
                csv_f.writerow(columns)
            # Assumes the parameter set stays fixed for every combination.
            # TODO: handle parameters that change with the selection.
            datas = self.get_datas(seriesid, all_para, replace_para, csv_f, treated_models)

        df = pd.DataFrame(data=datas, columns=columns)
        df.to_excel(self.writer, index=False, sheet_name=f'{company_name}_{series_name}')

    def main(self):
        """Scrape the configured series and write the Excel workbook."""
        series_ids = [('常熟开关制造有限公司', 'CH3N-63系列小型断路器', 24031),
                      ('北元电器有限公司', 'BB1L-63系列小型漏电断路器', 22234)]
        for series_id in series_ids:
            logger.info(series_id)
            self.treat_seriesid(series_id)
            # NOTE(review): stops after the first series, so the second entry
            # is never processed - confirm whether this is intended.
            break
        self.writer.close()
if __name__ == '__main__':
    # Script entry point: build the crawler and run the scrape.
    TG().main()
# 注:本文仅可用于技术交流,请勿用于非法用途,欢迎咨询(q 1461124250)。