首先是 JS 逆向:还原接口签名参数 signKey 的生成方式。
# Fixed request parameters recovered from the site's JavaScript.
orderType = 0
uuid = "18953ca9f19c8-0eaf202d6b863a-26031d51-144000-18953ca9f1ac8"
key = "A013F70DB97834C0A5492378BD76C53A"
UA = "TW96aWxsYS81LjAgKFdpbmRvd3MgTlQgMTAuMDsgV2luNjQ7IHg2NCkgQXBwbGVXZWJLaXQvNTM3LjM2IChLSFRNTCwgbGlrZSBHZWNrbykgQ2hyb21lLzExNC4wLjAuMCBTYWZhcmkvNTM3LjM2"

# Per-request values: a millisecond timestamp and a random float in [1, 1001).
timeStamp = int(time.time() * 1000)
index = 1000 * random.random() + 1

# The signature is the MD5 hex digest of these fields joined as "name=value"
# pairs; ``key`` is part of the signed text but is not sent in the URL.
signed_fields = [
    ("method", "GET"),
    ("timeStamp", timeStamp),
    ("User-Agent", UA),
    ("index", index),
    ("channelId", 40009),
    ("sVersion", 2),
    ("key", key),
]
enstr = "&".join(f"{name}={value}" for name, value in signed_fields)
signKey = hashlib.md5(enstr.encode("utf-8")).hexdigest()

# Query string of the dashboard endpoint, carrying the signature last.
query_fields = [
    ("orderType", orderType),
    ("uuid", uuid),
    ("timeStamp", timeStamp),
    ("User-Agent", UA),
    ("index", index),
    ("channelId", 40009),
    ("sVersion", 2),
    ("signKey", signKey),
]
url = "https://piaofang.maoyan.com/dashboard-ajax?" + "&".join(
    f"{name}={value}" for name, value in query_fields
)

headers = {
    "Accept": "application/json, text/plain, */*",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    "Referer": "https://piaofang.maoyan.com/dashboard",
}
其次是字体反爬:把自定义字体中的字形渲染成图片,再用 OCR 识别出对应的字符。
# Load the downloaded web font at 40 px so each glyph can be drawn and OCR'd.
font = ImageFont.truetype('font.woff', 40)
for glyph_name in uni_list[:10]:
    # Drop the three-character name prefix and decode the remaining hex
    # digits as a "\uXXXX" escape to obtain the character itself.
    glyph_char = f"\\u{glyph_name[3:]}".encode().decode('unicode_escape')
    # Draw the glyph in black on a small white canvas.
    canvas = Image.new(mode='RGB', size=(42, 40), color="white")
    ImageDraw.Draw(im=canvas).text(xy=(0, 0), text=glyph_char, fill=0, font=font)
    # Encode the canvas as JPEG in memory and hand the bytes to the OCR engine.
    jpeg_buffer = io.BytesIO()
    canvas.save(jpeg_buffer, format='JPEG')
    charList.append(ocr.classification(jpeg_buffer.getvalue()))
然后把字体中的编码与网站爬取到的编码一一对应,这里我用一个函数来完成:
def func(d):
    """Translate one obfuscated digit code and append the result to ``w``.

    ``d`` is compared against the first ten entries of the module-level
    ``new_lower_list``.  On the first match, the recognised character is
    looked up in the single-entry dict at the same position of
    ``dict_list`` and appended to the module-level list ``w``.  Codes that
    match nothing are ignored.  The function returns ``None``.
    """
    # Slicing (rather than indexing 0..9) keeps the "first ten" limit while
    # avoiding an IndexError when fewer than ten entries are available.
    for code, mapping in zip(new_lower_list[:10], dict_list[:10]):
        if d == code:
            w.append(mapping[code])
            break
# Feed every code taken from the page through func(), which collects the
# translated characters in w.
for code in num1:
    func(code)
以下是完整源码(代码比较乱):
import io
import random
import hashlib
import requests
import time
import re
from fontTools.ttLib import TTFont
from PIL import Image, ImageFont, ImageDraw
import ddddocr
# OCR engine used later to recognise the rendered font glyphs.
ocr = ddddocr.DdddOcr()

# --- Signed request parameters (recovered from the site's JavaScript) ---
orderType = 0
uuid = "18953ca9f19c8-0eaf202d6b863a-26031d51-144000-18953ca9f1ac8"
timeStamp = int(time.time() * 1000)  # current time in milliseconds
key = "A013F70DB97834C0A5492378BD76C53A"
# NOTE(review): looks like the Base64 form of the browser User-Agent string
# sent in ``headers`` below -- confirm before changing either one.
UA = "TW96aWxsYS81LjAgKFdpbmRvd3MgTlQgMTAuMDsgV2luNjQ7IHg2NCkgQXBwbGVXZWJLaXQvNTM3LjM2IChLSFRNTCwgbGlrZSBHZWNrbykgQ2hyb21lLzExNC4wLjAuMCBTYWZhcmkvNTM3LjM2"
index = 1000 * random.random() + 1  # random float in [1, 1001)
# The signature is the MD5 hex digest of this string; ``key`` is part of the
# signed text but is not sent in the URL.
enstr = f"method=GET&timeStamp={timeStamp}&User-Agent={UA}&index={index}&channelId=40009&sVersion=2&key={key}"
signKey = hashlib.md5(enstr.encode(encoding='utf-8')).hexdigest()
url = f"https://piaofang.maoyan.com/dashboard-ajax?orderType={orderType}&uuid={uuid}&timeStamp={timeStamp}&User-Agent={UA}&index={index}&channelId=40009&sVersion=2&signKey={signKey}"
headers = {
    "Accept": "application/json, text/plain, */*",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    "Referer": "https://piaofang.maoyan.com/dashboard",
}

# --- Fetch the dashboard data ---
# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() turns an HTTP error into an explicit exception.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()

# --- Download the web font referenced by the response ---
# The 'fontStyle' field holds CSS text; pull the protocol-relative .woff URL
# out of it.  A raw string keeps the regex escapes valid Python.
font_match = re.search(r'opentype"\),url\("(//.*?\.woff)"', response.json()['fontStyle'])
if font_match is None:
    raise RuntimeError("no .woff font URL found in the 'fontStyle' field")
fonturl = 'http:' + font_match.group(1)
r = requests.get(url=fonturl, timeout=10)
r.raise_for_status()
with open('font.woff', 'wb') as f:
    f.write(r.content)

# --- Read the glyph names from the font ---
tfont = TTFont('font.woff')
# The first two glyphs are skipped -- presumably placeholder glyphs; TODO confirm.
uni_list = tfont.getGlyphOrder()[2:]
lower_list = [element.lower() for element in uni_list]
print('uni列表', uni_list)
print('lower_list', lower_list)

# Module-level work lists filled in by the code further down.
charList = []        # characters recognised by OCR, one per glyph
new_lower_list = []  # glyph names rewritten as "&#x...." codes
dict_list = []       # single-entry dicts mapping a code to its character
w = []               # characters collected by func() for the current number
def func(d):
    """Translate one obfuscated digit code and append the result to ``w``.

    ``d`` is compared against the first ten entries of the module-level
    ``new_lower_list``.  On the first match, the recognised character is
    looked up in the single-entry dict at the same position of
    ``dict_list`` and appended to the module-level list ``w``.  Codes that
    match nothing are ignored.  The function returns ``None``.
    """
    # Slicing (rather than indexing 0..9) keeps the "first ten" limit while
    # avoiding an IndexError when fewer than ten entries are available.
    for code, mapping in zip(new_lower_list[:10], dict_list[:10]):
        if d == code:
            w.append(mapping[code])
            break
# Rewrite each lower-cased glyph name by replacing "uni" with nothing and
# prefixing "&#x", so it can be compared with the codes found in the page data.
new_lower_list.extend("&#x" + name.replace("uni", "") for name in lower_list)
print('new_lower_list------', new_lower_list)

# Render each of the first ten glyphs to a small image and OCR it.
font = ImageFont.truetype('font.woff', 40)
for uchar in uni_list[:10]:
    # Drop the three-character name prefix and decode the remaining hex
    # digits as a "\uXXXX" escape to obtain the character itself.
    unknown_char = f"\\u{uchar[3:]}".encode().decode('unicode_escape')
    im = Image.new(mode='RGB', size=(42, 40), color="white")
    draw = ImageDraw.Draw(im=im)
    draw.text(xy=(0, 0), text=unknown_char, fill=0, font=font)
    img_byte = io.BytesIO()
    im.save(img_byte, format='JPEG')
    charList.append(ocr.classification(img_byte.getvalue()))
print('对应字符', charList)

# Build the code -> character lookup once.  It does not depend on the movie,
# so rebuilding it inside the movie loop only made dict_list grow.
dict_list.extend({code: char} for code, char in zip(new_lower_list, charList))

page_dict = response.json()
movies = page_dict['movieList']['data']['list']
print(len(movies))
for movie in movies:
    movieName = movie['movieInfo']['movieName']
    box = movie["boxSplitUnit"]
    unit = box["unit"]
    # Days since release
    releaseInfo = movie["movieInfo"]["releaseInfo"]
    # Number of screenings
    showCount = movie["showCount"]

    # Decode the obfuscated number token by token.  The decimal point is kept
    # where the source has it instead of being re-inserted at a fixed offset.
    w.clear()
    for token in re.findall(r"\.|&#x[0-9a-fA-F]+", box["num"]):
        if token == ".":
            w.append(token)
        else:
            # new_lower_list holds lower-case codes, so normalise the case.
            func(token.lower())
    new_num2 = ''.join(w)
    w.clear()

    # Print the unit reported by the API rather than a hard-coded one.
    print(f"电影名字:{movieName},综合票房:{new_num2}{unit},上映天数:{releaseInfo},排片场次:{showCount}")