1.极验登录
import re
import requests
from lxml import etree
from fontTools.ttLib import TTFont
def get_dict(url):
    """Download the fonts referenced by a stylesheet and extend the glyph map.

    Fetches the CSS at ``url``, extracts every font path matching
    ``,url("...")``, saves each font file under ``./css/`` and pairs its
    glyph names (skipping the first two entries of the glyph order) with the
    characters of the module-level ``woff_li`` list. The resulting pairs are
    stored in the module-level ``data`` dict.

    Relies on the module-level names ``headers``, ``woff_li`` and ``data``.

    :param url: absolute URL of the CSS file.
    :raises requests.HTTPError: if the stylesheet or a font download answers
        with an error status.
    """
    import os

    font_dir = './css/'
    # open() does not create missing directories, so make sure the target
    # folder exists before the first font is written.
    os.makedirs(font_dir, exist_ok=True)

    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()

    # Font locations appear in the stylesheet as ,url("<path>")
    woff = re.compile(r',url\("(.*?)"')
    for woff_list in woff.findall(response.text):
        # The last path segment is used as the local file name.
        woff_name = woff_list.split('/')[-1]
        # The captured path is prefixed with 'http:' to form the download URL.
        woff_list_full = 'http:' + woff_list
        woff_response = requests.get(url=woff_list_full, headers=headers, timeout=10)
        # Fail here instead of saving an error page as if it were a font.
        woff_response.raise_for_status()

        font_path = font_dir + woff_name
        with open(font_path, 'wb') as fp:
            fp.write(woff_response.content)

        woff_font = TTFont(font_path)
        try:
            woff_content = woff_font.getGlyphOrder()
        finally:
            # Release the file handle TTFont keeps on the font file.
            woff_font.close()

        # The first two glyph names are skipped; the remainder is matched
        # positionally against woff_li (zip stops at the shorter sequence).
        keys = woff_content[2::]
        data.update(zip(keys, woff_li))
def get_content():
    """Fetch the listing page and print it with the font entities decoded.

    Requests ``base_url``, takes the first stylesheet link whose href starts
    with ``//s3plus``, hands it to ``get_dict`` so the module-level ``data``
    map gets filled, then replaces every ``&#x....;`` entity derived from a
    glyph name with its mapped character and prints the resulting page text.

    Relies on the module-level names ``base_url``, ``headers`` and ``data``.

    :raises requests.HTTPError: if the page request answers with an error
        status.
    :raises IndexError: if the page contains no matching stylesheet link.
    """
    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(base_url, headers=headers, timeout=10)
    response.raise_for_status()
    content = response.text

    css_links = re.findall(r'href="(//s3plus.*?)"', content)
    if not css_links:
        # Same exception type a bare [0] lookup would produce, but with a
        # message that says what was actually missing.
        raise IndexError('no //s3plus stylesheet link found in ' + base_url)

    # The captured href is prefixed with 'http:' to form the stylesheet URL.
    woff_url = 'http:' + css_links[0]
    get_dict(woff_url)

    for k, v in data.items():
        # k[3:] drops the first three characters of the glyph name; what is
        # left is the code used inside the page's &#x...; entity.
        content = content.replace('&#x' + k[3::] + ';', v)
    print(content)
if __name__ == '__main__':
    # Glyph-name -> character map; starts empty.
    data = dict()
    # Browser-like User-Agent sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'
    }
    # Page to fetch.
    base_url = 'http://www.dianping.com/beijing/ch10'
    # Character table, one single-character string per entry. Written as
    # string rows and expanded with list(); the order is significant because
    # entries are matched positionally.
    woff_li = list(
        '12345678'
        '90店中美家馆小车大'
        '市公酒行国品发电金心'
        '业商司超生装园场食有'
        '新限天面工服海华水房'
        '饰城乐汽香部利子老艺'
        '花专东肉菜学福饭人百'
        '餐茶务通味所山区门药'
        '银农龙停尚安广鑫一容'
        '动南具源兴鲜记时机烤'
        '文康信果阳理锅宝达地'
        '儿衣特产西批坊州牛佳'
        '化五米修爱北养卖建材'
        '三会鸡室红站德王光名'
        '丽油院堂烧江社合星货'
        '型村自科快便日民营和'
        '活童明器烟育宾精屋经'
        '居庄石顺林尔县手厅销'
        '用好客火雅盛体旅之鞋'
        '辣作粉包楼校鱼平彩上'
        '吧保永万物教吃设医正'
        '造丰健点汤网庆技斯洗'
        '料配汇木缘加麻联卫川'
        '泰色世方寓风幼羊烫来'
        '高厂兰阿贝皮全女拉成'
        '云维贸道术运都口博河'
        '瑞宏京际路祥青镇厨培'
        '力惠连马鸿钢训影甲助'
        '窗布富牌头四多妆吉苑'
        '沙恒隆春干饼氏里二管'
        '诚制售嘉长轩杂副清计'
        '黄讯太鸭号街交与叉附'
        '近层旁对巷栋环省桥湖'
        '段乡厦府铺内侧元购前'
        '幢滨处向座下県凤港开'
        '关景泉塘放昌线湾政步'
        '宁解白田町溪十八古双'
        '胜本单同九迎第台玉锦'
        '底后七斜期武岭松角纪'
        '朝峰六振珠局岗洲横边'
        '济井办汉代临弄团外塔'
        '杨铁浦字年岛陵原梅进'
        '荣友虹央桂沿事津凯莲'
        '丁秀柳集紫旗张谷的是'
        '不了很还个也这我就在'
        '以可到错没去过感次要'
        '比觉看得说常真们但最'
        '喜哈么别位能较境非为'
        '欢然他挺着价那意种想'
        '出员两推做排实分间甜'
        '度起满给热完格荐喝等'
        '其再几只现朋候样直而'
        '买于般豆量选奶打每评'
        '少算又因情找些份置适'
        '什蛋师气你姐棒试总定'
        '啊足级整带虾如态且尝'
        '主话强当更板知己无酸'
        '让入啦式笑赞片酱差像'
        '提队走嫩才刚午接重串'
        '回晚微周值费性桌拍跟'
        '块调糕'
    )
    get_content()
2.有道翻译
'''
js加密,是非常常见的加密算法 大部分的网页都会存在
js加密一般都是对参数进行加密 比如 salt sign token...
js加密的分析步骤:
1.找到那些参数在影响数据的获取?
需要做不同的请求,对比参数,找出不同的参数即可
2.找到参数之后,需要查找这些参数生成的原理是什么?
(1) 这些参数可能是通过之前的一些请求传递过来的
(2) 参数是在某个js文件中生成的-->找到对应的js文件,分析js代码,得到参数的生成原理
'''
import hashlib
import json
import random
import time

import requests

# Request headers copied from a browser session. Content-Length is left out
# on purpose: requests derives it from the encoded form body.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'OUTFOX_SEARCH_USER_ID=-837851221@121.69.97.22; JSESSIONID=aaaViJguhyZi2_EoPZTvx; OUTFOX_SEARCH_USER_ID_NCOO=1234353394.9478836; ___rl__test__cookies=1603866433807',
    'Host': 'fanyi.youdao.com',
    'Origin': 'http://fanyi.youdao.com',
    'Referer': 'http://fanyi.youdao.com/',
}

# How the site's JavaScript derives the signed fields:
#   salt: i = r + parseInt(10 * Math.random(), 10)
#   sign: n.md5("fanyideskweb" + e + i + "]BjuETDhU)zqSxf-=B#7m")
# NOTE(review): r is taken to be the millisecond timestamp sent as `lts`
# (the captured sample had salt == lts + one digit) and e the text to
# translate -- confirm against the site's JS.
ws = input('请输入单词:')

# A frozen salt/sign pair only matches the word it was captured with, so the
# values are recomputed for every input.
lts = str(int(time.time() * 1000))
salt = lts + str(random.randint(0, 9))
sign = hashlib.md5(
    ('fanyideskweb' + ws + salt + ']BjuETDhU)zqSxf-=B#7m').encode('utf-8')
).hexdigest()

data = {
    'i': ws,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': salt,
    'sign': sign,
    'lts': lts,
    'bv': '43ba53ed4342a66afd304068011c3e0c',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_REALTlME',
}
# A timeout keeps an unresponsive server from hanging the script forever.
response = requests.post(
    url='http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule',
    headers=headers,
    data=data,
    timeout=10,
)
# Print the translated text of the first segment of the first result.
print(response.json()['translateResult'][0][0]['tgt'])
3.百度翻译
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter
class Image360SpiderMiddleware:
    """Pass-through spider middleware.

    Not every hook has to be defined; Scrapy treats a missing hook as
    "this middleware does not modify the passed objects". The hooks defined
    here hand everything on unchanged.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to the spider_opened signal."""
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_spider_input(self, response, spider):
        """Called for each response on its way into the spider.

        Should return None or raise an exception; this one returns None.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Called with what the spider returned for a response.

        Must produce an iterable of Request or item objects; every element
        of ``result`` is yielded unchanged.
        """
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Called when a spider or a process_spider_input() hook raises.

        Should return None or an iterable of Request or item objects; this
        one returns None.
        """
        return None

    def process_start_requests(self, start_requests, spider):
        """Called with the spider's start requests (no response attached).

        Must yield only requests; every element of ``start_requests`` is
        yielded unchanged.
        """
        yield from start_requests

    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)
class Image360DownloaderMiddleware:
    """Pass-through downloader middleware.

    Not every hook has to be defined; Scrapy treats a missing hook as
    "this middleware does not modify the passed objects". The hooks defined
    here hand everything on unchanged.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to the spider_opened signal."""
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_request(self, request, spider):
        """Called for each request passing through the downloader middleware.

        A hook may return None (continue processing this request), a
        Response object, a Request object, or raise IgnoreRequest (in which
        case the process_exception() hooks of the installed downloader
        middleware are called). This one returns None.
        """
        return None

    def process_response(self, request, response, spider):
        """Called with the response returned from the downloader.

        A hook may return a Response object, return a Request object, or
        raise IgnoreRequest. This one returns ``response`` unchanged.
        """
        return response

    def process_exception(self, request, exception, spider):
        """Called when a download handler or a process_request() hook raises.

        A hook may return None (continue processing this exception), or a
        Response / Request object (either stops the process_exception()
        chain). This one returns None.
        """
        return None

    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)
4.大众点评
import re
import requests
from lxml import etree
from fontTools.ttLib import TTFont
def get_dict(url):
    """Download the fonts referenced by a stylesheet and extend the glyph map.

    Fetches the CSS at ``url``, extracts every font path matching
    ``,url("...")``, saves each font file under ``./css/`` and pairs its
    glyph names (skipping the first two entries of the glyph order) with the
    characters of the module-level ``woff_li`` list. The resulting pairs are
    stored in the module-level ``data`` dict.

    Relies on the module-level names ``headers``, ``woff_li`` and ``data``.

    :param url: absolute URL of the CSS file.
    :raises requests.HTTPError: if the stylesheet or a font download answers
        with an error status.
    """
    import os

    font_dir = './css/'
    # open() does not create missing directories, so make sure the target
    # folder exists before the first font is written.
    os.makedirs(font_dir, exist_ok=True)

    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()

    # Font locations appear in the stylesheet as ,url("<path>")
    woff = re.compile(r',url\("(.*?)"')
    for woff_list in woff.findall(response.text):
        # The last path segment is used as the local file name.
        woff_name = woff_list.split('/')[-1]
        # The captured path is prefixed with 'http:' to form the download URL.
        woff_list_full = 'http:' + woff_list
        woff_response = requests.get(url=woff_list_full, headers=headers, timeout=10)
        # Fail here instead of saving an error page as if it were a font.
        woff_response.raise_for_status()

        font_path = font_dir + woff_name
        with open(font_path, 'wb') as fp:
            fp.write(woff_response.content)

        woff_font = TTFont(font_path)
        try:
            woff_content = woff_font.getGlyphOrder()
        finally:
            # Release the file handle TTFont keeps on the font file.
            woff_font.close()

        # The first two glyph names are skipped; the remainder is matched
        # positionally against woff_li (zip stops at the shorter sequence).
        keys = woff_content[2::]
        data.update(zip(keys, woff_li))
def get_content():
    """Fetch the listing page and print it with the font entities decoded.

    Requests ``base_url``, takes the first stylesheet link whose href starts
    with ``//s3plus``, hands it to ``get_dict`` so the module-level ``data``
    map gets filled, then replaces every ``&#x....;`` entity derived from a
    glyph name with its mapped character and prints the resulting page text.

    Relies on the module-level names ``base_url``, ``headers`` and ``data``.

    :raises requests.HTTPError: if the page request answers with an error
        status.
    :raises IndexError: if the page contains no matching stylesheet link.
    """
    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(base_url, headers=headers, timeout=10)
    response.raise_for_status()
    content = response.text

    css_links = re.findall(r'href="(//s3plus.*?)"', content)
    if not css_links:
        # Same exception type a bare [0] lookup would produce, but with a
        # message that says what was actually missing.
        raise IndexError('no //s3plus stylesheet link found in ' + base_url)

    # The captured href is prefixed with 'http:' to form the stylesheet URL.
    woff_url = 'http:' + css_links[0]
    get_dict(woff_url)

    for k, v in data.items():
        # k[3:] drops the first three characters of the glyph name; what is
        # left is the code used inside the page's &#x...; entity.
        content = content.replace('&#x' + k[3::] + ';', v)
    print(content)
if __name__ == '__main__':
    # Glyph-name -> character map; starts empty.
    data = dict()
    # Browser-like User-Agent sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'
    }
    # Page to fetch.
    base_url = 'http://www.dianping.com/beijing/ch10'
    # Character table, one single-character string per entry. Written as
    # string rows and expanded with list(); the order is significant because
    # entries are matched positionally.
    woff_li = list(
        '12345678'
        '90店中美家馆小车大'
        '市公酒行国品发电金心'
        '业商司超生装园场食有'
        '新限天面工服海华水房'
        '饰城乐汽香部利子老艺'
        '花专东肉菜学福饭人百'
        '餐茶务通味所山区门药'
        '银农龙停尚安广鑫一容'
        '动南具源兴鲜记时机烤'
        '文康信果阳理锅宝达地'
        '儿衣特产西批坊州牛佳'
        '化五米修爱北养卖建材'
        '三会鸡室红站德王光名'
        '丽油院堂烧江社合星货'
        '型村自科快便日民营和'
        '活童明器烟育宾精屋经'
        '居庄石顺林尔县手厅销'
        '用好客火雅盛体旅之鞋'
        '辣作粉包楼校鱼平彩上'
        '吧保永万物教吃设医正'
        '造丰健点汤网庆技斯洗'
        '料配汇木缘加麻联卫川'
        '泰色世方寓风幼羊烫来'
        '高厂兰阿贝皮全女拉成'
        '云维贸道术运都口博河'
        '瑞宏京际路祥青镇厨培'
        '力惠连马鸿钢训影甲助'
        '窗布富牌头四多妆吉苑'
        '沙恒隆春干饼氏里二管'
        '诚制售嘉长轩杂副清计'
        '黄讯太鸭号街交与叉附'
        '近层旁对巷栋环省桥湖'
        '段乡厦府铺内侧元购前'
        '幢滨处向座下県凤港开'
        '关景泉塘放昌线湾政步'
        '宁解白田町溪十八古双'
        '胜本单同九迎第台玉锦'
        '底后七斜期武岭松角纪'
        '朝峰六振珠局岗洲横边'
        '济井办汉代临弄团外塔'
        '杨铁浦字年岛陵原梅进'
        '荣友虹央桂沿事津凯莲'
        '丁秀柳集紫旗张谷的是'
        '不了很还个也这我就在'
        '以可到错没去过感次要'
        '比觉看得说常真们但最'
        '喜哈么别位能较境非为'
        '欢然他挺着价那意种想'
        '出员两推做排实分间甜'
        '度起满给热完格荐喝等'
        '其再几只现朋候样直而'
        '买于般豆量选奶打每评'
        '少算又因情找些份置适'
        '什蛋师气你姐棒试总定'
        '啊足级整带虾如态且尝'
        '主话强当更板知己无酸'
        '让入啦式笑赞片酱差像'
        '提队走嫩才刚午接重串'
        '回晚微周值费性桌拍跟'
        '块调糕'
    )
    get_content()