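# 1. Anjuke rentals (digits obfuscated with a base64-embedded font)
# Approach:
# 1. Request the page and extract the base64-encoded font data.
# 2. Decode that data with the base64 module.
# 3. Replace the hexadecimal character references in the page source.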
import requests,re,base64,io
from fontTools.ttLib import TTFont
def get_base_content(base_content_pattern):
    """Build a character-code lookup table from a base64-encoded font.

    Args:
        base_content_pattern: Base64 text of the font file.

    Returns:
        A dict whose keys are the ``hex()`` strings of the character codes
        in the font's best cmap and whose values are the integer formed by
        the last two characters of the mapped glyph name, minus one.
    """
    # b64decode returns the raw font bytes; they are parsed in memory.
    # (For debugging, the bytes can be written to a .woff file, or the
    # parsed font dumped with font.saveXML().)
    font_bytes = base64.b64decode(base_content_pattern)
    font = TTFont(io.BytesIO(font_bytes))

    # getBestCmap() returns a dict keyed by the integer character code.
    glyph_by_hex = {
        hex(code): glyph_name
        for code, glyph_name in font.getBestCmap().items()
    }

    # NOTE(review): presumably each glyph name ends in a two-digit index that
    # is one greater than the digit it renders - verify against the font.
    return {
        hex_code: int(glyph_name[-2:]) - 1
        for hex_code, glyph_name in glyph_by_hex.items()
    }
def get_content():
    """Fetch the listing page and print it with obfuscated digits restored.

    Reads the module-level ``base_url`` and ``headers``, extracts the
    base64-embedded font from the response, asks ``get_base_content`` for
    the code-to-digit table, and replaces every ``&#x....;`` reference
    found in that table with its digit.

    Raises:
        IndexError: If the response contains no base64-embedded font.
    """
    response = requests.get(url=base_url, headers=headers)
    # Pull the base64 font payload out of the page source.
    base_pattern = re.compile(r'base64,(.*?)\'\)')
    base_content_pattern = base_pattern.findall(response.text)[0]
    data = get_base_content(base_content_pattern)
    # Accumulate the substitutions on one string. Restarting from
    # response.text on every iteration would keep only the last replacement
    # and leave `content` unbound when the table is empty.
    content = response.text
    for k, v in data.items():
        # k looks like '0x....'; drop the '0x' prefix to match the entity.
        content = content.replace('&#x' + k[2:] + ';', str(v))
    print(content)
    # Placeholder kept from the original: further processing was elided.
    ...
if __name__ == '__main__':
    # Script entry point: define the module-level request settings that
    # get_content() reads, then run the scrape.
    headers = {}
    headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'
    base_url = 'https://bj.zu.anjuke.com/?from=navigation'
    get_content()
# 2. Bilibili: fetching danmaku (bullet comments) in real time
import requests
from lxml import etree
# Request headers for the danmaku history API.
# NOTE(review): the Cookie carries hard-coded session values (SESSDATA,
# bili_jct); presumably these should come from the environment or a config
# file rather than live in the source - confirm.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
    'Cookie': "_uuid=BDB740E5-FE0A-2DDA-C261-FEC313D41B8314007infoc; buvid3=5810BAE2-CC62-45C0-9C80-C0EC5A22EF08143075infoc; CURRENT_FNVAL=80; blackside_state=1; rpdid=|(k|JRl)mm~R0J'uY||muku)u; LIVE_BUVID=AUTO2816011938977343; sid=682boz7c; DedeUserID=234533129; DedeUserID__ckMd5=0f1d4b4dabd519f9; SESSDATA=e12f517c%2C1616746236%2C10bb9*91; bili_jct=90899519cb69bc72d08e996dcb03138f; LIVE_PLAYER_TYPE=2; bp_video_offset_234533129=445091004236092920; bp_t_offset_234533129=445091004236092920; bsource=search_baidu; PVID=1; bfe_id=0c3a1998eda2972db3dbce4811a80de6"
}
# NOTE(review): verify=False disables TLS certificate verification on an
# HTTPS request that sends the session cookie - confirm this is intentional
# (e.g. a local capture proxy) before relying on it.
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url='https://api.bilibili.com/x/v2/dm/history?type=1&oid=248319821&date=2020-10-28',headers=headers,verify=False,timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
html = etree.HTML(response.content)
# Extract the danmaku: the text of every <d> element.
html_list = html.xpath(r'//d/text()')
if html_list:
    with open('danmu.txt','a',encoding='utf8') as fp:
        # The file is opened in append mode, so end with a newline; otherwise
        # the next run's first entry is glued to this run's last one.
        fp.write('\n'.join(html_list) + '\n')
# 3. Pear Video (mobile app API)
import re
import requests
# Request headers for the Pear Video app endpoint.
headers = {
    'User-Agent': 'okhttp/3.11.0',
    'Cookie': '__secdyid=b3f5ba03467d2b416daee88a98aef8b27b79461f473b34fe021603958487; acw_tc=781bad3616039584871561176e2892c573270d69a2ff652333908b9ae80ca3; PEAR_PLATFORM=2; PEAR_UUID=866174905831844; JSESSIONID=CF05F7DC65EB3F0BEC6CB26E8C3F6CFC; SERVERID=ed8d5ad7d9b044d0dd5993c7c771ef48|1603958677|1603958487'
}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(
    url='http://app.pearvideo.com/clt/jsp/v4/home.jsp?start=60&index=60&pstart=59&lastScore=1.6038826350001704E12&isHome=1&channelCode=110100',
    headers=headers, verify=False, timeout=10)
# Characters stripped from video titles so the title can serve as a file
# name in the optional download step below.
video_name_pattern = re.compile(r'[:\?"<>\|\\/\*]')
content = response.json()['dataList']
for data in content:
    data_list_full = data['contList']
    for data_content in data_list_full:
        try:
            # Video title with the unwanted characters removed.
            video_name = video_name_pattern.sub('', data_content['name'])
            # URL of the first listed video.
            video_url = data_content['videos'][0]['url']
        except (KeyError, IndexError, TypeError):
            # Entry has no usable name or video URL: skip it. Only lookup
            # errors are caught, so Ctrl-C and real bugs still surface.
            continue
        print(video_name, video_url)
        # Optional download step (disabled):
        # video_response = requests.get(url=video_url, headers=headers)
        # with open(video_name + '.mp4', 'wb') as fp:
        #     fp.write(video_response.content)