# 知识点:
# 1、编码设置、特殊字符处理
# 2、正则表达式提取JSON字符串
# 3、递归打印JSON属性值
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import os, sys, time
import urllib.request, requests, bs4
import re, json, demjson
import importlib
# NOTE(review): the original comment here read "set utf-8 encoding". This call
# only reloads the already-imported sys module; no encoding is set on this
# line. It looks like a leftover of the Python 2 reload(sys) /
# setdefaultencoding idiom -- confirm and consider removing.
importlib.reload(sys)
# Translation table for str.translate(): maps every code point from 0x10000
# up to sys.maxunicode (i.e. everything outside the Basic Multilingual Plane)
# to U+FFFD, the Unicode replacement character.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
'''
断点打印
'''
def dump(msg):
    """Print ``msg`` and terminate the process immediately (debug breakpoint).

    Args:
        msg: Any object; it is written to stdout with ``print``.

    The process ends with exit status 0 through ``os._exit``, which bypasses
    the interpreter's normal shutdown (no cleanup handlers run). This
    function therefore never returns.
    """
    # os._exit() does not flush stdio buffers, so flush explicitly; otherwise
    # the message can be lost when stdout is redirected to a pipe or a file.
    print(msg, flush=True)
    os._exit(0)
'''
解析jsonp格式为json
'''
def loads_jsonp(jsonp):
    """Parse a JSONP payload such as ``callback({...})`` into a Python object.

    The JSON part is taken to be everything from the first ``{`` to the last
    ``}`` in the text (newlines included).

    Args:
        jsonp: The JSONP text; any object is accepted and converted with
            ``str`` first.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If the text contains no ``{...}`` section, or (as
            ``json.JSONDecodeError``, a ``ValueError`` subclass) if that
            section is not valid JSON.
    """
    # Example input: 'jsonp1({"code": 0,"msg": "","times": 1570073177610})'
    found = re.search(r"\{.*\}", str(jsonp), re.S)
    if found is None:
        # Report bad input explicitly instead of failing on None.group().
        raise ValueError("no JSON object found in JSONP payload")
    return json.loads(found.group(0))
'''
下载文件
'''
def downfiles(imglist, dest_dir="D:\\360Downloads"):
    """Download every URL in ``imglist`` into ``dest_dir``.

    Each file is named after the last ``/``-separated segment of its URL with
    ``.jpg`` appended. A progress line is printed after each file and a final
    line once all downloads are done.

    Args:
        imglist: Iterable of image URLs (strings).
        dest_dir: Directory that receives the files. ``open`` does not create
            directories, so it has to exist already.

    Raises:
        requests.HTTPError: If a download answers with an HTTP error status.
        OSError: If a target file cannot be opened for writing.
    """
    for count, imgurl in enumerate(imglist, start=1):
        imgres = requests.get(imgurl)
        # Fail loudly rather than saving an HTTP error page as an "image".
        imgres.raise_for_status()
        fname = imgurl.split('/')[-1]
        target = os.path.join(dest_dir, "{}.jpg".format(fname))
        with open(target, "wb") as f:
            f.write(imgres.content)
        print("第", count, "张")
    print("下载完毕")
'''
读取详情页
'''
def getdetails(url):
    """Fetch a detail page and download the images found in it.

    The page is parsed with html5lib; every ``<img>`` that is a direct child
    of an element with class ``reveal-work-wrap`` contributes its ``src``.
    Everything from the first ``@`` in the ``src`` onwards is dropped, the
    resulting URL is printed, and the collected list is handed to
    ``downfiles``.

    Args:
        url: Address of the detail page.

    Raises:
        requests.HTTPError: If the page request answers with an HTTP error
            status.
    """
    res = requests.get(url)
    res.raise_for_status()
    html = bs4.BeautifulSoup(res.text, 'html5lib')
    image_urls = []  # local list of URLs to download
    for img in html.select('.reveal-work-wrap > img'):
        src = img.get('src')
        if not src:
            # <img> without a usable src: nothing to download, and calling
            # .split() on None would raise AttributeError.
            continue
        target = src.split('@')[0]  # keep only the part before the first '@'
        image_urls.append(target)
        print(target)
    downfiles(image_urls)
'''
获取页面源码
'''
def geturl(url):
    """Download ``url`` and return the body parsed as a BeautifulSoup tree.

    Args:
        url: Address to request with ``requests.get``.

    Returns:
        A ``bs4.BeautifulSoup`` object built from the response text with the
        ``html5lib`` parser.

    Raises:
        requests.HTTPError: If the response carries an HTTP error status.
    """
    response = requests.get(url)
    response.raise_for_status()
    return bs4.BeautifulSoup(response.text, 'html5lib')
'''
解析json,仅一层
'''
def printjson(json):
    """Decode JSON text with demjson and print its top-level entries.

    Only the first level is walked; nested values are printed as they are.

    Args:
        json: JSON text handed to ``demjson.decode``. Note that inside this
            function the parameter name hides the ``json`` module.
    """
    decoded = demjson.decode(json)
    for key in decoded:
        value = decoded[key]
        print(key, ':', value)
'''
递归解析json
'''
def dict_generator(indict, pre=None):
    """Recursively flatten a nested structure into key-path lists.

    Every yielded item is a list whose last element is a leaf value and whose
    preceding elements are the dict keys leading to it. List and tuple values
    are expanded element by element under their parent key (indices are not
    recorded). Empty containers found as dict values are reported with the
    placeholder strings ``'{}'``, ``'[]'`` and ``'()'``.

    Args:
        indict: The structure to walk, normally a dict.
        pre: Key path accumulated so far; callers usually leave it unset.
            It is copied, never modified.

    Yields:
        list: ``[key1, key2, ..., value]``. A non-dict ``indict`` yields
        ``pre + [indict]`` so that every item has the same shape and keeps
        its key path.
    """
    pre = pre[:] if pre else []
    if not isinstance(indict, dict):
        # Leaf reached (for example a scalar element of a list): keep the key
        # path instead of yielding the bare value, so consumers can always
        # slice item[:-1] / item[-1].
        yield pre + [indict]
        return
    for key, value in indict.items():
        path = pre + [key]
        if isinstance(value, dict):
            if value:
                yield from dict_generator(value, path)
            else:
                yield path + ['{}']
        elif isinstance(value, (list, tuple)):
            if value:
                for element in value:
                    yield from dict_generator(element, path)
            else:
                yield path + ['[]' if isinstance(value, list) else '()']
        else:
            yield path + [value]
'''
打印目标类型
'''
def typeof(target):
    """Print the type of ``target`` to stdout.

    Args:
        target: Any object.
    """
    kind = type(target)
    print(kind)
# Sample payload: 'jsonp1({"code": 0,"msg": "","times": 1570073177610})'
response = requests.get('http://acsing.kugou.com/sing7/web/jsonp/cdn/opus/listenGetData?callback=jsonp1&data=OTkwMDY4MDk0&sign=84d875624381eda2b448b082e22d2eb7&channelId=0&_=1570073173877:formatted')
response.raise_for_status()
# Work on the raw response text. Sending a JSONP body through an HTML parser
# and str() re-serialises it as markup, which entity-escapes characters such
# as '&', '<' and '>' inside the JSON string values.
jsonp = response.text
# Replace characters outside the Basic Multilingual Plane with U+FFFD before
# parsing (special-character handling).
result = jsonp.translate(non_bmp_map)
# Extract the {...} part with the shared helper and decode it.
sValue = loads_jsonp(result)
# Walk the decoded object recursively and print "key.path : value" lines.
for i in dict_generator(sValue):
    # Treat a bare (non-list) item as a value without a key path.
    path = i if isinstance(i, list) else [i]
    # str() each part so non-string path components cannot break the join.
    print('.'.join(map(str, path[:-1])), ':', path[-1])
# os._exit() does not flush stdio buffers; flush first so the printed lines
# are not lost when stdout is redirected.
sys.stdout.flush()
os._exit(0)