import re,requests,json,os
from urllib import request
# Article page whose embedded photo gallery is scraped.
url = 'https://www.toutiao.com/a6589933439766495747/'
# A browser-like user agent is sent with the page request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}

# Captures the argument of the ``gallery: JSON.parse(...)`` call in the page's
# inline script. The capture is greedy, so it runs to the last ``),`` on the
# same line.
GALLERY_PATTERN = r'gallery: JSON.parse\((.*)\),'
# Directory (relative to the working directory) that receives the images.
DOWNLOAD_DIR = 'download'
# Seconds to wait for the article page before giving up.
REQUEST_TIMEOUT = 10


def fetch_page(page_url, request_headers):
    """Return the body text of ``page_url``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(
        page_url, headers=request_headers, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response.text


def parse_gallery(html):
    """Extract the gallery object embedded in the page source ``html``.

    The page passes the gallery to ``JSON.parse`` as a quoted string literal,
    so the captured text is decoded once to obtain the inner JSON document and
    a second time to obtain the object itself. If the first decode already
    yields a non-string value it is returned as-is.

    Raises:
        ValueError: if no ``gallery: JSON.parse(...)`` call is found.
    """
    match = re.search(GALLERY_PATTERN, html)
    if match is None:
        raise ValueError('gallery JSON.parse(...) block not found in page')
    decoded = json.loads(match.group(1))
    if isinstance(decoded, str):
        decoded = json.loads(decoded)
    return decoded


def collect_image_urls(gallery):
    """Return one URL per gallery image: the last entry of its ``url_list``.

    Images whose ``url_list`` is missing or empty are skipped.
    """
    return [
        image['url_list'][-1]['url']
        for image in gallery['sub_images']
        if image.get('url_list')
    ]


def image_filename(image_url, directory=DOWNLOAD_DIR):
    """Build the local path for ``image_url``.

    The last path segment of the URL is used as the file name, with ``.jpg``
    appended.
    """
    return directory + '/' + image_url.split('/')[-1] + '.jpg'


def download_images(image_urls, directory=DOWNLOAD_DIR):
    """Download every URL in ``image_urls`` into ``directory``."""
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(directory, exist_ok=True)
    for image_url in image_urls:
        request.urlretrieve(image_url, image_filename(image_url, directory))


def main():
    """Fetch the article page, list its gallery image URLs, download them."""
    gallery = parse_gallery(fetch_page(url, headers))
    print(gallery)
    print(gallery['sub_images'])
    image_urls = collect_image_urls(gallery)
    print(image_urls)
    download_images(image_urls)


if __name__ == '__main__':
    main()
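# 今日头条街拍图片-爬取部分 (Toutiao street-snap gallery images: scraping part)
# 最新推荐文章于 2020-09-08 21:59:30 发布 (latest recommended article published 2020-09-08 21:59:30)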