# 好看视频:https://haokan.baidu.com/?fr=pc_pz
import requests
import pprint
import csv
import time
import re
# from fake_useragent import UserAgent
# ua=UserAgent()
# Browser-like User-Agent header sent with the feed request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# Feed endpoint; the JSON it returns carries the video entries under
# data -> response -> videos.
url = 'https://haokan.baidu.com/web/video/feed?tab=yingshi_new&act=pcFeed&pd=pc&num=5&shuaxin_id=1629905300426'

# Destination path template; `{}` is replaced by the sanitized video title.
SAVE_PATH_TEMPLATE = '/Users/tianshan/PycharmProjects/firsr/爬虫/{}.mp4'
# Seconds to wait on a stalled connection before giving up.
REQUEST_TIMEOUT = 30
# Number of bytes written to disk per streamed chunk.
CHUNK_SIZE = 64 * 1024

# Path separators, characters reserved on common filesystems, and control
# characters; none of them may appear in a single file-name component.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def sanitize_title(title):
    """Return *title* as a string that is safe to use as one file name.

    Path separators, reserved characters and control characters are replaced
    with ``_`` and surrounding whitespace is stripped. If nothing is left,
    ``'untitled'`` is returned so the resulting path never ends in a bare
    ``.mp4``.
    """
    cleaned = _ILLEGAL_FILENAME_CHARS.sub('_', str(title)).strip()
    return cleaned or 'untitled'


def fetch_video_list(feed_url):
    """Request *feed_url* and return the list of video entries it contains.

    Raises ``requests.RequestException`` on network or HTTP errors, and
    ``KeyError`` if the JSON does not have the data/response/videos layout.
    """
    response = requests.get(feed_url, headers=header, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Parse the body once and go straight to the list of videos.
    return response.json()['data']['response']['videos']


def download_video(video):
    """Download one feed entry to disk.

    *video* is a mapping with a ``'title'`` and a ``'play_url'`` key. The
    body is streamed to the target file in chunks instead of being held in
    memory as a whole.
    """
    title = video['title']
    play_url = video['play_url']
    filename = SAVE_PATH_TEMPLATE.format(sanitize_title(title))
    print('正在写入%s视频' %title)
    with requests.get(play_url, stream=True, timeout=REQUEST_TIMEOUT) as res:
        # Check the status before opening the file so an HTTP error page is
        # never saved under an .mp4 name.
        res.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    f.write(chunk)


def main():
    """Fetch the feed and download every listed video, one per second."""
    for video in fetch_video_list(url):
        # Pause before each download to avoid hammering the server.
        time.sleep(1)
        try:
            download_video(video)
        except (requests.RequestException, OSError) as err:
            # A single failed download should not abort the remaining ones.
            print('下载失败,已跳过: %s' % err)


if __name__ == '__main__':
    main()
# 疑问:
# Request URL: https://haokan.baidu.com/web/video/feed?tab=yingshi_new&act=pcFeed&pd=pc&num=5&shuaxin_id=1629905481967
# Request URL: https://haokan.baidu.com/web/video/feed?tab=yinyue_new&act=pcFeed&pd=pc&num=5&shuaxin_id=1629905481967
# Request URL: https://haokan.baidu.com/web/video/feed?tab=yunying_vlog&act=pcFeed&pd=pc&num=5&shuaxin_id=1629905481967
# 这些地址之间只有 tab= 后面的值不一样(yingshi_new、yinyue_new、yunying_vlog),其余参数都相同;但 tab 取值的命名没找到什么规律。
# 拓展:http://www.xinfadi.com.cn/priceDetail.html
# 等讲了再写