# Crawled before...
# Template: the code is incomplete and is provided for reference only!
import urllib.request
import os
import re
import time
def url_open(url):
    """Fetch ``url`` and return the raw response body as ``bytes``.

    The request is sent with the custom headers defined below. Errors
    raised by ``urllib`` (for example an invalid URL or an HTTP error)
    are not caught here and propagate to the caller.
    """
    headers = {
        "User-Agent": ""  # EDIT HERE: fill in the User-Agent to send
    }
    req = urllib.request.Request(url=url, headers=headers)
    # Use the response as a context manager so the underlying connection
    # is closed even if reading the body fails.
    with urllib.request.urlopen(req) as response:
        return response.read()
def find_videos(url):
    """Return the list of video URLs found via the listing page at ``url``.

    The listing page is downloaded and searched with ``partern``; every
    match is treated as a path that is appended to the site prefix and
    downloaded in turn. Each of those pages is searched with ``partern2``
    and the first match, appended to the video host prefix, is collected.

    Both patterns and both URL prefixes are template placeholders that
    must be adapted to the target site.
    """
    # EDIT HERE: pattern that extracts the per-video page paths.
    partern = re.compile('\n.*?\n')
    # EDIT HERE: pattern that extracts the video path from a video page.
    # This assignment used to be swallowed by the trailing comment of the
    # statement above, which left ``partern2`` undefined at its first use.
    partern2 = re.compile('')

    # Crawl the listing page.
    listing_html = url_open(url).decode('utf-8')
    page_paths = partern.findall(listing_html)

    # Crawl every video page and pick up the video address.
    video_addrs = []
    for page_path in page_paths:
        # EDIT HERE: the full page URL is assembled here; some sites
        # already provide absolute URLs and do not need the prefix.
        page_html = url_open("https://www.xxx.com" + page_path).decode('utf-8')
        matches = partern2.findall(page_html)
        if not matches:
            # A page without a video link would otherwise abort the whole
            # crawl with an IndexError; report it and move on.
            print("未找到视频地址,已跳过:", page_path)
            continue
        # EDIT HERE: the full video URL is assembled here; some sites
        # already provide absolute URLs and do not need the prefix.
        video_addrs.append("https://www.ccc.com" + matches[0])
    return video_addrs
def save_videos(floder, video_addrs):
    """Download every URL in ``video_addrs`` and write each one to a file.

    Files are written with a bare file name, i.e. into the current
    working directory. ``floder`` is part of the signature but is not
    used by this function.

    The file name is the text after the last ``/`` of the URL. Progress
    is printed per video, and there is a 0.2 second pause after each
    download.
    """
    for index, address in enumerate(video_addrs, start=1):
        # A URL ending in "/" has an empty last segment, which cannot be
        # opened as a file; fall back to a numbered name in that case.
        filename = address.split('/')[-1] or ("video_" + str(index))
        print("正在下载视频:", index)
        # Download before opening the file so that a failed request does
        # not leave an empty file behind.
        video = url_open(address)
        with open(filename, 'wb') as f:
            f.write(video)  # save the video
        print("视频", index, "下载完毕。。")
        time.sleep(0.2)
def mm(floder='download'):
    """Run the whole crawl: locate the video URLs and download them.

    The process changes its working directory to ``floder`` (created
    first if it does not exist), so the downloaded files end up there.
    """
    # os.chdir fails on a directory that does not exist yet, which is
    # the normal situation on a first run.
    os.makedirs(floder, exist_ok=True)
    os.chdir(floder)
    url = ""  # EDIT HERE: address of the page listing the videos
    video_addrs = find_videos(url)  # look up the video addresses
    save_videos(floder, video_addrs)  # save the videos
    print("视频爬取完毕!")
if __name__ == '__main__':
    # Start the crawl only when the file is executed as a script, so that
    # importing the module has no side effects.
    mm()