新建一个 .py 文件,代码如下:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import urllib,re,requests
import sys
# Python 2-only workaround: the interpreter removes sys.setdefaultencoding
# from the sys namespace at startup, and reloading the module brings it back.
# The order matters: reload(sys) must run before the call below.
reload(sys)
# Switch the process-wide default codec used for implicit str/unicode
# conversions to UTF-8 (Python 2 defaults to ASCII).
sys.setdefaultencoding('utf-8')
# Module-level list ("url name" per the original note). It is not referenced
# in the visible part of this file -- presumably filled in later; TODO confirm.
url_name = [] #url name
def get():
#获取源码
hd = {"User-Agent":"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"}
url = 'http://www.budejie.com/video/'
html = requests.get(url,headers=hd).text
url_content = re.compile(r'(
.*?
.*?)',re.S) #编译
url_contents = re.findall(url_content,html) #匹配
for i in url_contents:
#匹配视频
url_reg = r'data-mp4="(.*?)"' #视频地址
url_items = re.findall(url_reg,i)
#print url_items
if url_items: #判断视频是否存在
name_reg = re.compile(r'