import urllib2,re
# Download every video linked from one open-course listing page.
#
# The script fetches `pageUrl`, isolates the course-list table with
# `limitPat`, extracts one (title, download link) pair per table row with
# `needPat`, and saves each linked file under `basePath`.

# Destination prefix. This is a raw string, so the value contains literal
# doubled backslashes (a raw string cannot end in a single backslash).
# open() does not create directories, so the folder must already exist.
basePath = r'e:\\open163\\'
fileFormat = r'.mp4'
pageUrl = r"http://v.163.com/special/Khan/european.html"

# Matches the table with class "m-clist" and id "list2" (non-greedy, so it
# stops at the first closing </table>).
limitPat = r'<table class="m-clist" id="list2" style="display:none">.*?</table>'

# One match per table row. Groups, in order:
#   1. row class suffix ("even" or "odd") - unused
#   2. text in the title cell before the first <a> tag
#   3. text of that first <a> tag
#   4. href of the <a class="refbtn"> link (presumably the video file URL)
needPat = r'<tr class="u-(even|odd)">\s*<td class="u-ctitle">\s*(.*?)\s*<a.*?>(.*?)</a>.*?<a class="refbtn" href="(.*?)".*?>.*?</tr>'

# Number of bytes copied per read, so a whole video is never held in memory.
chunkSize = 64 * 1024

print('get page: %s' % pageUrl)
page = urllib2.urlopen(pageUrl)
try:
    content = page.read()
finally:
    page.close()

mat = re.search(limitPat, content, re.S)
if mat is None:
    # Without this check the script died with an opaque AttributeError on
    # None whenever the page layout did not match.
    raise RuntimeError('course list table not found in page: %s' % pageUrl)
limitContent = mat.group()

for _rowClass, prefix, title, videoUrl in re.findall(needPat, limitContent, re.S):
    filename = basePath + prefix + title + fileFormat
    print('creatint file: %s' % filename)
    print('fetching url: %s ......' % videoUrl)
    # Open the connection before creating the file, so a failed request
    # does not leave an empty file behind.
    response = urllib2.urlopen(videoUrl)
    try:
        with open(filename, 'wb') as curMp4:
            while True:
                chunk = response.read(chunkSize)
                if not chunk:
                    break
                curMp4.write(chunk)
    finally:
        response.close()
    print('file done!')
# 用法并不复杂:把 Python 源码里的 pageUrl 改成你想抓取的那个页面的 URL,脚本就会下载该页面上的所有视频,默认保存在 E:\open163 目录下(该目录需事先创建)。